From e6b7c3d97a444f30d9b942f5ce0f9fb7386d9213 Mon Sep 17 00:00:00 2001 From: Robin Krahl Date: Thu, 1 Jul 2021 10:13:50 +0200 Subject: [PATCH 1/5] Add WordSeparator::find_word_ranges method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the find_word_ranges method to the WordSeparator trait that makes it possible to find words within a string without using textwrap’s Word type. This is especially useful when using custom types for strings. --- src/word_separators.rs | 46 ++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/word_separators.rs b/src/word_separators.rs index abac7563..842324ee 100644 --- a/src/word_separators.rs +++ b/src/word_separators.rs @@ -43,7 +43,18 @@ pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug { // this isn't possible until Rust supports higher-kinded types: // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md /// Find all words in `line`. - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a>; + fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + Box::new( + self.find_word_ranges(line) + .map(move |range| Word::from(&line[range])), + ) + } + + /// Find all words in `line` and return their positions in `line`. 
+ fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a>; } // The internal `WordSeparatorClone` trait is allows us to implement @@ -69,9 +80,12 @@ impl Clone for Box { } impl WordSeparator for Box { - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a> { use std::ops::Deref; - self.deref().find_words(line) + self.deref().find_word_ranges(line) } } @@ -92,7 +106,10 @@ pub struct AsciiSpace; /// Word::from("World!")]); /// ``` impl WordSeparator for AsciiSpace { - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a> { let mut start = 0; let mut in_whitespace = false; let mut char_indices = line.char_indices(); @@ -106,19 +123,19 @@ impl WordSeparator for AsciiSpace { #[allow(clippy::while_let_on_iterator)] while let Some((idx, ch)) = char_indices.next() { if in_whitespace && ch != ' ' { - let word = Word::from(&line[start..idx]); + let word_range = start..idx; start = idx; in_whitespace = ch == ' '; - return Some(word); + return Some(word_range); } in_whitespace = ch == ' '; } if start < line.len() { - let word = Word::from(&line[start..]); + let word_range = start..line.len(); start = line.len(); - return Some(word); + return Some(word_range); } None @@ -194,7 +211,10 @@ pub struct UnicodeBreakProperties; /// ``` #[cfg(feature = "unicode-linebreak")] impl WordSeparator for UnicodeBreakProperties { - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a> { // Construct an iterator over (original index, stripped index) // tuples. 
We find the Unicode linebreaks on a stripped string, // but we need the original indices so we can form words based on @@ -242,16 +262,16 @@ impl WordSeparator for UnicodeBreakProperties { while let Some((idx, _)) = opportunities.next() { if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) { - let word = Word::from(&line[start..orig_idx]); + let word_range = start..orig_idx; start = orig_idx; - return Some(word); + return Some(word_range); } } if start < line.len() { - let word = Word::from(&line[start..]); + let word_range = start..line.len(); start = line.len(); - return Some(word); + return Some(word_range); } None From 637809cb688579b089d12d4ba45c0129a8f60a0f Mon Sep 17 00:00:00 2001 From: Robin Krahl Date: Thu, 1 Jul 2021 11:10:35 +0200 Subject: [PATCH 2/5] Introduce word_splitters::Fragments struct This patch adds the word_splitters::Fragments struct that yields the fragments for a word. This makes it easier to reason over the lifetimes of the generated iterator and allows us to make it generic over the word type. 
--- src/word_splitters.rs | 91 ++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 28 deletions(-) diff --git a/src/word_splitters.rs b/src/word_splitters.rs index f4d94c70..d3104adb 100644 --- a/src/word_splitters.rs +++ b/src/word_splitters.rs @@ -190,36 +190,71 @@ where I: IntoIterator>, WordSplit: WordSplitter, { - words.into_iter().flat_map(move |word| { - let mut prev = 0; - let mut split_points = word_splitter.split_points(&word).into_iter(); - std::iter::from_fn(move || { - if let Some(idx) = split_points.next() { - let need_hyphen = !word[..idx].ends_with('-'); - let w = Word { - word: &word.word[prev..idx], - width: display_width(&word[prev..idx]), - whitespace: "", - penalty: if need_hyphen { "-" } else { "" }, - }; - prev = idx; - return Some(w); - } + words + .into_iter() + .flat_map(move |word| Fragments::new(word, word_splitter)) +} - if prev < word.word.len() || prev == 0 { - let w = Word { - word: &word.word[prev..], - width: display_width(&word[prev..]), - whitespace: word.whitespace, - penalty: word.penalty, - }; - prev = word.word.len() + 1; - return Some(w); - } +#[allow(missing_docs)] +#[derive(Debug)] +pub struct Fragments<'a, I: Iterator> { + word: Word<'a>, + split_points: I, + prev: usize, +} - None - }) - }) +impl<'a> Fragments<'a, std::vec::IntoIter> { + #[allow(missing_docs)] + pub fn new(word: Word<'a>, word_splitter: &impl WordSplitter) -> Self { + let split_points = word_splitter.split_points(&word).into_iter(); + Self { + word, + split_points, + prev: 0, + } + } +} + +impl<'a, I: Iterator> Fragments<'a, I> { + fn split(&self, range: std::ops::Range, keep_ending: bool) -> Word<'a> { + let word = &self.word.word[range]; + Word { + word, + width: display_width(word), + whitespace: if keep_ending { + self.word.whitespace + } else { + "" + }, + penalty: if keep_ending { + self.word.penalty + } else if word.ends_with('-') { + "-" + } else { + "" + }, + } + } +} + +impl<'a, I: Iterator> Iterator for Fragments<'a, 
I> { + type Item = Word<'a>; + + fn next(&mut self) -> Option { + if let Some(idx) = self.split_points.next() { + let w = self.split(self.prev..idx, false); + self.prev = idx; + return Some(w); + } + + if self.prev < self.word.word.len() || self.prev == 0 { + let w = self.split(self.prev..self.word.len(), true); + self.prev = self.word.word.len() + 1; + return Some(w); + } + + None + } } #[cfg(test)] From 14fa737b02ab454de922ac5d4a6af6d7c89ccfaa Mon Sep 17 00:00:00 2001 From: Robin Krahl Date: Thu, 1 Jul 2021 11:17:35 +0200 Subject: [PATCH 3/5] Make word_splitters::Fragments generic over new trait This patch introduces the new word_splitters::Splittable trait and makes word_splitters::Fragments generic over that trait. This allows library users to use their own fragment types and not only core::Word. --- src/core.rs | 6 ++++ src/word_splitters.rs | 70 ++++++++++++++++++++++++------------------- 2 files changed, 45 insertions(+), 31 deletions(-) diff --git a/src/core.rs b/src/core.rs index af024603..e4d0924e 100644 --- a/src/core.rs +++ b/src/core.rs @@ -224,6 +224,12 @@ pub struct Word<'a> { pub(crate) width: usize, } +impl AsRef for Word<'_> { + fn as_ref(&self) -> &str { + &self.word + } +} + impl std::ops::Deref for Word<'_> { type Target = str; diff --git a/src/word_splitters.rs b/src/word_splitters.rs index d3104adb..fff1b2c3 100644 --- a/src/word_splitters.rs +++ b/src/word_splitters.rs @@ -196,39 +196,25 @@ where } #[allow(missing_docs)] -#[derive(Debug)] -pub struct Fragments<'a, I: Iterator> { - word: Word<'a>, - split_points: I, - prev: usize, -} +pub trait Splittable: AsRef { + type Output; -impl<'a> Fragments<'a, std::vec::IntoIter> { #[allow(missing_docs)] - pub fn new(word: Word<'a>, word_splitter: &impl WordSplitter) -> Self { - let split_points = word_splitter.split_points(&word).into_iter(); - Self { - word, - split_points, - prev: 0, - } - } + fn split(&self, range: std::ops::Range, keep_ending: bool) -> Self::Output; } -impl<'a, I: Iterator> 
Fragments<'a, I> { - fn split(&self, range: std::ops::Range, keep_ending: bool) -> Word<'a> { - let word = &self.word.word[range]; +impl<'a> Splittable for Word<'a> { + type Output = Self; + + fn split(&self, range: std::ops::Range, keep_ending: bool) -> Self::Output { + let word = &self.word[range]; Word { word, width: display_width(word), - whitespace: if keep_ending { - self.word.whitespace - } else { - "" - }, + whitespace: if keep_ending { self.whitespace } else { "" }, penalty: if keep_ending { - self.word.penalty - } else if word.ends_with('-') { + self.penalty + } else if !word.ends_with('-') { "-" } else { "" @@ -237,19 +223,41 @@ impl<'a, I: Iterator> Fragments<'a, I> { } } -impl<'a, I: Iterator> Iterator for Fragments<'a, I> { - type Item = Word<'a>; +#[allow(missing_docs)] +#[derive(Debug)] +pub struct Fragments> { + word: W, + split_points: I, + prev: usize, +} + +impl Fragments> { + #[allow(missing_docs)] + pub fn new(word: W, word_splitter: &impl WordSplitter) -> Self { + let split_points = word_splitter.split_points(word.as_ref()).into_iter(); + Self { + word, + split_points, + prev: 0, + } + } +} + +impl> Iterator for Fragments { + type Item = W::Output; fn next(&mut self) -> Option { if let Some(idx) = self.split_points.next() { - let w = self.split(self.prev..idx, false); + let w = self.word.split(self.prev..idx, false); self.prev = idx; return Some(w); } - if self.prev < self.word.word.len() || self.prev == 0 { - let w = self.split(self.prev..self.word.len(), true); - self.prev = self.word.word.len() + 1; + let len = self.word.as_ref().len(); + if self.prev < len || self.prev == 0 { + let w = self.word.split(self.prev..len, true); + // TODO: shouldn’t this be just len? 
+ self.prev = len + 1; return Some(w); } From a105b03fec1bf499fcf608b0375a4d17bcd27a2e Mon Sep 17 00:00:00 2001 From: Robin Krahl Date: Thu, 1 Jul 2021 12:04:22 +0200 Subject: [PATCH 4/5] Add styled example --- Cargo.toml | 2 + examples/style.rs | 134 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 examples/style.rs diff --git a/Cargo.toml b/Cargo.toml index 3be33f48..b2c20ce2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,12 +32,14 @@ default = ["unicode-linebreak", "unicode-width", "smawk"] hyphenation = { version = "0.8.2", optional = true, features = ["embed_en-us"] } smawk = { version = "0.3", optional = true } terminal_size = { version = "0.1", optional = true } +text-style = { version = "0.3", features = ["termion"] } unicode-linebreak = { version = "0.1", optional = true } unicode-width = { version= "0.1", optional = true } [dev-dependencies] criterion = "0.3" lipsum = "0.8" +rand = "0.8" unic-emoji-char = "0.9.0" version-sync = "0.9" diff --git a/examples/style.rs b/examples/style.rs new file mode 100644 index 00000000..4e46ee82 --- /dev/null +++ b/examples/style.rs @@ -0,0 +1,134 @@ +use hyphenation::{Language, Load, Standard}; +use rand::Rng as _; +use textwrap::word_separators::WordSeparator as _; + +#[derive(Debug)] +struct StyledWord<'a> { + word: &'a str, + whitespace: &'a str, + hyphen: bool, + style: Option, +} + +impl StyledWord<'_> { + fn render(&self, is_end: bool) { + use text_style::termion::Termion as _; + + print!( + "{}", + text_style::StyledStr::new(self.word, self.style).termion() + ); + + if is_end { + if self.hyphen { + print!("{}", text_style::StyledStr::new("-", self.style).termion()); + } + } else { + print!("{}", self.whitespace); + } + } +} + +impl AsRef for StyledWord<'_> { + fn as_ref(&self) -> &str { + &self.word + } +} + +impl<'a> From> for StyledWord<'a> { + fn from(word: text_style::StyledStr<'a>) -> Self { + let trimmed = word.s.trim_end_matches(' '); + Self { + word: 
trimmed, + whitespace: &word.s[trimmed.len()..], + hyphen: false, + style: word.style, + } + } +} + +impl textwrap::core::Fragment for StyledWord<'_> { + fn width(&self) -> usize { + self.word.len() + } + + fn whitespace_width(&self) -> usize { + self.whitespace.len() + } + + fn penalty_width(&self) -> usize { + if self.hyphen { + 1 + } else { + 0 + } + } +} + +impl textwrap::word_splitters::Splittable for StyledWord<'_> { + type Output = Self; + + fn split(&self, range: std::ops::Range, keep_ending: bool) -> Self::Output { + let word = &self.word[range]; + Self { + word, + whitespace: if keep_ending { self.whitespace } else { "" }, + hyphen: if keep_ending { + self.hyphen + } else { + !word.ends_with('-') + }, + style: self.style, + } + } +} + +fn generate_style(rng: &mut impl rand::Rng) -> text_style::Style { + let mut style = text_style::Style::default(); + + style.set_bold(rng.gen_bool(0.1)); + style.set_italic(rng.gen_bool(0.1)); + style.set_underline(rng.gen_bool(0.1)); + style.strikethrough(rng.gen_bool(0.01)); + + style.fg = match rng.gen_range(0..100) { + 0..=10 => Some(text_style::AnsiColor::Red), + 11..=20 => Some(text_style::AnsiColor::Green), + 21..=30 => Some(text_style::AnsiColor::Blue), + _ => None, + } + .map(|color| text_style::Color::Ansi { + color, + mode: text_style::AnsiMode::Light, + }); + + style +} + +fn main() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let mut rng = rand::thread_rng(); + + let text = lipsum::lipsum(rng.gen_range(100..500)); + + let styled = text + .split_inclusive(' ') + .map(|s| text_style::StyledStr::styled(s, generate_style(&mut rng))); + let words: Vec<_> = styled + .flat_map(|s| { + textwrap::word_separators::AsciiSpace + .find_word_ranges(&s.s) + .map(move |range| text_style::StyledStr::new(&s.s[range], s.style)) + }) + .map(StyledWord::from) + .flat_map(|w| textwrap::word_splitters::Fragments::new(w, &dictionary)) + .collect(); + + let lines = 
textwrap::wrap_algorithms::wrap_first_fit(&words, &[50]); + for line in lines { + for (idx, fragment) in line.into_iter().enumerate() { + fragment.render(idx + 1 == line.len()); + } + println!(); + } +} From e992b18cb74bc40909f3493a2f41864d1a0ed280 Mon Sep 17 00:00:00 2001 From: Robin Krahl Date: Sun, 4 Jul 2021 09:08:58 +0200 Subject: [PATCH 5/5] fixup! Add styled example Configure required features for style example --- Cargo.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b2c20ce2..ee3657df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,10 @@ license = "MIT" edition = "2018" exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"] +[[example]] +name = "style" +required-features = ["hyphenation"] + [package.metadata.docs.rs] all-features = true @@ -32,7 +36,6 @@ default = ["unicode-linebreak", "unicode-width", "smawk"] hyphenation = { version = "0.8.2", optional = true, features = ["embed_en-us"] } smawk = { version = "0.3", optional = true } terminal_size = { version = "0.1", optional = true } -text-style = { version = "0.3", features = ["termion"] } unicode-linebreak = { version = "0.1", optional = true } unicode-width = { version= "0.1", optional = true } @@ -40,6 +43,7 @@ unicode-width = { version= "0.1", optional = true } criterion = "0.3" lipsum = "0.8" rand = "0.8" +text-style = { version = "0.3", features = ["termion"] } unic-emoji-char = "0.9.0" version-sync = "0.9"