diff --git a/Cargo.toml b/Cargo.toml index 3be33f48..ee3657df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,10 @@ license = "MIT" edition = "2018" exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"] +[[example]] +name = "style" +required-features = ["hyphenation"] + [package.metadata.docs.rs] all-features = true @@ -38,6 +42,8 @@ unicode-width = { version= "0.1", optional = true } [dev-dependencies] criterion = "0.3" lipsum = "0.8" +rand = "0.8" +text-style = { version = "0.3", features = ["termion"] } unic-emoji-char = "0.9.0" version-sync = "0.9" diff --git a/examples/style.rs b/examples/style.rs new file mode 100644 index 00000000..4e46ee82 --- /dev/null +++ b/examples/style.rs @@ -0,0 +1,134 @@ +use hyphenation::{Language, Load, Standard}; +use rand::Rng as _; +use textwrap::word_separators::WordSeparator as _; + +#[derive(Debug)] +struct StyledWord<'a> { + word: &'a str, + whitespace: &'a str, + hyphen: bool, + style: Option, +} + +impl StyledWord<'_> { + fn render(&self, is_end: bool) { + use text_style::termion::Termion as _; + + print!( + "{}", + text_style::StyledStr::new(self.word, self.style).termion() + ); + + if is_end { + if self.hyphen { + print!("{}", text_style::StyledStr::new("-", self.style).termion()); + } + } else { + print!("{}", self.whitespace); + } + } +} + +impl AsRef for StyledWord<'_> { + fn as_ref(&self) -> &str { + &self.word + } +} + +impl<'a> From> for StyledWord<'a> { + fn from(word: text_style::StyledStr<'a>) -> Self { + let trimmed = word.s.trim_end_matches(' '); + Self { + word: trimmed, + whitespace: &word.s[trimmed.len()..], + hyphen: false, + style: word.style, + } + } +} + +impl textwrap::core::Fragment for StyledWord<'_> { + fn width(&self) -> usize { + self.word.len() + } + + fn whitespace_width(&self) -> usize { + self.whitespace.len() + } + + fn penalty_width(&self) -> usize { + if self.hyphen { + 1 + } else { + 0 + } + } +} + +impl textwrap::word_splitters::Splittable for StyledWord<'_> { + type Output = Self; + + fn split(&self, range: std::ops::Range, keep_ending: bool) -> Self::Output { + let word = &self.word[range]; + Self { + word, + whitespace: if keep_ending { self.whitespace } else { "" }, + hyphen: if keep_ending { + self.hyphen + } else { + !word.ends_with('-') + }, + style: self.style, + } + } +} + +fn generate_style(rng: &mut impl rand::Rng) -> text_style::Style { + let mut style = text_style::Style::default(); + + style.set_bold(rng.gen_bool(0.1)); + style.set_italic(rng.gen_bool(0.1)); + style.set_underline(rng.gen_bool(0.1)); + style.strikethrough(rng.gen_bool(0.01)); + + style.fg = match rng.gen_range(0..100) { + 0..=10 => Some(text_style::AnsiColor::Red), + 11..=20 => Some(text_style::AnsiColor::Green), + 21..=30 => Some(text_style::AnsiColor::Blue), + _ => None, + } + .map(|color| text_style::Color::Ansi { + color, + mode: text_style::AnsiMode::Light, + }); + + style +} + +fn main() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let mut rng = rand::thread_rng(); + + let text = lipsum::lipsum(rng.gen_range(100..500)); + + let styled = text + .split_inclusive(' ') + .map(|s| text_style::StyledStr::styled(s, generate_style(&mut rng))); + let words: Vec<_> = styled + .flat_map(|s| { + textwrap::word_separators::AsciiSpace + .find_word_ranges(&s.s) + .map(move |range| text_style::StyledStr::new(&s.s[range], s.style)) + }) + .map(StyledWord::from) + .flat_map(|w| textwrap::word_splitters::Fragments::new(w, &dictionary)) + .collect(); + + let lines = textwrap::wrap_algorithms::wrap_first_fit(&words, &[50]); + for line in lines { + for (idx, fragment) in line.into_iter().enumerate() { + fragment.render(idx + 1 == line.len()); + } + println!(); + } +} diff --git a/src/core.rs b/src/core.rs index af024603..e4d0924e 100644 --- a/src/core.rs +++ b/src/core.rs @@ -224,6 +224,12 @@ pub struct Word<'a> { pub(crate) width: usize, } +impl AsRef for Word<'_> { + fn as_ref(&self) -> &str { + &self.word + } +} + impl std::ops::Deref for Word<'_> { type Target = str; diff --git a/src/word_separators.rs b/src/word_separators.rs index abac7563..842324ee 100644 --- a/src/word_separators.rs +++ b/src/word_separators.rs @@ -43,7 +43,18 @@ pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug { // this isn't possible until Rust supports higher-kinded types: // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md /// Find all words in `line`. - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a>; + fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + Box::new( + self.find_word_ranges(line) + .map(move |range| Word::from(&line[range])), + ) + } + + /// Find all words in `line` and return their positions in `line`. + fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a>; } // The internal `WordSeparatorClone` trait is allows us to implement @@ -69,9 +80,12 @@ impl Clone for Box { } impl WordSeparator for Box { - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a> { use std::ops::Deref; - self.deref().find_words(line) + self.deref().find_word_ranges(line) } } @@ -92,7 +106,10 @@ pub struct AsciiSpace; /// Word::from("World!")]); /// ``` impl WordSeparator for AsciiSpace { - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a> { let mut start = 0; let mut in_whitespace = false; let mut char_indices = line.char_indices(); @@ -106,19 +123,19 @@ impl WordSeparator for AsciiSpace { #[allow(clippy::while_let_on_iterator)] while let Some((idx, ch)) = char_indices.next() { if in_whitespace && ch != ' ' { - let word = Word::from(&line[start..idx]); + let word_range = start..idx; start = idx; in_whitespace = ch == ' '; - return Some(word); + return Some(word_range); } in_whitespace = ch == ' '; } if start < line.len() { - let word = Word::from(&line[start..]); + let word_range = start..line.len(); start = line.len(); - return Some(word); + return Some(word_range); } None @@ -194,7 +211,10 @@ pub struct UnicodeBreakProperties; /// ``` #[cfg(feature = "unicode-linebreak")] impl WordSeparator for UnicodeBreakProperties { - fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + fn find_word_ranges<'a>( + &self, + line: &'a str, + ) -> Box> + 'a> { // Construct an iterator over (original index, stripped index) // tuples. We find the Unicode linebreaks on a stripped string, // but we need the original indices so we can form words based on @@ -242,16 +262,16 @@ impl WordSeparator for UnicodeBreakProperties { while let Some((idx, _)) = opportunities.next() { if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) { - let word = Word::from(&line[start..orig_idx]); + let word_range = start..orig_idx; start = orig_idx; - return Some(word); + return Some(word_range); } } if start < line.len() { - let word = Word::from(&line[start..]); + let word_range = start..line.len(); start = line.len(); - return Some(word); + return Some(word_range); } None diff --git a/src/word_splitters.rs b/src/word_splitters.rs index f4d94c70..fff1b2c3 100644 --- a/src/word_splitters.rs +++ b/src/word_splitters.rs @@ -190,36 +190,79 @@ where I: IntoIterator>, WordSplit: WordSplitter, { - words.into_iter().flat_map(move |word| { - let mut prev = 0; - let mut split_points = word_splitter.split_points(&word).into_iter(); - std::iter::from_fn(move || { - if let Some(idx) = split_points.next() { - let need_hyphen = !word[..idx].ends_with('-'); - let w = Word { - word: &word.word[prev..idx], - width: display_width(&word[prev..idx]), - whitespace: "", - penalty: if need_hyphen { "-" } else { "" }, - }; - prev = idx; - return Some(w); - } + words + .into_iter() + .flat_map(move |word| Fragments::new(word, word_splitter)) +} - if prev < word.word.len() || prev == 0 { - let w = Word { - word: &word.word[prev..], - width: display_width(&word[prev..]), - whitespace: word.whitespace, - penalty: word.penalty, - }; - prev = word.word.len() + 1; - return Some(w); - } +#[allow(missing_docs)] +pub trait Splittable: AsRef { + type Output; + + #[allow(missing_docs)] + fn split(&self, range: std::ops::Range, keep_ending: bool) -> Self::Output; +} - None - }) - }) +impl<'a> Splittable for Word<'a> { + type Output = Self; + + fn split(&self, range: std::ops::Range, keep_ending: bool) -> Self::Output { + let word = &self.word[range]; + Word { + word, + width: display_width(word), + whitespace: if keep_ending { self.whitespace } else { "" }, + penalty: if keep_ending { + self.penalty + } else if !word.ends_with('-') { + "-" + } else { + "" + }, + } + } +} + +#[allow(missing_docs)] +#[derive(Debug)] +pub struct Fragments> { + word: W, + split_points: I, + prev: usize, +} + +impl Fragments> { + #[allow(missing_docs)] + pub fn new(word: W, word_splitter: &impl WordSplitter) -> Self { + let split_points = word_splitter.split_points(word.as_ref()).into_iter(); + Self { + word, + split_points, + prev: 0, + } + } +} + +impl> Iterator for Fragments { + type Item = W::Output; + + fn next(&mut self) -> Option { + if let Some(idx) = self.split_points.next() { + let w = self.word.split(self.prev..idx, false); + self.prev = idx; + return Some(w); + } + + let len = self.word.as_ref().len(); + if self.prev < len || self.prev == 0 { + let w = self.word.split(self.prev..len, true); + // TODO: shouldn’t this be just len? + self.prev = len + 1; + return Some(w); + } + + None + } } #[cfg(test)]