mgeisler · robinkrahl · Jul 1, 2021 · Jul 1, 2021 · Jul 1, 2021 · Jul 1, 2021
diff --git a/Cargo.toml b/Cargo.toml
@@ -12,6 +12,10 @@ license = "MIT"
 edition = "2018"
 exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"]
 
+[[example]]
+name = "style"
+required-features = ["hyphenation"]
+
 [package.metadata.docs.rs]
 all-features = true
 
@@ -38,6 +42,8 @@ unicode-width = { version= "0.1", optional = true }
 [dev-dependencies]
 criterion = "0.3"
 lipsum = "0.8"
+rand = "0.8"
+text-style = { version = "0.3", features = ["termion"] }
 unic-emoji-char = "0.9.0"
 version-sync = "0.9"
 

diff --git a/examples/style.rs b/examples/style.rs
@@ -0,0 +1,134 @@
+use hyphenation::{Language, Load, Standard};
+use rand::Rng as _;
+use textwrap::word_separators::WordSeparator as _;
+
+#[derive(Debug)]
+struct StyledWord<'a> { // one wrappable fragment of text plus the style it is printed with
+    word: &'a str, // visible text, without its trailing spaces
+    whitespace: &'a str, // trailing spaces; printed only when the fragment does not end a line
+    hyphen: bool, // whether a "-" is printed after `word` when the fragment ends a line
+    style: Option<text_style::Style>, // style used for `word` (and the hyphen) when rendering
+}
+
+impl StyledWord<'_> {
+    /// Prints this fragment to standard output.
+    ///
+    /// The word is always printed with the fragment's style. When `is_end`
+    /// is true the trailing whitespace is dropped and, if `hyphen` is set, a
+    /// styled "-" is printed instead; otherwise the whitespace is printed
+    /// unstyled.
+    fn render(&self, is_end: bool) {
+        use text_style::termion::Termion as _;
+
+        let print_styled = |text: &str| {
+            print!("{}", text_style::StyledStr::new(text, self.style).termion());
+        };
+
+        print_styled(self.word);
+        match (is_end, self.hyphen) {
+            (true, true) => print_styled("-"),
+            (true, false) => {}
+            (false, _) => print!("{}", self.whitespace),
+        }
+    }
+}
+
+impl AsRef<str> for StyledWord<'_> {
+    /// Returns the fragment's text (without trailing whitespace), which is
+    /// the string a word splitter inspects when looking for split points.
+    fn as_ref(&self) -> &str {
+        // `word` is already a `&str`; borrowing it again would produce a
+        // `&&str` that only works through auto-deref.
+        self.word
+    }
+}
+
+impl<'a> From<text_style::StyledStr<'a>> for StyledWord<'a> {
+    /// Splits a styled string into its text and its run of trailing spaces.
+    ///
+    /// The resulting fragment keeps the style of `word` and starts out
+    /// without a hyphen.
+    fn from(word: text_style::StyledStr<'a>) -> Self {
+        let text_len = word.s.trim_end_matches(' ').len();
+        let (text, spaces) = word.s.split_at(text_len);
+        Self {
+            word: text,
+            whitespace: spaces,
+            hyphen: false,
+            style: word.style,
+        }
+    }
+}
+
+impl textwrap::core::Fragment for StyledWord<'_> {
+    /// Width of the word in terminal columns.
+    ///
+    /// Uses `textwrap::core::display_width` rather than the byte length so
+    /// that non-ASCII text is measured the same way textwrap measures its
+    /// own `Word`s.
+    fn width(&self) -> usize {
+        textwrap::core::display_width(self.word)
+    }
+
+    /// Width of the trailing whitespace. It consists solely of ASCII
+    /// spaces, so its byte length equals its width in columns.
+    fn whitespace_width(&self) -> usize {
+        self.whitespace.len()
+    }
+
+    /// Width of the hyphen that is printed when a line ends after this
+    /// fragment: one column if `hyphen` is set, zero otherwise.
+    fn penalty_width(&self) -> usize {
+        if self.hyphen {
+            1
+        } else {
+            0
+        }
+    }
+}
+
+impl textwrap::word_splitters::Splittable for StyledWord<'_> {
+    type Output = Self;
+
+    /// Returns the part of this fragment covering the byte `range` of
+    /// `word`, with the same style.
+    ///
+    /// With `keep_ending` the part inherits the whitespace and hyphen flag
+    /// of `self`. Without it the part has no whitespace and asks for a
+    /// hyphen unless its text already ends in one.
+    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Self::Output {
+        let word = &self.word[range];
+        if keep_ending {
+            Self {
+                word,
+                whitespace: self.whitespace,
+                hyphen: self.hyphen,
+                style: self.style,
+            }
+        } else {
+            Self {
+                word,
+                whitespace: "",
+                hyphen: !word.ends_with('-'),
+                style: self.style,
+            }
+        }
+    }
+}
+
+/// Builds a random style.
+///
+/// Bold, italic and underline are each enabled with probability 0.1,
+/// strikethrough with probability 0.01. The foreground is red, green or
+/// blue with probability 0.1 each and left unset otherwise.
+fn generate_style(rng: &mut impl rand::Rng) -> text_style::Style {
+    let mut style = text_style::Style::default();
+
+    style.set_bold(rng.gen_bool(0.1));
+    style.set_italic(rng.gen_bool(0.1));
+    style.set_underline(rng.gen_bool(0.1));
+    // NOTE(review): unlike the setters above this one has no `set_` prefix;
+    // confirm against the text-style API.
+    style.strikethrough(rng.gen_bool(0.01));
+
+    // The first range is `0..=9`, not `0..=10`, so that every color covers
+    // exactly ten of the hundred possible values.
+    style.fg = match rng.gen_range(0..100) {
+        0..=9 => Some(text_style::AnsiColor::Red),
+        10..=19 => Some(text_style::AnsiColor::Green),
+        20..=29 => Some(text_style::AnsiColor::Blue),
+        _ => None,
+    }
+    .map(|color| text_style::Color::Ansi {
+        color,
+        mode: text_style::AnsiMode::Light,
+    });
+
+    style
+}
+
+/// Generates random lorem-ipsum text, gives every space-separated chunk a
+/// random style, splits the chunks into hyphenatable fragments and prints
+/// the result wrapped with the first-fit algorithm.
+fn main() {
+    let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+    let mut rng = rand::thread_rng();
+
+    let text = lipsum::lipsum(rng.gen_range(100..500));
+
+    // Style each chunk, locate the words inside it, and break every word at
+    // the split points found with the hyphenation dictionary.
+    let fragments: Vec<_> = text
+        .split_inclusive(' ')
+        .map(|chunk| text_style::StyledStr::styled(chunk, generate_style(&mut rng)))
+        .flat_map(|styled| {
+            textwrap::word_separators::AsciiSpace
+                .find_word_ranges(&styled.s)
+                .map(move |range| text_style::StyledStr::new(&styled.s[range], styled.style))
+        })
+        .map(StyledWord::from)
+        .flat_map(|word| textwrap::word_splitters::Fragments::new(word, &dictionary))
+        .collect();
+
+    for line in textwrap::wrap_algorithms::wrap_first_fit(&fragments, &[50]) {
+        // Only the last fragment of a line is rendered as a line ending.
+        for (idx, fragment) in line.iter().enumerate() {
+            fragment.render(idx + 1 == line.len());
+        }
+        println!();
+    }
+}
diff --git a/src/core.rs b/src/core.rs
@@ -224,6 +224,12 @@ pub struct Word<'a> {
     pub(crate) width: usize,
 }
 
+impl AsRef<str> for Word<'_> {
+    /// Returns the `word` field, i.e. the text without the separately
+    /// stored whitespace and penalty.
+    fn as_ref(&self) -> &str {
+        // `word` is already a `&str`; no extra borrow is needed.
+        self.word
+    }
+}
+
 impl std::ops::Deref for Word<'_> {
     type Target = str;
 

diff --git a/src/word_separators.rs b/src/word_separators.rs
@@ -43,7 +43,18 @@ pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug {
     // this isn't possible until Rust supports higher-kinded types:
     // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
     /// Find all words in `line`.
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a>;
+    ///
+    /// The default implementation slices `line` at every range produced by
+    /// [`find_word_ranges`](WordSeparator::find_word_ranges) and turns each
+    /// slice into a [`Word`].
+    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+        let ranges = self.find_word_ranges(line);
+        Box::new(ranges.map(move |range| Word::from(&line[range])))
+    }
+
+    /// Find all words in `line` and return their byte ranges within `line`.
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a>;
 }
 
 // The internal `WordSeparatorClone` trait allows us to implement
@@ -69,9 +80,12 @@ impl Clone for Box<dyn WordSeparator> {
 }
 
 impl WordSeparator for Box<dyn WordSeparator> {
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a> {
         use std::ops::Deref;
-        self.deref().find_words(line)
+        self.deref().find_word_ranges(line) // deref first so the call goes to the boxed separator
     }
 }
 
@@ -92,7 +106,10 @@ pub struct AsciiSpace;
 ///                        Word::from("World!")]);
 /// ```
 impl WordSeparator for AsciiSpace {
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a> {
         let mut start = 0;
         let mut in_whitespace = false;
         let mut char_indices = line.char_indices();
@@ -106,19 +123,19 @@ impl WordSeparator for AsciiSpace {
             #[allow(clippy::while_let_on_iterator)]
             while let Some((idx, ch)) = char_indices.next() {
                 if in_whitespace && ch != ' ' {
-                    let word = Word::from(&line[start..idx]);
+                    let word_range = start..idx;
                     start = idx;
                     in_whitespace = ch == ' ';
-                    return Some(word);
+                    return Some(word_range);
                 }
 
                 in_whitespace = ch == ' ';
             }
 
             if start < line.len() {
-                let word = Word::from(&line[start..]);
+                let word_range = start..line.len();
                 start = line.len();
-                return Some(word);
+                return Some(word_range);
             }
 
             None
@@ -194,7 +211,10 @@ pub struct UnicodeBreakProperties;
 /// ```
 #[cfg(feature = "unicode-linebreak")]
 impl WordSeparator for UnicodeBreakProperties {
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a> {
         // Construct an iterator over (original index, stripped index)
         // tuples. We find the Unicode linebreaks on a stripped string,
         // but we need the original indices so we can form words based on
@@ -242,16 +262,16 @@ impl WordSeparator for UnicodeBreakProperties {
             while let Some((idx, _)) = opportunities.next() {
                 if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx)
                 {
-                    let word = Word::from(&line[start..orig_idx]);
+                    let word_range = start..orig_idx;
                     start = orig_idx;
-                    return Some(word);
+                    return Some(word_range);
                 }
             }
 
             if start < line.len() {
-                let word = Word::from(&line[start..]);
+                let word_range = start..line.len();
                 start = line.len();
-                return Some(word);
+                return Some(word_range);
             }
 
             None

diff --git a/src/word_splitters.rs b/src/word_splitters.rs
@@ -190,36 +190,79 @@ where
     I: IntoIterator<Item = Word<'a>>,
     WordSplit: WordSplitter,
 {
-    words.into_iter().flat_map(move |word| {
-        let mut prev = 0;
-        let mut split_points = word_splitter.split_points(&word).into_iter();
-        std::iter::from_fn(move || {
-            if let Some(idx) = split_points.next() {
-                let need_hyphen = !word[..idx].ends_with('-');
-                let w = Word {
-                    word: &word.word[prev..idx],
-                    width: display_width(&word[prev..idx]),
-                    whitespace: "",
-                    penalty: if need_hyphen { "-" } else { "" },
-                };
-                prev = idx;
-                return Some(w);
-            }
+    words
+        .into_iter()
+        .flat_map(move |word| Fragments::new(word, word_splitter))
+}
 
-            if prev < word.word.len() || prev == 0 {
-                let w = Word {
-                    word: &word.word[prev..],
-                    width: display_width(&word[prev..]),
-                    whitespace: word.whitespace,
-                    penalty: word.penalty,
-                };
-                prev = word.word.len() + 1;
-                return Some(w);
-            }
+/// A word-like value that can be cut into smaller pieces.
+///
+/// The string returned by `as_ref` is the text in which split points are
+/// looked up; [`Fragments`] passes byte ranges of that string to
+/// [`split`](Splittable::split).
+pub trait Splittable: AsRef<str> {
+    /// The type of the pieces produced by [`split`](Splittable::split).
+    type Output;
+
+    /// Returns the piece of `self` covering the byte `range` of its text.
+    ///
+    /// [`Fragments`] passes `keep_ending = true` only for the last piece of
+    /// a word and `false` for every piece that ends at a split point.
+    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Self::Output;
+}
 
-            None
-        })
-    })
+impl<'a> Splittable for Word<'a> {
+    type Output = Self;
+
+    /// Returns the part of this word covering the byte `range`, with its
+    /// display width recomputed.
+    ///
+    /// With `keep_ending` the part inherits the whitespace and penalty of
+    /// `self`. Without it the part has no whitespace and gets a "-" penalty
+    /// unless its text already ends in a hyphen.
+    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Self::Output {
+        let word = &self.word[range];
+        let (whitespace, penalty) = match (keep_ending, word.ends_with('-')) {
+            (true, _) => (self.whitespace, self.penalty),
+            (false, true) => ("", ""),
+            (false, false) => ("", "-"),
+        };
+        Word {
+            word,
+            width: display_width(word),
+            whitespace,
+            penalty,
+        }
+    }
+}
+
+/// Iterator over the pieces of a single word.
+///
+/// The word is cut at every index yielded by `split_points`; whatever
+/// remains after the last split point is yielded as the final piece.
+#[derive(Debug)]
+pub struct Fragments<W: Splittable, I: Iterator<Item = usize>> {
+    /// The word being split.
+    word: W,
+    /// Remaining split points, as byte indices into `word.as_ref()`.
+    split_points: I,
+    /// Byte index at which the next piece starts.
+    prev: usize,
+}
+
+impl<W: Splittable> Fragments<W, std::vec::IntoIter<usize>> {
+    /// Creates an iterator over the pieces of `word`, using the split points
+    /// that `word_splitter` finds in `word.as_ref()`.
+    pub fn new(word: W, word_splitter: &impl WordSplitter) -> Self {
+        let split_points = word_splitter.split_points(word.as_ref()).into_iter();
+        Self {
+            word,
+            split_points,
+            prev: 0,
+        }
+    }
+}
+
+impl<W: Splittable, I: Iterator<Item = usize>> Iterator for Fragments<W, I> {
+    type Item = W::Output;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(idx) = self.split_points.next() { // piece ending at a split point
+            let w = self.word.split(self.prev..idx, false);
+            self.prev = idx;
+            return Some(w);
+        }
+
+        let len = self.word.as_ref().len();
+        if self.prev < len || self.prev == 0 { // last piece (also taken once for an empty word)
+            let w = self.word.split(self.prev..len, true);
+            // Must be `len + 1`: with an empty word, `prev = len` stays 0 and would repeat forever.
+            self.prev = len + 1;
+            return Some(w);
+        }
+
+        None
+    }
 }
 
 #[cfg(test)]