From e6b7c3d97a444f30d9b942f5ce0f9fb7386d9213 Mon Sep 17 00:00:00 2001
From: Robin Krahl <robin.krahl@ireas.org>
Date: Thu, 1 Jul 2021 10:13:50 +0200
Subject: [PATCH 1/5] Add WordSeparator::find_word_ranges method
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

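This patch adds the find_word_ranges method to the WordSeparator trait,
which makes it possible to find words within a string without using
textwrap’s Word type.  This is especially useful when using custom types
for strings.  find_words becomes a provided method implemented on top of
find_word_ranges, so implementors now implement find_word_ranges instead.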
---
 src/word_separators.rs | 46 ++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 13 deletions(-)
diff --git a/src/word_separators.rs b/src/word_separators.rs
index abac7563..842324ee 100644
--- a/src/word_separators.rs
+++ b/src/word_separators.rs
@@ -43,7 +43,18 @@ pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug {
     // this isn't possible until Rust supports higher-kinded types:
     // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
     /// Find all words in `line`.
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a>;
+    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+        Box::new(
+            self.find_word_ranges(line)
+                .map(move |range| Word::from(&line[range])),
+        )
+    }
+
+    /// Find all words in `line` and return their byte ranges within `line`.
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a>;
 }
 
 // The internal `WordSeparatorClone` trait is allows us to implement
@@ -69,9 +80,12 @@ impl Clone for Box<dyn WordSeparator> {
 }
 
 impl WordSeparator for Box<dyn WordSeparator> {
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a> {
         use std::ops::Deref;
-        self.deref().find_words(line)
+        self.deref().find_word_ranges(line)
     }
 }
 
@@ -92,7 +106,10 @@ pub struct AsciiSpace;
 ///                        Word::from("World!")]);
 /// ```
 impl WordSeparator for AsciiSpace {
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a> {
         let mut start = 0;
         let mut in_whitespace = false;
         let mut char_indices = line.char_indices();
@@ -106,19 +123,19 @@ impl WordSeparator for AsciiSpace {
             #[allow(clippy::while_let_on_iterator)]
             while let Some((idx, ch)) = char_indices.next() {
                 if in_whitespace && ch != ' ' {
-                    let word = Word::from(&line[start..idx]);
+                    let word_range = start..idx;
                     start = idx;
                     in_whitespace = ch == ' ';
-                    return Some(word);
+                    return Some(word_range);
                 }
 
                 in_whitespace = ch == ' ';
             }
 
             if start < line.len() {
-                let word = Word::from(&line[start..]);
+                let word_range = start..line.len();
                 start = line.len();
-                return Some(word);
+                return Some(word_range);
             }
 
             None
@@ -194,7 +211,10 @@ pub struct UnicodeBreakProperties;
 /// ```
 #[cfg(feature = "unicode-linebreak")]
 impl WordSeparator for UnicodeBreakProperties {
-    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+    fn find_word_ranges<'a>(
+        &self,
+        line: &'a str,
+    ) -> Box<dyn Iterator<Item = std::ops::Range<usize>> + 'a> {
         // Construct an iterator over (original index, stripped index)
         // tuples. We find the Unicode linebreaks on a stripped string,
         // but we need the original indices so we can form words based on
@@ -242,16 +262,16 @@ impl WordSeparator for UnicodeBreakProperties {
             while let Some((idx, _)) = opportunities.next() {
                 if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx)
                 {
-                    let word = Word::from(&line[start..orig_idx]);
+                    let word_range = start..orig_idx;
                     start = orig_idx;
-                    return Some(word);
+                    return Some(word_range);
                 }
             }
 
             if start < line.len() {
-                let word = Word::from(&line[start..]);
+                let word_range = start..line.len();
                 start = line.len();
-                return Some(word);
+                return Some(word_range);
             }
 
             None

From 637809cb688579b089d12d4ba45c0129a8f60a0f Mon Sep 17 00:00:00 2001
From: Robin Krahl <robin.krahl@ireas.org>
Date: Thu, 1 Jul 2021 11:10:35 +0200
Subject: [PATCH 2/5] Introduce word_splitters::Fragments struct

This patch adds the word_splitters::Fragments struct, an iterator that
yields the fragments of a word.  This makes it easier to reason about the
lifetimes of the generated iterator and allows us to make it generic over
the word type.
---
 src/word_splitters.rs | 91 ++++++++++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 28 deletions(-)

diff --git a/src/word_splitters.rs b/src/word_splitters.rs
index f4d94c70..d3104adb 100644
--- a/src/word_splitters.rs
+++ b/src/word_splitters.rs
@@ -190,36 +190,71 @@ where
     I: IntoIterator<Item = Word<'a>>,
     WordSplit: WordSplitter,
 {
-    words.into_iter().flat_map(move |word| {
-        let mut prev = 0;
-        let mut split_points = word_splitter.split_points(&word).into_iter();
-        std::iter::from_fn(move || {
-            if let Some(idx) = split_points.next() {
-                let need_hyphen = !word[..idx].ends_with('-');
-                let w = Word {
-                    word: &word.word[prev..idx],
-                    width: display_width(&word[prev..idx]),
-                    whitespace: "",
-                    penalty: if need_hyphen { "-" } else { "" },
-                };
-                prev = idx;
-                return Some(w);
-            }
+    words
+        .into_iter()
+        .flat_map(move |word| Fragments::new(word, word_splitter))
+}
 
-            if prev < word.word.len() || prev == 0 {
-                let w = Word {
-                    word: &word.word[prev..],
-                    width: display_width(&word[prev..]),
-                    whitespace: word.whitespace,
-                    penalty: word.penalty,
-                };
-                prev = word.word.len() + 1;
-                return Some(w);
-            }
+#[allow(missing_docs)]
+#[derive(Debug)]
+pub struct Fragments<'a, I: Iterator<Item = usize>> {
+    word: Word<'a>,
+    split_points: I,
+    prev: usize,
+}
 
-            None
-        })
-    })
+impl<'a> Fragments<'a, std::vec::IntoIter<usize>> {
+    #[allow(missing_docs)]
+    pub fn new(word: Word<'a>, word_splitter: &impl WordSplitter) -> Self {
+        let split_points = word_splitter.split_points(&word).into_iter();
+        Self {
+            word,
+            split_points,
+            prev: 0,
+        }
+    }
+}
+
+impl<'a, I: Iterator<Item = usize>> Fragments<'a, I> {
+    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Word<'a> {
+        let word = &self.word.word[range];
+        Word {
+            word,
+            width: display_width(word),
+            whitespace: if keep_ending {
+                self.word.whitespace
+            } else {
+                ""
+            },
+            penalty: if keep_ending {
+                self.word.penalty
+            } else if word.ends_with('-') {
+                "-"
+            } else {
+                ""
+            },
+        }
+    }
+}
+
+impl<'a, I: Iterator<Item = usize>> Iterator for Fragments<'a, I> {
+    type Item = Word<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(idx) = self.split_points.next() {
+            let w = self.split(self.prev..idx, false);
+            self.prev = idx;
+            return Some(w);
+        }
+
+        if self.prev < self.word.word.len() || self.prev == 0 {
+            let w = self.split(self.prev..self.word.len(), true);
+            self.prev = self.word.word.len() + 1;
+            return Some(w);
+        }
+
+        None
+    }
 }
 
 #[cfg(test)]

From 14fa737b02ab454de922ac5d4a6af6d7c89ccfaa Mon Sep 17 00:00:00 2001
From: Robin Krahl <robin.krahl@ireas.org>
Date: Thu, 1 Jul 2021 11:17:35 +0200
Subject: [PATCH 3/5] Make word_splitters::Fragments generic over new trait

This patch introduces the new word_splitters::Splittable trait and makes
word_splitters::Fragments generic over that trait.  This allows library
users to use their own fragment types and not only core::Word.
---
 src/core.rs           |  6 ++++
 src/word_splitters.rs | 70 ++++++++++++++++++++++++-------------------
 2 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/src/core.rs b/src/core.rs
index af024603..e4d0924e 100644
--- a/src/core.rs
+++ b/src/core.rs
@@ -224,6 +224,12 @@ pub struct Word<'a> {
     pub(crate) width: usize,
 }
 
+impl AsRef<str> for Word<'_> {
+    fn as_ref(&self) -> &str {
+        &self.word
+    }
+}
+
 impl std::ops::Deref for Word<'_> {
     type Target = str;
 
diff --git a/src/word_splitters.rs b/src/word_splitters.rs
index d3104adb..fff1b2c3 100644
--- a/src/word_splitters.rs
+++ b/src/word_splitters.rs
@@ -196,39 +196,25 @@ where
 }
 
 #[allow(missing_docs)]
-#[derive(Debug)]
-pub struct Fragments<'a, I: Iterator<Item = usize>> {
-    word: Word<'a>,
-    split_points: I,
-    prev: usize,
-}
+pub trait Splittable: AsRef<str> {
+    type Output;
 
-impl<'a> Fragments<'a, std::vec::IntoIter<usize>> {
     #[allow(missing_docs)]
-    pub fn new(word: Word<'a>, word_splitter: &impl WordSplitter) -> Self {
-        let split_points = word_splitter.split_points(&word).into_iter();
-        Self {
-            word,
-            split_points,
-            prev: 0,
-        }
-    }
+    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Self::Output;
 }
 
-impl<'a, I: Iterator<Item = usize>> Fragments<'a, I> {
-    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Word<'a> {
-        let word = &self.word.word[range];
+impl<'a> Splittable for Word<'a> {
+    type Output = Self;
+
+    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Self::Output {
+        let word = &self.word[range];
         Word {
             word,
             width: display_width(word),
-            whitespace: if keep_ending {
-                self.word.whitespace
-            } else {
-                ""
-            },
+            whitespace: if keep_ending { self.whitespace } else { "" },
             penalty: if keep_ending {
-                self.word.penalty
-            } else if word.ends_with('-') {
+                self.penalty
+            } else if !word.ends_with('-') {
                 "-"
             } else {
                 ""
@@ -237,19 +223,41 @@ impl<'a, I: Iterator<Item = usize>> Fragments<'a, I> {
     }
 }
 
-impl<'a, I: Iterator<Item = usize>> Iterator for Fragments<'a, I> {
-    type Item = Word<'a>;
+#[allow(missing_docs)]
+#[derive(Debug)]
+pub struct Fragments<W: Splittable, I: Iterator<Item = usize>> {
+    word: W,
+    split_points: I,
+    prev: usize,
+}
+
+impl<W: Splittable> Fragments<W, std::vec::IntoIter<usize>> {
+    #[allow(missing_docs)]
+    pub fn new(word: W, word_splitter: &impl WordSplitter) -> Self {
+        let split_points = word_splitter.split_points(word.as_ref()).into_iter();
+        Self {
+            word,
+            split_points,
+            prev: 0,
+        }
+    }
+}
+
+impl<W: Splittable, I: Iterator<Item = usize>> Iterator for Fragments<W, I> {
+    type Item = W::Output;
 
     fn next(&mut self) -> Option<Self::Item> {
         if let Some(idx) = self.split_points.next() {
-            let w = self.split(self.prev..idx, false);
+            let w = self.word.split(self.prev..idx, false);
             self.prev = idx;
             return Some(w);
         }
 
-        if self.prev < self.word.word.len() || self.prev == 0 {
-            let w = self.split(self.prev..self.word.len(), true);
-            self.prev = self.word.word.len() + 1;
+        let len = self.word.as_ref().len();
+        if self.prev < len || self.prev == 0 {
+            let w = self.word.split(self.prev..len, true);
+            // Not just len: for an empty word, prev would stay 0 and repeat forever.
+            self.prev = len + 1;
             return Some(w);
         }
 

From a105b03fec1bf499fcf608b0375a4d17bcd27a2e Mon Sep 17 00:00:00 2001
From: Robin Krahl <robin.krahl@ireas.org>
Date: Thu, 1 Jul 2021 12:04:22 +0200
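Subject: [PATCH 4/5] Add styled example

Add examples/style.rs, which wraps randomly styled lorem ipsum text.  It
uses WordSeparator::find_word_ranges and word_splitters::Fragments with a
custom StyledWord fragment type, and adds the rand and text-style
dependencies it needs.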
---
 Cargo.toml        |   2 +
 examples/style.rs | 134 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 examples/style.rs

diff --git a/Cargo.toml b/Cargo.toml
index 3be33f48..b2c20ce2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,12 +32,14 @@ default = ["unicode-linebreak", "unicode-width", "smawk"]
 hyphenation = { version = "0.8.2", optional = true, features = ["embed_en-us"] }
 smawk = { version = "0.3", optional = true }
 terminal_size = { version = "0.1", optional = true }
+text-style = { version = "0.3", features = ["termion"] }
 unicode-linebreak = { version = "0.1", optional = true }
 unicode-width = { version= "0.1", optional = true }
 
 [dev-dependencies]
 criterion = "0.3"
 lipsum = "0.8"
+rand = "0.8"
 unic-emoji-char = "0.9.0"
 version-sync = "0.9"
 
diff --git a/examples/style.rs b/examples/style.rs
new file mode 100644
index 00000000..4e46ee82
--- /dev/null
+++ b/examples/style.rs
@@ -0,0 +1,134 @@
+use hyphenation::{Language, Load, Standard};
+use rand::Rng as _;
+use textwrap::word_separators::WordSeparator as _;
+
+#[derive(Debug)]
+struct StyledWord<'a> {
+    word: &'a str,
+    whitespace: &'a str,
+    hyphen: bool,
+    style: Option<text_style::Style>,
+}
+
+impl StyledWord<'_> {
+    fn render(&self, is_end: bool) {
+        use text_style::termion::Termion as _;
+
+        print!(
+            "{}",
+            text_style::StyledStr::new(self.word, self.style).termion()
+        );
+
+        if is_end {
+            if self.hyphen {
+                print!("{}", text_style::StyledStr::new("-", self.style).termion());
+            }
+        } else {
+            print!("{}", self.whitespace);
+        }
+    }
+}
+
+impl AsRef<str> for StyledWord<'_> {
+    fn as_ref(&self) -> &str {
+        &self.word
+    }
+}
+
+impl<'a> From<text_style::StyledStr<'a>> for StyledWord<'a> {
+    fn from(word: text_style::StyledStr<'a>) -> Self {
+        let trimmed = word.s.trim_end_matches(' ');
+        Self {
+            word: trimmed,
+            whitespace: &word.s[trimmed.len()..],
+            hyphen: false,
+            style: word.style,
+        }
+    }
+}
+
+impl textwrap::core::Fragment for StyledWord<'_> {
+    fn width(&self) -> usize {
+        self.word.len()
+    }
+
+    fn whitespace_width(&self) -> usize {
+        self.whitespace.len()
+    }
+
+    fn penalty_width(&self) -> usize {
+        if self.hyphen {
+            1
+        } else {
+            0
+        }
+    }
+}
+
+impl textwrap::word_splitters::Splittable for StyledWord<'_> {
+    type Output = Self;
+
+    fn split(&self, range: std::ops::Range<usize>, keep_ending: bool) -> Self::Output {
+        let word = &self.word[range];
+        Self {
+            word,
+            whitespace: if keep_ending { self.whitespace } else { "" },
+            hyphen: if keep_ending {
+                self.hyphen
+            } else {
+                !word.ends_with('-')
+            },
+            style: self.style,
+        }
+    }
+}
+
+fn generate_style(rng: &mut impl rand::Rng) -> text_style::Style {
+    let mut style = text_style::Style::default();
+
+    style.set_bold(rng.gen_bool(0.1));
+    style.set_italic(rng.gen_bool(0.1));
+    style.set_underline(rng.gen_bool(0.1));
+    style.strikethrough(rng.gen_bool(0.01));
+
+    style.fg = match rng.gen_range(0..100) {
+        0..=10 => Some(text_style::AnsiColor::Red),
+        11..=20 => Some(text_style::AnsiColor::Green),
+        21..=30 => Some(text_style::AnsiColor::Blue),
+        _ => None,
+    }
+    .map(|color| text_style::Color::Ansi {
+        color,
+        mode: text_style::AnsiMode::Light,
+    });
+
+    style
+}
+
+fn main() {
+    let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+    let mut rng = rand::thread_rng();
+
+    let text = lipsum::lipsum(rng.gen_range(100..500));
+
+    let styled = text
+        .split_inclusive(' ')
+        .map(|s| text_style::StyledStr::styled(s, generate_style(&mut rng)));
+    let words: Vec<_> = styled
+        .flat_map(|s| {
+            textwrap::word_separators::AsciiSpace
+                .find_word_ranges(&s.s)
+                .map(move |range| text_style::StyledStr::new(&s.s[range], s.style))
+        })
+        .map(StyledWord::from)
+        .flat_map(|w| textwrap::word_splitters::Fragments::new(w, &dictionary))
+        .collect();
+
+    let lines = textwrap::wrap_algorithms::wrap_first_fit(&words, &[50]);
+    for line in lines {
+        for (idx, fragment) in line.into_iter().enumerate() {
+            fragment.render(idx + 1 == line.len());
+        }
+        println!();
+    }
+}

From e992b18cb74bc40909f3493a2f41864d1a0ed280 Mon Sep 17 00:00:00 2001
From: Robin Krahl <robin.krahl@ireas.org>
Date: Sun, 4 Jul 2021 09:08:58 +0200
Subject: [PATCH 5/5] fixup! Add styled example

Configure required features for the style example and move text-style
from dependencies to dev-dependencies.
---
 Cargo.toml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index b2c20ce2..ee3657df 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,10 @@ license = "MIT"
 edition = "2018"
 exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"]
 
+[[example]]
+name = "style"
+required-features = ["hyphenation"]
+
 [package.metadata.docs.rs]
 all-features = true
 
@@ -32,7 +36,6 @@ default = ["unicode-linebreak", "unicode-width", "smawk"]
 hyphenation = { version = "0.8.2", optional = true, features = ["embed_en-us"] }
 smawk = { version = "0.3", optional = true }
 terminal_size = { version = "0.1", optional = true }
-text-style = { version = "0.3", features = ["termion"] }
 unicode-linebreak = { version = "0.1", optional = true }
 unicode-width = { version= "0.1", optional = true }
 
@@ -40,6 +43,7 @@ unicode-width = { version= "0.1", optional = true }
 criterion = "0.3"
 lipsum = "0.8"
 rand = "0.8"
+text-style = { version = "0.3", features = ["termion"] }
 unic-emoji-char = "0.9.0"
 version-sync = "0.9"