diff --git a/src/stages/deletion/mod.rs b/src/actions/deletion/mod.rs similarity index 51% rename from src/stages/deletion/mod.rs rename to src/actions/deletion/mod.rs index 8ab1b718..de8b46cc 100644 --- a/src/stages/deletion/mod.rs +++ b/src/actions/deletion/mod.rs @@ -1,14 +1,13 @@ use log::info; -use super::Stage; +use super::Action; /// Deletes everything in the input. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct DeletionStage {} +pub struct Deletion {} -impl Stage for DeletionStage { - fn process(&self, input: &str) -> String { +impl Action for Deletion { + fn act(&self, input: &str) -> String { info!("Deleting: '{}'", input); String::new() } diff --git a/src/stages/german/driver.rs b/src/actions/german/driver.rs similarity index 89% rename from src/stages/german/driver.rs rename to src/actions/german/driver.rs index a0da5c5c..a863c7be 100644 --- a/src/stages/german/driver.rs +++ b/src/actions/german/driver.rs @@ -1,9 +1,9 @@ -use crate::stages::{ +use crate::actions::{ german::{ machine::{StateMachine, Transition}, words::{Replace, Replacement, WordCasing}, }, - Stage, + Action, }; use cached::proc_macro::cached; use cached::SizedCache; @@ -14,9 +14,9 @@ use log::{debug, trace}; use once_cell::sync::Lazy; use unicode_titlecase::StrTitleCase; -/// German language stage, responsible for Umlauts and Eszett. +/// German language action, responsible for Umlauts and Eszett. /// -/// This stage is responsible for applying the following rules, [**where +/// This action is responsible for applying the following rules, [**where /// applicable**](#example-words-validly-containing-alternative-umlaut-spelling): /// - [*Umlauts*](https://en.wikipedia.org/wiki/Umlaut_(diacritic)): replace `ue`, `oe`, /// `ae` with `ü`, `ö`, `ä`, respectively, @@ -26,7 +26,7 @@ use unicode_titlecase::StrTitleCase; /// - both lower- and uppercase variants are handled correctly, /// - compound words are handled correctly. /// -/// Towards this, this stage does *not* simply replace all occurrences, but performs +/// Towards this, this action does *not* simply replace all occurrences, but performs /// checks to ensure only valid replacements are made. The core of these checks is an /// exhaustive word list. The better the word list, the better the results. As such, any /// errors in processing could be the result of a faulty word list *or* faulty @@ -35,10 +35,10 @@ use unicode_titlecase::StrTitleCase; /// # Example: A simple greeting, with Umlaut and Eszett /// /// ``` -/// use srgn::{Stage, stages::GermanStage}; +/// use srgn::{Action, actions::German}; /// -/// let stage = GermanStage::default(); -/// let result = stage.process("Gruess Gott!"); +/// let action = German::default(); +/// let result = action.act("Gruess Gott!"); /// assert_eq!(result, "Grüß Gott!"); /// ``` /// @@ -48,10 +48,10 @@ use unicode_titlecase::StrTitleCase; /// *elaborate* word list!), but is still handled, as its constituents are. /// /// ``` -/// use srgn::{Stage, stages::GermanStage}; +/// use srgn::{Action, actions::German}; /// -/// let stage = GermanStage::default(); -/// let result = stage.process("Du Suesswassertagtraeumer!"); +/// let action = German::default(); +/// let result = action.act("Du Suesswassertagtraeumer!"); /// assert_eq!(result, "Du Süßwassertagträumer!"); /// ``` /// @@ -62,7 +62,7 @@ use unicode_titlecase::StrTitleCase; /// [`tr`](https://en.wikipedia.org/wiki/Tr_(Unix))) would not handle this correctly. /// /// ``` -/// use srgn::{Stage, stages::GermanStage}; +/// use srgn::{Action, actions::German}; /// /// for word in &[ /// // "ae" @@ -78,19 +78,19 @@ use unicode_titlecase::StrTitleCase; /// "Mauer", // should not be "Maür" /// "Steuerung", // should not be "Steürung" /// ] { -/// let stage = GermanStage::default(); -/// let result = stage.process(word); +/// let action = German::default(); +/// let result = action.act(word); /// assert_eq!(result, word.to_string()); /// } /// ``` /// /// Note that `ss`/`ß` is not mentioned, as it is handled -/// [elsewhere][`GermanStage::new`], dealing with the topic of words with valid +/// [elsewhere][`German::new`], dealing with the topic of words with valid /// alternative *and* special character spellings. /// /// # Example: Upper- and mixed case /// -/// This stage can handle any case, but assumes **nouns are never lower case** (a pretty +/// This action can handle any case, but assumes **nouns are never lower case** (a pretty /// mild assumption). The **first letter governs the case** of the entity (Umlaut, /// Eszett or entire word) in question: /// @@ -116,10 +116,10 @@ use unicode_titlecase::StrTitleCase; /// /// /// ``` -/// use srgn::{Stage, stages::GermanStage}; +/// use srgn::{Action, actions::German}; /// -/// let stage = GermanStage::default(); -/// let result = stage.process("aEpFeL"); +/// let action = German::default(); +/// let result = action.act("aEpFeL"); /// /// // Error: MiXeD CaSe noun without leading capital letter /// assert_eq!(result, "aEpFeL"); @@ -133,10 +133,10 @@ use unicode_titlecase::StrTitleCase; /// output is `Äpfel` /// /// ``` -/// use srgn::{Stage, stages::GermanStage}; +/// use srgn::{Action, actions::German}; /// -/// let stage = GermanStage::default(); -/// let result: String = stage.process("AePfEl"); +/// let action = German::default(); +/// let result: String = action.act("AePfEl"); /// /// // OK: MiXeD CaSe words nouns are okay, *if* starting with a capital letter /// assert_eq!(result, "ÄPfEl"); @@ -145,10 +145,10 @@ use unicode_titlecase::StrTitleCase; /// ## Subexample: other cases /// /// ``` -/// use srgn::{Stage, stages::GermanStage}; +/// use srgn::{Action, actions::German}; /// -/// let stage = GermanStage::default(); -/// let f = |word: &str| -> String {stage.process(word)}; +/// let action = German::default(); +/// let f = |word: &str| -> String {action.act(word)}; /// /// // OK: The normal case, adjective lowercase /// assert_eq!(f("Voll suess!"), "Voll süß!"); @@ -204,7 +204,7 @@ use unicode_titlecase::StrTitleCase; /// /// # Example: Other bytes /// -/// This stage handles the German alphabet *only*, and will leave other input bytes +/// This action handles the German alphabet *only*, and will leave other input bytes /// untouched. You get to keep your trailing newlines, emojis (also multi-[`char`] /// ones), and everything else. /// @@ -212,16 +212,16 @@ use unicode_titlecase::StrTitleCase; /// ([`str`]). /// /// ``` -/// use srgn::{Stage, stages::GermanStage}; +/// use srgn::{Action, actions::German}; /// -/// let stage = GermanStage::default(); -/// let result = stage.process("\0Schoener 你好 Satz... 👋🏻\r\n\n"); +/// let action = German::default(); +/// let result = action.act("\0Schoener 你好 Satz... 👋🏻\r\n\n"); /// assert_eq!(result, "\0Schöner 你好 Satz... 👋🏻\r\n\n"); /// ``` /// /// # Performance /// -/// This stage is implemented as a [finite state +/// This action is implemented as a [finite state /// machine](https://en.wikipedia.org/wiki/Finite-state_machine), which means it runs in /// linear time as well as constant space. It should therefore be quite fast and memory /// efficient, requiring only a single pass over the input [`str`]. @@ -249,13 +249,13 @@ use unicode_titlecase::StrTitleCase; /// issue](https://github.com/alexpovel/srgn/issues/9) and [this /// thread](https://users.rust-lang.org/t/fast-string-lookup-in-a-single-str-containing-millions-of-unevenly-sized-substrings/98040). #[derive(Debug, Clone, Copy)] -pub struct GermanStage { +pub struct German { prefer_original: bool, naive: bool, } -impl GermanStage { - /// Create a new [`GermanStage`]. +impl German { + /// Create a new [`German`]. /// /// # Arguments /// @@ -279,7 +279,7 @@ impl GermanStage { /// reached for this crate in the first place), what do they mean? Such cases are /// tricky, as there isn't an easy solution without reaching for full-blown /// [NLP](https://en.wikipedia.org/wiki/Natural_language_processing) or ML, as the - /// word's context would be required. This stage is much too limited for that. A + /// word's context would be required. This action is much too limited for that. A /// choice has to be made: /// /// - do not replace: keep alternative spelling, or @@ -294,20 +294,20 @@ impl GermanStage { /// much more likely than for Umlauts. /// /// ``` - /// use srgn::{Stage, stages::GermanStage}; + /// use srgn::{Action, actions::German}; /// /// for (original, output) in &[ /// ("Busse", "Buße"), // busses / penance /// ("Masse", "Maße"), // mass / measurements /// ] { - /// let mut stage = GermanStage::default(); - /// stage.prefer_replacement(); - /// let result = stage.process(original); + /// let mut action = German::default(); + /// action.prefer_replacement(); + /// let result = action.act(original); /// assert_eq!(result, output.to_string()); /// - /// let mut stage = GermanStage::default(); - /// stage.prefer_original(); - /// let result = stage.process(original); + /// let mut action = German::default(); + /// action.prefer_original(); + /// let result = action.act(original); /// assert_eq!(result, original.to_string()); /// } /// ``` @@ -317,26 +317,26 @@ impl GermanStage { /// Naive mode is essentially forcing a maximum number of replacements. /// /// ``` - /// use srgn::{Stage, stages::GermanStage}; + /// use srgn::{Action, actions::German}; /// /// for (original, output) in &[ /// ("Frau Schroekedaek", "Frau Schrökedäk"), // Names are not in the word list /// ("Abenteuer", "Abenteür"), // Illegal, but possible now /// ] { - /// let mut stage = GermanStage::default(); - /// stage.naive(); - /// let result = stage.process(original); + /// let mut action = German::default(); + /// action.naive(); + /// let result = action.act(original); /// assert_eq!(result, output.to_string()); /// /// // However, this is overridden by: - /// stage.prefer_original(); - /// let result = stage.process(original); + /// action.prefer_original(); + /// let result = action.act(original); /// assert_eq!(result, original.to_string()); /// /// // The usual behavior: - /// let mut stage = GermanStage::default(); - /// stage.sophisticated(); - /// let result = stage.process(original); + /// let mut action = German::default(); + /// action.sophisticated(); + /// let result = action.act(original); /// assert_eq!(result, original.to_string()); /// } /// ``` @@ -374,10 +374,10 @@ impl GermanStage { } } -impl Default for GermanStage { - /// Create a new [`GermanStage`] with default settings. +impl Default for German { + /// Create a new [`German`] with default settings. /// - /// Default settings are such that features of this stage are leveraged most. + /// Default settings are such that features of this action are leveraged most. fn default() -> Self { let prefer_original = false; let naive = false; @@ -385,8 +385,8 @@ impl Default for GermanStage { } } -impl Stage for GermanStage { - fn process(&self, input: &str) -> String { +impl Action for German { + fn act(&self, input: &str) -> String { const INDICATOR: char = '\0'; let mut output = String::with_capacity(input.len()); @@ -725,8 +725,8 @@ mod tests { "Öl ist ein wichtiger Bestandteil von Öl." )] fn test_substitution(#[case] input: &str, #[case] expected: &str) { - let stage = GermanStage::default(); - let result = stage.process(input); + let action = German::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -751,9 +751,9 @@ mod tests { #[case("Guessa", "Güßa")] #[case("GUESSA", "GÜẞA")] fn test_casing_when_being_naive(#[case] input: &str, #[case] expected: &str) { - let mut stage = GermanStage::default(); - stage.naive(); - let result = stage.process(input); + let mut action = German::default(); + action.naive(); + let result = action.act(input); assert_eq!(result, expected); } } diff --git a/src/stages/german/machine.rs b/src/actions/german/machine.rs similarity index 100% rename from src/stages/german/machine.rs rename to src/actions/german/machine.rs diff --git a/src/stages/german/mod.rs b/src/actions/german/mod.rs similarity index 90% rename from src/stages/german/mod.rs rename to src/actions/german/mod.rs index 6b04958b..e3dff07d 100644 --- a/src/stages/german/mod.rs +++ b/src/actions/german/mod.rs @@ -7,5 +7,5 @@ const EXPECTABLE_AVERAGE_MATCHES_PER_WORD: u8 = 2; // Re-export symbols. #[allow(clippy::module_name_repetitions)] -pub use driver::GermanStage; +pub use driver::German; use words::{LetterCasing, SpecialCharacter, Umlaut, Word}; diff --git a/src/stages/german/words.rs b/src/actions/german/words.rs similarity index 98% rename from src/stages/german/words.rs rename to src/actions/german/words.rs index e76b6df6..501a1f9c 100644 --- a/src/stages/german/words.rs +++ b/src/actions/german/words.rs @@ -202,7 +202,7 @@ impl Replace for String { // Assert sorting, such that reversing actually does the right thing. if cfg!(debug_assertions) { let mut cloned = replacements.iter().cloned().collect_vec(); - cloned.sort_by_key(crate::stages::german::words::Replacement::start); + cloned.sort_by_key(crate::actions::german::words::Replacement::start); assert_eq!(cloned, replacements); } diff --git a/src/stages/lower/mod.rs b/src/actions/lower/mod.rs similarity index 81% rename from src/stages/lower/mod.rs rename to src/actions/lower/mod.rs index cfb197e2..551fab2f 100644 --- a/src/stages/lower/mod.rs +++ b/src/actions/lower/mod.rs @@ -1,14 +1,13 @@ use log::info; -use super::Stage; +use super::Action; /// Renders in lowercase. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct LowerStage {} +pub struct Lower {} -impl Stage for LowerStage { - fn process(&self, input: &str) -> String { +impl Action for Lower { + fn act(&self, input: &str) -> String { info!("Lowercasing: '{}'", input); input.to_lowercase() } @@ -50,6 +49,6 @@ mod tests { // Emojis #[case("👋\0", "👋\0")] fn substitute(#[case] input: &str, #[case] expected: &str) { - assert_eq!(LowerStage {}.process(input), expected); + assert_eq!(Lower {}.act(input), expected); } } diff --git a/src/stages/mod.rs b/src/actions/mod.rs similarity index 58% rename from src/stages/mod.rs rename to src/actions/mod.rs index 6ff0a3ad..0ee86348 100644 --- a/src/stages/mod.rs +++ b/src/actions/mod.rs @@ -20,47 +20,47 @@ mod upper; use std::fmt::Debug; #[cfg(feature = "deletion")] -pub use deletion::DeletionStage; +pub use deletion::Deletion; #[cfg(feature = "german")] -pub use german::GermanStage; +pub use german::German; #[cfg(feature = "lower")] -pub use lower::LowerStage; +pub use lower::Lower; #[cfg(feature = "normalization")] -pub use normalization::NormalizationStage; +pub use normalization::Normalization; #[cfg(feature = "replace")] -pub use replace::ReplacementStage; +pub use replace::Replacement; #[cfg(feature = "squeeze")] -pub use squeeze::SqueezeStage; +pub use squeeze::Squeeze; #[cfg(feature = "symbols")] -pub use symbols::{inversion::SymbolsInversionStage, SymbolsStage}; +pub use symbols::{inversion::SymbolsInversion, Symbols}; #[cfg(feature = "titlecase")] -pub use titlecase::TitlecaseStage; +pub use titlecase::Titlecase; #[cfg(feature = "upper")] -pub use upper::UpperStage; +pub use upper::Upper; use crate::scoping::ScopedView; -/// A stage in the processing pipeline, as initiated by [`crate::apply`]. +/// An action in the processing pipeline, as initiated by [`crate::apply`]. /// -/// Stages are the core of the text processing pipeline and can be applied in any order, -/// [any number of times each](https://en.wikipedia.org/wiki/Idempotence) (more than -/// once being wasted work, though). -pub trait Stage: Send + Sync + Debug { - /// Apply this stage to the given [`ScopedView`]. +/// Actions are the core of the text processing pipeline and can be applied in any +/// order, [any number of times each](https://en.wikipedia.org/wiki/Idempotence) (more +/// than once being wasted work, though). +pub trait Action: Send + Sync + Debug { + /// Apply this action to the given [`ScopedView`]. /// /// This is infallible: it cannot fail in the sense of [`Result`]. It can only /// return incorrect results, which would be bugs (please report). - fn process(&self, input: &str) -> String; + fn act(&self, input: &str) -> String; - /// Applies this stage to an `input`, working only on [`InScope`] items and + /// Applies this action to an `input`, working only on [`InScope`] items and /// forwarding [`OutOfScope`] items unchanged. /// - /// Always returns an owned version of the `input`, even for stages where that might + /// Always returns an owned version of the `input`, even for actions where that might /// technically be unnecessary. /// /// This is infallible: it cannot fail in the sense of [`Result`]. It can only /// return incorrect results, which would be bugs (please report). fn map<'viewee, 'a>(&self, view: &'a mut ScopedView<'viewee>) -> &'a mut ScopedView<'viewee> { - view.map(&|s| self.process(s)) + view.map(&|s| self.act(s)) } } diff --git a/src/stages/normalization/mod.rs b/src/actions/normalization/mod.rs similarity index 83% rename from src/stages/normalization/mod.rs rename to src/actions/normalization/mod.rs index eec5a41b..3cceadc0 100644 --- a/src/stages/normalization/mod.rs +++ b/src/actions/normalization/mod.rs @@ -1,4 +1,4 @@ -use super::Stage; +use super::Action; use unicode_categories::UnicodeCategories; use unicode_normalization::UnicodeNormalization; @@ -6,11 +6,10 @@ use unicode_normalization::UnicodeNormalization; /// /// Uses NFD (Normalization Form D), canonical decomposition. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct NormalizationStage {} +pub struct Normalization {} -impl Stage for NormalizationStage { - fn process(&self, input: &str) -> String { +impl Action for Normalization { + fn act(&self, input: &str) -> String { input.nfd().filter(|c| !c.is_mark()).collect() } } diff --git a/src/stages/replace/mod.rs b/src/actions/replace/mod.rs similarity index 77% rename from src/stages/replace/mod.rs rename to src/actions/replace/mod.rs index be4333c9..fa443961 100644 --- a/src/stages/replace/mod.rs +++ b/src/actions/replace/mod.rs @@ -1,7 +1,7 @@ use log::info; use unescape::unescape; -use super::Stage; +use super::Action; /// Replaces input with a fixed string. /// @@ -9,17 +9,17 @@ use super::Stage; /// /// ```rust /// use srgn::RegexPattern; -/// use srgn::stages::{Stage, ReplacementStage}; +/// use srgn::actions::{Action, Replacement}; /// use srgn::scoping::{ScopedViewBuilder, regex::Regex}; /// -/// let stage = ReplacementStage::try_from("_".to_string()).unwrap(); +/// let action = Replacement::try_from("_".to_string()).unwrap(); /// let scoper = Regex::new(RegexPattern::new(r"[^a-zA-Z0-9]+").unwrap()); /// let mut view = ScopedViewBuilder::new("hyphenated-variable-name").explode_from_scoper( /// &scoper /// ).build(); /// /// assert_eq!( -/// stage.map(&mut view).to_string(), +/// action.map(&mut view).to_string(), /// "hyphenated_variable_name" /// ); /// ``` @@ -28,10 +28,10 @@ use super::Stage; /// /// ```rust /// use srgn::RegexPattern; -/// use srgn::stages::{Stage, ReplacementStage}; +/// use srgn::actions::{Action, Replacement}; /// use srgn::scoping::{ScopedViewBuilder, regex::Regex}; /// -/// let stage = ReplacementStage::try_from(":(".to_string()).unwrap(); +/// let action = Replacement::try_from(":(".to_string()).unwrap(); /// // A Unicode character class category. See also /// // https://github.com/rust-lang/regex/blob/061ee815ef2c44101dba7b0b124600fcb03c1912/UNICODE.md#rl12-properties /// let scoper = Regex::new(RegexPattern::new(r"\p{Emoji}").unwrap()); @@ -40,17 +40,17 @@ use super::Stage; /// ).build(); /// /// assert_eq!( -/// stage.map(&mut view).to_string(), +/// action.map(&mut view).to_string(), /// // Party is over, sorry ¯\_(ツ)_/¯ /// "Party! :( :( :( :( So much fun! ╰(°▽°)╯" /// ); /// ``` #[derive(Debug, Clone, PartialEq, Eq, Default)] -pub struct ReplacementStage { +pub struct Replacement { replacement: String, } -impl TryFrom for ReplacementStage { +impl TryFrom for Replacement { type Error = String; fn try_from(replacement: String) -> Result { @@ -62,8 +62,8 @@ impl TryFrom for ReplacementStage { } } -impl Stage for ReplacementStage { - fn process(&self, input: &str) -> String { +impl Action for Replacement { + fn act(&self, input: &str) -> String { info!("Substituting '{}' with '{}'", input, self.replacement); self.replacement.clone() } diff --git a/src/stages/squeeze/mod.rs b/src/actions/squeeze/mod.rs similarity index 93% rename from src/stages/squeeze/mod.rs rename to src/actions/squeeze/mod.rs index f72dc9b1..78afa502 100644 --- a/src/stages/squeeze/mod.rs +++ b/src/actions/squeeze/mod.rs @@ -1,15 +1,14 @@ -use super::Stage; +use super::Action; use crate::scoping::{Scope, ScopedView}; use log::{debug, trace}; /// Squeezes all consecutive matched scopes into a single occurrence. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct SqueezeStage {} +pub struct Squeeze {} -impl Stage for SqueezeStage { - fn process(&self, _input: &str) -> String { - unimplemented!("Stage works without processing individual input") +impl Action for Squeeze { + fn act(&self, _input: &str) -> String { + unimplemented!("Action works without processing individual input") } fn map<'viewee, 'a>(&self, view: &'a mut ScopedView<'viewee>) -> &'a mut ScopedView<'viewee> { @@ -144,13 +143,13 @@ mod tests { " dirty Strings \t with \t\t messed up whitespace\n\n\n" )] fn test_squeeze(#[case] input: &str, #[case] pattern: RegexPattern, #[case] expected: &str) { - let stage = SqueezeStage {}; + let action = Squeeze {}; let builder = ScopedViewBuilder::new(input) .explode_from_scoper(&crate::scoping::regex::Regex::new(pattern.clone())); let mut view = builder.build(); - stage.map(&mut view); + action.map(&mut view); let result = view.to_string(); assert_eq!(result, expected); diff --git a/src/stages/symbols/inversion.rs b/src/actions/symbols/inversion.rs similarity index 75% rename from src/stages/symbols/inversion.rs rename to src/actions/symbols/inversion.rs index 54648828..fedcf956 100644 --- a/src/stages/symbols/inversion.rs +++ b/src/actions/symbols/inversion.rs @@ -1,24 +1,23 @@ -use crate::Stage; +use crate::Action; use super::Symbol; -/// Inverts all symbols inserted by [`SymbolsStage`]. +/// Inverts all symbols inserted by [`Symbols`]. /// -/// This is guaranteed to be the inverse of [`SymbolsStage`], as the replacements and +/// This is guaranteed to be the inverse of [`Symbols`], as the replacements and /// originals form a [bijection](https://en.wikipedia.org/wiki/Bijection). #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct SymbolsInversionStage {} +pub struct SymbolsInversion {} -impl Stage for SymbolsInversionStage { - fn process(&self, input: &str) -> String { +impl Action for SymbolsInversion { + fn act(&self, input: &str) -> String { input .chars() .map(|c| match Symbol::try_from(c) { Ok(s) => match s { // This is *horrible* as in the current implementation, we cannot // access these symbols. They are implicitly encoded in the - // `substitute` method of `SymbolsStage`. As such, this inversion + // `substitute` method of `Symbols`. As such, this inversion // can get out of sync with the original. There is a property test // in place to catch this. Symbol::EmDash => "---", diff --git a/src/stages/symbols/mod.rs b/src/actions/symbols/mod.rs similarity index 93% rename from src/stages/symbols/mod.rs rename to src/actions/symbols/mod.rs index 7147c33f..2450caf9 100644 --- a/src/stages/symbols/mod.rs +++ b/src/actions/symbols/mod.rs @@ -1,6 +1,6 @@ #[cfg(doc)] -use super::GermanStage; -use crate::Stage; +use super::German; +use crate::Action; #[cfg(test)] use enum_iterator::{all, Sequence}; use std::collections::VecDeque; @@ -10,11 +10,10 @@ pub mod inversion; /// Replace ASCII symbols (`--`, `->`, `!=`, ...) with proper Unicode equivalents (`–`, /// `→`, `≠`, ...). /// -/// This stage is greedy, i.e. it will try to replace as many symbols as possible, +/// This action is greedy, i.e. it will try to replace as many symbols as possible, /// replacing left-to-right as greedily as possible. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct SymbolsStage {} +pub struct Symbols {} macro_rules! fetch_next { ($it:expr, $stack:expr, $buf:expr $(, $label:tt)?) => { @@ -31,20 +30,20 @@ macro_rules! fetch_next { }; } -impl Stage for SymbolsStage { +impl Action for Symbols { /// ## Implementation note /// /// Only relevant when looking at the source code. /// /// The implementation is in the style of coroutines as presented [in this /// article](https://www.chiark.greenend.org.uk/~sgtatham/quasiblog/coroutines-philosophy/). - /// Instead of constructing an explicit state machine (like in [`GermanStage`]), we + /// Instead of constructing an explicit state machine (like in [`German`]), we /// use a generator coroutine to consume values from. The position in code itself is /// then our state. `undo_overfetching` is a bit like sending a value back into the /// coroutine so it can be yielded again. /// /// All in all, ugly and verbose, would not recommend, but a worthwhile experiment. - fn process(&self, input: &str) -> String { + fn act(&self, input: &str) -> String { let mut deque = input.chars().collect::>(); let mut out = String::new(); @@ -244,8 +243,8 @@ mod tests { #[case(">=", "≥")] #[case("!=", "≠")] fn test_symbol_substitution_base_cases(#[case] input: &str, #[case] expected: &str) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -276,8 +275,8 @@ mod tests { #[case] input: &str, #[case] expected: &str, ) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -300,8 +299,8 @@ mod tests { #[case("A>=B", "A≥B")] #[case("A!=B", "A≠B")] fn test_symbol_substitution_neighboring_letters(#[case] input: &str, #[case] expected: &str) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -327,8 +326,8 @@ mod tests { #[case] input: &str, #[case] expected: &str, ) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -348,8 +347,8 @@ mod tests { // #[case("<--X-->", "⟵X⟶")] fn test_symbol_substitution_disrupting_symbols(#[case] input: &str, #[case] expected: &str) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -361,8 +360,8 @@ mod tests { #[case("A <= B => C", "A ≤ B ⇒ C")] #[case("->In->Out->", "→In→Out→")] fn test_symbol_substitution_sentences(#[case] input: &str, #[case] expected: &str) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -397,8 +396,8 @@ mod tests { #[case("!=!=", "≠≠")] #[case("!=!=!=", "≠≠≠")] fn test_symbol_substitution_ambiguous_sequences(#[case] input: &str, #[case] expected: &str) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -416,8 +415,8 @@ mod tests { #[case("≤", "≤")] #[case("≥", "≥")] fn test_symbol_substitution_existing_symbol(#[case] input: &str, #[case] expected: &str) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } @@ -439,8 +438,8 @@ mod tests { #[case("https:/->", "https:/→")] #[case("https://->", "https://->")] // Pivot point fn test_symbol_substitution_uri(#[case] input: &str, #[case] expected: &str) { - let stage = SymbolsStage::default(); - let result = stage.process(input); + let action = Symbols::default(); + let result = action.act(input); assert_eq!(result, expected); } diff --git a/src/stages/titlecase/mod.rs b/src/actions/titlecase/mod.rs similarity index 72% rename from src/stages/titlecase/mod.rs rename to src/actions/titlecase/mod.rs index 661ecf32..34fd2840 100644 --- a/src/stages/titlecase/mod.rs +++ b/src/actions/titlecase/mod.rs @@ -1,13 +1,12 @@ -use super::Stage; +use super::Action; use titlecase::titlecase; /// Renders in titlecase. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct TitlecaseStage {} +pub struct Titlecase {} -impl Stage for TitlecaseStage { - fn process(&self, input: &str) -> String { +impl Action for Titlecase { + fn act(&self, input: &str) -> String { titlecase(input) } } @@ -27,7 +26,7 @@ mod tests { // #[case("a dime a dozen", "A Dime a Dozen")] fn test_titlecasing(#[case] input: &str, #[case] expected: &str) { - let result = TitlecaseStage::default().process(input); + let result = Titlecase::default().act(input); assert_eq!(result, expected); } } diff --git a/src/stages/upper/mod.rs b/src/actions/upper/mod.rs similarity index 82% rename from src/stages/upper/mod.rs rename to src/actions/upper/mod.rs index cdd7de9f..29c6cb79 100644 --- a/src/stages/upper/mod.rs +++ b/src/actions/upper/mod.rs @@ -1,12 +1,11 @@ -use super::Stage; +use super::Action; /// Renders in uppercase. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[allow(clippy::module_name_repetitions)] -pub struct UpperStage {} +pub struct Upper {} -impl Stage for UpperStage { - fn process(&self, input: &str) -> String { +impl Action for Upper { + fn act(&self, input: &str) -> String { input.replace('ß', "ẞ").to_uppercase() } } @@ -47,7 +46,7 @@ mod tests { // Emojis #[case("👋\0", "👋\0")] fn substitute(#[case] input: &str, #[case] expected: &str) { - let result = UpperStage::default().process(input); + let result = Upper::default().act(input); assert_eq!(result, expected); } } diff --git a/src/lib.rs b/src/lib.rs index 67172395..6a30797e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,15 +12,15 @@ #![allow(missing_docs)] #![allow(clippy::module_name_repetitions)] +pub use crate::actions::Action; use crate::scoping::ScopedViewBuilder; -pub use crate::stages::Stage; use log::debug; use scoping::ScopedViewBuildStep; use std::io::Error; +/// Main components around [`Action`]s and their [processing][Action::substitute]. +pub mod actions; pub mod scoping; -/// Main components around [`Stage`]s and their [processing][Stage::substitute]. -pub mod stages; pub mod text; /// Pattern signalling global scope, aka matching entire inputs. @@ -30,27 +30,27 @@ pub const GLOBAL_SCOPE: &str = r".*"; /// underlying implementation. pub use fancy_regex::Regex as RegexPattern; -/// Apply the list of [stages][Stage] to a source, writing results to the given +/// Apply the list of [actions][Action] to a source, writing results to the given /// destination. /// -/// The stages will be applied in the order given. The source is expected to be +/// The actions will be applied in the order given. The source is expected to be /// UTF-8-encoded text, and will be read [line-by-line][BufRead::read_line]. Each /// processed line will be written to the destination immediately. /// -/// # Example: Using a single stage (German) +/// # Example: Using a single action (German) /// -/// See also [`crate::stages::GermanStage`]. +/// See also [`crate::actions::German`]. /// /// /// ``` -/// use srgn::{apply, scoping::{ScopedViewBuildStep, regex::Regex}, stages::GermanStage, Stage}; +/// use srgn::{apply, scoping::{ScopedViewBuildStep, regex::Regex}, actions::German, Action}; /// -/// let stages: &[Box] = &[Box::new(GermanStage::default())]; +/// let actions: &[Box] = &[Box::new(German::default())]; /// let scopers: &[Box] = &[Box::new(Regex::default())]; /// /// let mut input = "Gruess Gott!\n"; /// -/// let result = apply(input, &scopers, &stages).unwrap(); +/// let result = apply(input, &scopers, &actions).unwrap(); /// assert_eq!(result, "Grüß Gott!\n"); /// ``` /// @@ -58,14 +58,14 @@ pub use fancy_regex::Regex as RegexPattern; /// /// An error will be returned in the following cases: /// -/// - when a [`Stage`] fails its substitution +/// - when an [`Action`] fails its substitution /// - when the source cannot be read /// - when the destination cannot be written to /// - when the destination cannot be flushed before exiting pub fn apply( input: &str, scopers: &[Box], - stages: &[Box], + actions: &[Box], ) -> Result { let mut builder = ScopedViewBuilder::new(input); for scoper in scopers { @@ -74,9 +74,9 @@ pub fn apply( let mut view = builder.build(); - for stage in stages { - debug!("Applying stage {:?}", stage); - stage.map(&mut view); + for action in actions { + debug!("Applying action {:?}", action); + action.map(&mut view); } Ok(view.to_string()) diff --git a/src/main.rs b/src/main.rs index 8cca0130..4842a6ca 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,32 +1,32 @@ use log::{debug, info, warn, LevelFilter}; -use srgn::scoping::{ - langs::{ - csharp::{CSharp, CSharpQuery}, - python::{Python, PythonQuery}, - typescript::{TypeScript, TypeScriptQuery}, - }, - literal::Literal, - ScopedViewBuildStep, ScoperBuildError, -}; #[cfg(feature = "deletion")] -use srgn::stages::DeletionStage; +use srgn::actions::Deletion; #[cfg(feature = "german")] -use srgn::stages::GermanStage; +use srgn::actions::German; #[cfg(feature = "lower")] -use srgn::stages::LowerStage; +use srgn::actions::Lower; #[cfg(feature = "normalization")] -use srgn::stages::NormalizationStage; +use srgn::actions::Normalization; #[cfg(feature = "replace")] -use srgn::stages::ReplacementStage; +use srgn::actions::Replacement; #[cfg(feature = "squeeze")] -use srgn::stages::SqueezeStage; +use srgn::actions::Squeeze; #[cfg(feature = "titlecase")] -use srgn::stages::TitlecaseStage; +use srgn::actions::Titlecase; #[cfg(feature = "upper")] -use srgn::stages::UpperStage; +use srgn::actions::Upper; #[cfg(feature = "symbols")] -use srgn::stages::{SymbolsInversionStage, SymbolsStage}; -use srgn::{apply, scoping::regex::Regex, Stage}; +use srgn::actions::{Symbols, SymbolsInversion}; +use srgn::scoping::{ + langs::{ + csharp::{CSharp, CSharpQuery}, + python::{Python, PythonQuery}, + typescript::{TypeScript, TypeScriptQuery}, + }, + literal::Literal, + ScopedViewBuildStep, ScoperBuildError, +}; +use srgn::{apply, scoping::regex::Regex, Action}; use std::io::{self, Error, Read, Write}; fn main() -> Result<(), Error> { @@ -59,12 +59,13 @@ fn main() -> Result<(), Error> { }, }; - let stages = assemble_stages(&args).map_err(|e| Error::new(io::ErrorKind::InvalidInput, e))?; + let actions = + assemble_actions(&args).map_err(|e| Error::new(io::ErrorKind::InvalidInput, e))?; let mut buf = String::new(); std::io::stdin().read_to_string(&mut buf)?; - let result = apply(&buf, &scopers, &stages)?; + let result = apply(&buf, &scopers, &actions)?; let mut destination = io::stdout(); destination.write_all(result.as_bytes())?; @@ -123,78 +124,78 @@ fn assemble_scopers( Ok(scopers) } -fn assemble_stages(args: &cli::Cli) -> Result>, String> { - let mut stages: Vec> = Vec::new(); +fn assemble_actions(args: &cli::Cli) -> Result>, String> { + let mut actions: Vec> = Vec::new(); #[cfg(feature = "replace")] - if let Some(replacement) = args.composable_stages.replace.clone() { - stages.push(Box::new(ReplacementStage::try_from(replacement)?)); - debug!("Loaded stage: Replacement"); + if let Some(replacement) = args.composable_actions.replace.clone() { + actions.push(Box::new(Replacement::try_from(replacement)?)); + debug!("Loaded action: Replacement"); } #[cfg(feature = "squeeze")] - if args.standalone_stages.squeeze { - stages.push(Box::::default()); - debug!("Loaded stage: Squeeze"); + if args.standalone_actions.squeeze { + actions.push(Box::::default()); + debug!("Loaded action: Squeeze"); } #[cfg(feature = "german")] - if args.composable_stages.german { - stages.push(Box::new(GermanStage::new( + if args.composable_actions.german { + actions.push(Box::new(German::new( // Smell? Bug if bools swapped. args.german_options.german_prefer_original, args.german_options.german_naive, ))); - debug!("Loaded stage: German"); + debug!("Loaded action: German"); } #[cfg(feature = "symbols")] - if args.composable_stages.symbols { + if args.composable_actions.symbols { if args.options.invert { - stages.push(Box::::default()); - debug!("Loaded stage: SymbolsInversion"); + actions.push(Box::::default()); + debug!("Loaded action: SymbolsInversion"); } else { - stages.push(Box::::default()); - debug!("Loaded stage: Symbols"); + actions.push(Box::::default()); + debug!("Loaded action: Symbols"); } } #[cfg(feature = "deletion")] - if args.standalone_stages.delete { - stages.push(Box::::default()); - debug!("Loaded stage: Deletion"); + if args.standalone_actions.delete { + actions.push(Box::::default()); + debug!("Loaded action: Deletion"); } #[cfg(feature = "upper")] - if args.composable_stages.upper { - stages.push(Box::::default()); - debug!("Loaded stage: Upper"); + if args.composable_actions.upper { + actions.push(Box::::default()); + debug!("Loaded action: Upper"); } #[cfg(feature = "lower")] - if args.composable_stages.lower { - stages.push(Box::::default()); - debug!("Loaded stage: Lower"); + if args.composable_actions.lower { + actions.push(Box::::default()); + debug!("Loaded action: Lower"); } #[cfg(feature = "titlecase")] - if args.composable_stages.titlecase { - stages.push(Box::::default()); - debug!("Loaded stage: Titlecase"); + if args.composable_actions.titlecase { + actions.push(Box::::default()); + debug!("Loaded action: Titlecase"); } #[cfg(feature = "normalization")] - if args.composable_stages.normalize { - stages.push(Box::::default()); - debug!("Loaded stage: Normalization"); + if args.composable_actions.normalize { + actions.push(Box::::default()); + debug!("Loaded action: Normalization"); } - if stages.is_empty() { + if actions.is_empty() { // Doesn't hurt, but warn loudly - warn!("No stages loaded, will return input unchanged"); + warn!("No actions loaded, will return input unchanged"); } - Ok(stages) + Ok(actions) } /// To the default log level found in the environment, adds the requested additional @@ -242,7 +243,7 @@ mod cli { /// /// If string literal mode is requested, will be interpreted as a literal string. /// - /// Stages will apply their transformations within this scope only. + /// Actions will apply their transformations within this scope only. /// /// The default is the global scope, matching the entire input. /// @@ -257,10 +258,10 @@ mod cli { pub scope: String, #[command(flatten)] - pub composable_stages: ComposableStages, + pub composable_actions: ComposableActions, #[command(flatten)] - pub standalone_stages: StandaloneStages, + pub standalone_actions: StandaloneActions, #[command(flatten)] pub options: GlobalOptions, @@ -270,21 +271,21 @@ mod cli { #[cfg(feature = "german")] #[command(flatten)] - pub german_options: GermanStageOptions, + pub german_options: GermanOptions, } #[derive(Parser, Debug)] #[group(required = false, multiple = true)] #[command(next_help_heading = "Options (global)")] pub(super) struct GlobalOptions { - /// Undo the effects of passed stages, where applicable + /// Undo the effects of passed actions, where applicable /// /// Requires a 1:1 mapping (bijection) between replacements and original, which /// is currently available for: /// /// - symbols: '≠' <-> '!=' etc. /// - /// Other stages: + /// Other actions: /// /// - german: inverting e.g. 'Ä' is ambiguous (can be 'Ae' or 'AE') /// @@ -316,13 +317,13 @@ mod cli { #[derive(Parser, Debug)] #[group(required = false, multiple = true)] - #[command(next_help_heading = "Composable Stages")] - pub(super) struct ComposableStages { + #[command(next_help_heading = "Composable Actions")] + pub(super) struct ComposableActions { /// Replace scope by this (fixed) value /// - /// Specially treated stage for ergonomics and compatibility with `tr`. + /// Specially treated action for ergonomics and compatibility with `tr`. /// - /// If given, will run before any other stage. + /// If given, will run before any other action. #[cfg(feature = "replace")] #[arg(value_name = "REPLACEMENT", env, verbatim_doc_comment)] pub replace: Option, @@ -372,19 +373,19 @@ mod cli { #[derive(Parser, Debug)] #[group(required = false, multiple = false)] - #[command(next_help_heading = "Standalone Stages (only usable alone)")] - pub(super) struct StandaloneStages { + #[command(next_help_heading = "Standalone Actions (only usable alone)")] + pub(super) struct StandaloneActions { /// Delete scope /// - /// Cannot be used with any other stage: no point in deleting and performing any - /// other action. Sibling stages would either receive empty input or have their + /// Cannot be used with any other action: no point in deleting and performing any + /// other action. Sibling actions would either receive empty input or have their /// work wiped. #[cfg(feature = "deletion")] #[arg( short, long, requires = "scope", - conflicts_with = stringify!(ComposableStages), + conflicts_with = stringify!(ComposableActions), verbatim_doc_comment )] pub delete: bool, @@ -453,7 +454,7 @@ mod cli { #[derive(Parser, Debug)] #[group(required = false, multiple = true, id("german-opts"))] #[command(next_help_heading = "Options (german)")] - pub(super) struct GermanStageOptions { + pub(super) struct GermanOptions { /// When some original version and its replacement are equally legal, prefer the /// original and do not modify. /// diff --git a/src/scoping/regex.rs b/src/scoping/regex.rs index 39b982d3..aed9fa71 100644 --- a/src/scoping/regex.rs +++ b/src/scoping/regex.rs @@ -336,7 +336,7 @@ mod tests { ); assert!( - n_tries > 250, + n_tries > 100, // Might happen in CI, but we should ensure a certain lower bound; // locally, many more tests can run. "Too few tries; is the host machine very slow?" diff --git a/tests/properties/lower.rs b/tests/properties/lower.rs index 3e068852..3c68b561 100644 --- a/tests/properties/lower.rs +++ b/tests/properties/lower.rs @@ -1,7 +1,7 @@ use proptest::prelude::*; use srgn::{ + actions::{Action, Lower}, scoping::ScopedViewBuilder, - stages::{LowerStage, Stage}, }; use crate::properties::DEFAULT_NUMBER_OF_TEST_CASES; @@ -14,9 +14,9 @@ proptest! { // https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values input in r"\p{Lowercase_Letter}*" ) { - let stage = LowerStage::default(); + let action = Lower::default(); let mut view = ScopedViewBuilder::new(&input).build(); - stage.map(&mut view); + action.map(&mut view); let res = view.to_string(); assert_eq!(res, input); diff --git a/tests/properties/squeeze.rs b/tests/properties/squeeze.rs index 03393951..8ae8a304 100644 --- a/tests/properties/squeeze.rs +++ b/tests/properties/squeeze.rs @@ -1,8 +1,8 @@ use proptest::prelude::*; use srgn::{ + actions::Squeeze, scoping::{regex::Regex, ScopedViewBuilder}, - stages::SqueezeStage, - RegexPattern, Stage, + Action, RegexPattern, }; use crate::properties::DEFAULT_NUMBER_OF_TEST_CASES; @@ -15,12 +15,12 @@ proptest! { // https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values input in r"\p{Any}*AA\p{Any}*" ) { - let stage = SqueezeStage::default(); + let action = Squeeze::default(); let mut view = ScopedViewBuilder::new(&input).explode_from_scoper( &Regex::new(RegexPattern::new("A").unwrap()) ).build(); - stage.map(&mut view); + action.map(&mut view); let res = view.to_string(); assert!(res.len() < input.len()); diff --git a/tests/properties/symbols.rs b/tests/properties/symbols.rs index 594f33ff..e3f819fc 100644 --- a/tests/properties/symbols.rs +++ b/tests/properties/symbols.rs @@ -1,8 +1,8 @@ use proptest::prelude::*; use srgn::{ + actions::{Symbols, SymbolsInversion}, scoping::ScopedViewBuilder, - stages::{SymbolsInversionStage, SymbolsStage}, - Stage, + Action, }; use crate::properties::DEFAULT_NUMBER_OF_TEST_CASES; @@ -10,8 +10,8 @@ use crate::properties::DEFAULT_NUMBER_OF_TEST_CASES; proptest! { #![proptest_config(ProptestConfig::with_cases(DEFAULT_NUMBER_OF_TEST_CASES * 2))] /// Cannot be idempotent on non-ASCII input. Input might contain e.g. en-dash, which - /// the symbols stage will leave untouched, but will be decomposed into two hyphens - /// by the symbols inversion stage. + /// the symbols action will leave untouched, but will be decomposed into two hyphens + /// by the symbols inversion action. #[test] fn test_inverting_symbols_is_idempotent_on_ascii_input( // https://docs.rs/regex/latest/regex/#matching-one-character @@ -20,13 +20,13 @@ proptest! { ) { let applied = { let mut view = ScopedViewBuilder::new(&input).build(); - SymbolsStage::default().map(&mut view); + Symbols::default().map(&mut view); view.to_string() }; let inverted = { let mut view = ScopedViewBuilder::new(&applied).build(); - SymbolsInversionStage::default().map(&mut view); + SymbolsInversion::default().map(&mut view); view.to_string() }; diff --git a/tests/properties/upper.rs b/tests/properties/upper.rs index 47370a62..e54a166e 100644 --- a/tests/properties/upper.rs +++ b/tests/properties/upper.rs @@ -1,5 +1,5 @@ use proptest::prelude::*; -use srgn::{scoping::ScopedViewBuilder, stages::UpperStage, Stage}; +use srgn::{actions::Upper, scoping::ScopedViewBuilder, Action}; use crate::properties::DEFAULT_NUMBER_OF_TEST_CASES; proptest! { @@ -10,10 +10,10 @@ proptest! { // https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values input in r"\p{Uppercase_Letter}*" ) { - let stage = UpperStage::default(); + let action = Upper::default(); let mut view = ScopedViewBuilder::new(&input).build(); - stage.map(&mut view); + action.map(&mut view); let res = view.to_string(); assert_eq!(res, input);