Skip to content

Commit

Permalink
feat: Inversion
Browse files Browse the repository at this point in the history
Currently only possible/viable for the symbols stage.

Others, like German, might follow, but there are downsides.
  • Loading branch information
alexpovel committed Sep 17, 2023
1 parent e0b097a commit b0c3b6b
Show file tree
Hide file tree
Showing 13 changed files with 370 additions and 18 deletions.
134 changes: 131 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ num_cpus = "1.16.0"
rand = "0.8.5"
rand_regex = "0.16.0"
test-log = "0.2.12"
proptest = "1.2.0"
enum-iterator = "1.4.1"

[profile.dev.package.insta]
# https://insta.rs/docs/quickstart/#optional-faster-runs
Expand Down
33 changes: 28 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ use betterletters::stages::GermanStage;
use betterletters::stages::LowerStage;
#[cfg(feature = "squeeze")]
use betterletters::stages::SqueezeStage;
#[cfg(feature = "symbols")]
use betterletters::stages::SymbolsStage;
#[cfg(feature = "upper")]
use betterletters::stages::UpperStage;
#[cfg(feature = "symbols")]
use betterletters::stages::{SymbolsInversionStage, SymbolsStage};
use log::{debug, info};
use std::io::{self, BufReader, Error};

Expand Down Expand Up @@ -39,8 +39,13 @@ fn main() -> Result<(), Error> {
}

if args.symbols {
stages.push(Box::<SymbolsStage>::default());
debug!("Loaded stage: Symbols");
if args.invert {
stages.push(Box::<SymbolsInversionStage>::default());
debug!("Loaded stage: SymbolsInversion");
} else {
stages.push(Box::<SymbolsStage>::default());
debug!("Loaded stage: Symbols");
}
}

if args.delete {
Expand Down Expand Up @@ -100,7 +105,7 @@ mod cli {
#[arg(short, long, env = "GERMAN")]
pub german: bool,
/// Perform substitutions on symbols, such as '!=' to '≠', '->' to '→'
#[arg(short = 'S', long, env = "SYMBOLS")]
#[arg(short = 'S', long, env = "SYMBOLS", group = "invertible")]
pub symbols: bool,
/// Delete what was matched
///
Expand All @@ -126,6 +131,24 @@ mod cli {
/// Useful for names, which are otherwise not modifiable as they do not occur in
/// dictionaries. Called 'naive' as this does not perform legal checks.
pub german_naive: bool,
/// Undo the effects of passed stages, where applicable
///
/// Requires a 1:1 mapping (bijection) between replacements and original, which
/// is currently available for:
///
/// - symbols: '≠' <-> '!=' etc.
///
/// Other stages:
///
/// - german: inverting e.g. 'Ä' is ambiguous (can be 'Ae' or 'AE')
///
/// - upper, lower, deletion, squeeze: inversion is impossible as information is
/// lost
///
/// These may still be passed, but will be ignored for inversion and applied
/// normally
#[arg(short, long, env = "INVERT", requires = "invertible")]
pub invert: bool,
}

impl Args {
Expand Down
27 changes: 27 additions & 0 deletions src/stages/german/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,4 +735,31 @@ mod tests {
let result = stage.substitute(input);
assert_eq!(result, expected);
}

#[rstest]
// A lone digraph. Several differently-cased inputs collapse onto the same output,
// which is why this mapping cannot be reversed.
#[case("ue", "ü")]
#[case("uE", "ü")]
#[case("Ue", "Ü")]
#[case("UE", "Ü")]
//
// Digraph at the start of a word
#[case("uekol", "ükol")]
#[case("uEkol", "ükol")]
#[case("Uekol", "Ükol")]
#[case("UEkol", "Ükol")]
//
// Digraph inside a word
#[case("guessa", "güßa")]
#[case("gUessa", "gÜßa")]
#[case("guEssa", "güßa")]
#[case("gUEssa", "gÜßa")]
#[case("Guessa", "Güßa")]
#[case("GUESSA", "GÜẞA")]
/// Checks the casing of replacements produced by a `GermanStage` after `naive()`
/// has been called on it.
fn test_casing_when_being_naive(#[case] input: &str, #[case] expected: &str) {
    let mut naive_stage = GermanStage::default();
    naive_stage.naive();

    assert_eq!(naive_stage.substitute(input), expected);
}
}
1 change: 1 addition & 0 deletions src/stages/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub use deletion::DeletionStage;
pub use german::GermanStage;
pub use lower::LowerStage;
pub use squeeze::SqueezeStage;
pub use symbols::inversion::SymbolsInversionStage;
pub use symbols::SymbolsStage;
pub use upper::UpperStage;

Expand Down
42 changes: 42 additions & 0 deletions src/stages/symbols/inversion.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use super::Symbol;
use crate::{scoped::Scoped, Stage};

/// Inverts all symbols inserted by [`SymbolsStage`].
///
/// Every character that is a known [`Symbol`] is turned back into the ASCII sequence
/// it stands for (for example, the not-equal sign becomes `!=`); all other
/// characters are left untouched.
///
/// This is intended to be the exact inverse of [`SymbolsStage`], as the replacements
/// and originals form a [bijection](https://en.wikipedia.org/wiki/Bijection).
///
/// Note that the symbol-to-ASCII table is maintained by hand in this stage's
/// [`Stage`] implementation, so it has to be kept in sync with [`SymbolsStage`]
/// manually.
///
/// The stage carries no state, hence the empty struct and the `Default`/`Copy`
/// derives.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
// The type name repeats part of its module path (`symbols::inversion`), in line with
// the naming of the other stages (`SymbolsStage`, `GermanStage`, ...).
#[allow(clippy::module_name_repetitions)]
pub struct SymbolsInversionStage {}

// Nothing is overridden here: whatever `Scoped` provides by default is used as-is.
// NOTE(review): presumably `Scoped` is a prerequisite for `Stage` -- confirm against
// the trait definitions.
impl Scoped for SymbolsInversionStage {}

impl Stage for SymbolsInversionStage {
    /// Replaces every character of `input` that is a known [`Symbol`] with its ASCII
    /// spelling; every other character is copied to the output unchanged.
    ///
    /// The input is processed character by character, so each symbol is inverted
    /// independently of its neighbors.
    fn substitute(&self, input: &str) -> String {
        // Build the result in a single buffer instead of allocating one `String`
        // per input character. The input length is a sensible starting capacity.
        let mut output = String::with_capacity(input.len());

        for c in input.chars() {
            match Symbol::try_from(c) {
                // This is *horrible*: in the current implementation, these ASCII
                // spellings cannot be accessed programmatically. They are implicitly
                // encoded in the `substitute` method of `SymbolsStage`. As such, this
                // inversion can get out of sync with the original. There is a
                // property test in place to catch this.
                //
                // The match is deliberately exhaustive (no `_` arm) so that adding a
                // new `Symbol` variant fails compilation here.
                Ok(symbol) => output.push_str(match symbol {
                    Symbol::EmDash => "---",
                    Symbol::EnDash => "--",
                    Symbol::ShortRightArrow => "->",
                    Symbol::ShortLeftArrow => "<-",
                    Symbol::LongRightArrow => "-->",
                    Symbol::LongLeftArrow => "<--",
                    Symbol::LeftRightArrow => "<->",
                    Symbol::RightDoubleArrow => "=>",
                    Symbol::NotEqual => "!=",
                    Symbol::LessThanOrEqual => "<=",
                    Symbol::GreaterThanOrEqual => ">=",
                }),
                // Not a symbol this stage knows about: pass it through verbatim.
                Err(_) => output.push(c),
            }
        }

        output
    }
}
Loading

0 comments on commit b0c3b6b

Please sign in to comment.