diff --git a/CHANGELOG.md b/CHANGELOG.md index b8f7908..e4b9b52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ This project attempts to adhere to [Semantic Versioning](http://semver.org). ### Fixed - Fix transposition counting in Jaro and Jaro-Winkler. +- Limit common prefix in Jaro-Winkler to 4 characters. ## [0.10.0] - (2020-01-31) diff --git a/README.md b/README.md index 3e19575..f3dcd96 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ - [Levenshtein] - distance & normalized - [Optimal string alignment] - [Damerau-Levenshtein] - distance & normalized - - [Jaro and Jaro-Winkler] - this implementation of Jaro-Winkler does not limit the common prefix length + - [Jaro and Jaro-Winkler] - [Sørensen-Dice] The normalized versions return values between `0.0` and `1.0`, where `1.0` means diff --git a/src/lib.rs b/src/lib.rs index 6f9bec7..9aff1e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -196,21 +196,14 @@ where { let jaro_distance = generic_jaro(a, b); - // Don't limit the length of the common prefix let prefix_length = a .into_iter() + .take(4) .zip(b) .take_while(|(a_elem, b_elem)| a_elem == b_elem) .count(); - let jaro_winkler_distance = - jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance)); - - if jaro_winkler_distance <= 1.0 { - jaro_winkler_distance - } else { - 1.0 - } + return jaro_distance + 0.1 * prefix_length as f64 * (1.0 - jaro_distance); } /// Like Jaro but gives a boost to strings that have a common prefix. 
@@ -218,7 +211,7 @@ where /// ``` /// use strsim::jaro_winkler; /// -/// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() < +/// assert!((0.866 - jaro_winkler("cheeseburger", "cheese fries")).abs() < /// 0.001); /// ``` pub fn jaro_winkler(a: &str, b: &str) -> f64 { @@ -968,7 +961,7 @@ mod tests { #[test] fn jaro_winkler_long_prefix() { - assert_delta!(0.911, jaro_winkler("cheeseburger", "cheese fries"), 0.001); + assert_delta!(0.866, jaro_winkler("cheeseburger", "cheese fries"), 0.001); } #[test] @@ -984,7 +977,7 @@ mod tests { #[test] fn jaro_winkler_very_long_prefix() { assert_delta!( - 1.0, + 0.985, jaro_winkler("thequickbrownfoxjumpedoverx", "thequickbrownfoxjumpedovery") ); } diff --git a/tests/lib.rs b/tests/lib.rs index c170be0..991fc6f 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -67,5 +67,5 @@ fn jaro_works() { #[test] fn jaro_winkler_works() { - assert_delta!(0.911, jaro_winkler("cheeseburger", "cheese fries"), 0.001); + assert_delta!(0.866, jaro_winkler("cheeseburger", "cheese fries"), 0.001); }