Skip to content

Commit

Permalink
limit common prefix in jaro-winkler
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Jan 5, 2024
1 parent 15e1a03 commit f6a7593
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ This project attempts to adhere to [Semantic Versioning](http://semver.org).
### Fixed

- Fix transposition counting in Jaro and Jaro-Winkler.
- Limit common prefix in Jaro-Winkler to 4 characters.

## [0.10.0] - (2020-01-31)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
- [Levenshtein] - distance & normalized
- [Optimal string alignment]
- [Damerau-Levenshtein] - distance & normalized
- [Jaro and Jaro-Winkler] - this implementation of Jaro-Winkler does not limit the common prefix length
- [Jaro and Jaro-Winkler]
- [Sørensen-Dice]

The normalized versions return values between `0.0` and `1.0`, where `1.0` means
Expand Down
17 changes: 5 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,29 +196,22 @@ where
{
let jaro_distance = generic_jaro(a, b);

// Don't limit the length of the common prefix
let prefix_length = a
.into_iter()
.take(4)
.zip(b)
.take_while(|(a_elem, b_elem)| a_elem == b_elem)
.count();

let jaro_winkler_distance =
jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance));

if jaro_winkler_distance <= 1.0 {
jaro_winkler_distance
} else {
1.0
}
return jaro_distance + 0.1 * prefix_length as f64 * (1.0 - jaro_distance);
}

/// Like Jaro but gives a boost to strings that have a common prefix; at most
/// the first 4 characters of the common prefix count toward the boost.
///
/// ```
/// use strsim::jaro_winkler;
///
/// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
/// assert!((0.866 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
/// 0.001);
/// ```
pub fn jaro_winkler(a: &str, b: &str) -> f64 {
Expand Down Expand Up @@ -968,7 +961,7 @@ mod tests {

#[test]
fn jaro_winkler_long_prefix() {
assert_delta!(0.911, jaro_winkler("cheeseburger", "cheese fries"), 0.001);
assert_delta!(0.866, jaro_winkler("cheeseburger", "cheese fries"), 0.001);
}

#[test]
Expand All @@ -984,7 +977,7 @@ mod tests {
#[test]
fn jaro_winkler_very_long_prefix() {
assert_delta!(
1.0,
0.985,
jaro_winkler("thequickbrownfoxjumpedoverx", "thequickbrownfoxjumpedovery")
);
}
Expand Down
2 changes: 1 addition & 1 deletion tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,5 @@ fn jaro_works() {

#[test]
fn jaro_winkler_works() {
assert_delta!(0.911, jaro_winkler("cheeseburger", "cheese fries"), 0.001);
assert_delta!(0.866, jaro_winkler("cheeseburger", "cheese fries"), 0.001);
}

0 comments on commit f6a7593

Please sign in to comment.