Skip to content

Commit

Permalink
Improve Osa implementation
Browse files Browse the repository at this point in the history
This reduces the binary size of osa_distance by more than 25% while
improving the performance.
  • Loading branch information
maxbachmann committed Jan 1, 2024
1 parent 5b512dc commit 3c3381c
Showing 1 changed file with 11 additions and 14 deletions.
25 changes: 11 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::collections::HashMap;
use std::error::Error;
use std::fmt::{self, Display, Formatter};
use std::hash::Hash;
use std::mem;
use std::str::Chars;

#[derive(Debug, PartialEq)]
Expand Down Expand Up @@ -288,18 +289,11 @@ pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
/// assert_eq!(3, osa_distance("ab", "bca"));
/// ```
pub fn osa_distance(a: &str, b: &str) -> usize {
let a_len = a.chars().count();
let b_len = b.chars().count();
if a == b {
return 0;
} else if a_len == 0 {
return b_len;
} else if b_len == 0 {
return a_len;
}

let mut prev_two_distances: Vec<usize> = (0..=b_len).collect();
let mut prev_distances: Vec<usize> = (0..=b_len).collect();
// 0..=b_len behaves like 0..b_len.saturating_add(1) which could be a different size
// this leads to significantly worse code gen when swapping the vectors below
let mut prev_two_distances: Vec<usize> = (0..b_len + 1).collect();
let mut prev_distances: Vec<usize> = (0..b_len + 1).collect();
let mut curr_distances: Vec<usize> = vec![0; b_len + 1];

let mut prev_a_char = char::MAX;
Expand All @@ -322,12 +316,15 @@ pub fn osa_distance(a: &str, b: &str) -> usize {
prev_b_char = b_char;
}

prev_two_distances.clone_from(&prev_distances);
prev_distances.clone_from(&curr_distances);
mem::swap(&mut prev_two_distances, &mut prev_distances);
mem::swap(&mut prev_distances, &mut curr_distances);
prev_a_char = a_char;
}

curr_distances[b_len]
// access prev_distances instead of curr_distances since we swapped
// them above. In case a is empty this would still contain the correct value
// from initializing the last element to b_len
prev_distances[b_len]
}

/* Returns the final index for a value in a single vector that represents a fixed
Expand Down

0 comments on commit 3c3381c

Please sign in to comment.