Skip to content

Commit

Permalink
Optimise by filtering out empty values.
Browse files Browse the repository at this point in the history
The `fits` function (and to a lesser extent `apply`) previously did vast
amounts of pointless work on empty values that could not succeed. This
commit does something very simple: it pre-filters out all the empty
values so that `fits` has a lot less work to do.

On small grammars, the quantity of pointless work probably wasn't very
noticeable, but on large grammars like Postgresql's, it became
punishing. On my machine this commit takes nimbleparse's running time
down from 101s to 3s.
  • Loading branch information
ltratt committed Nov 22, 2024
1 parent 0b8c682 commit b9823e6
Showing 1 changed file with 24 additions and 17 deletions.
41 changes: 24 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,25 @@ fn compress<T: Clone + Copy + PartialEq>(
let mut r = Vec::new(); // Result vector
r.resize(row_length, empty_val);

let mut dv = Vec::new(); // displacement vector
dv.resize(sorted.len(), 0);
let mut dv = vec![0; sorted.len()]; // displacement vector

let mut tmp = Vec::new();
for s in sorted {
let slice = &vec[s * row_length..(s + 1) * row_length];
// The row we're about to iterate over typically contains mostly empty values that can
// never succeed with `fits`. We pre-filter out all those empty values up-front, such that
// `tmp` contains `(index, non-empty-value)` pairs that we can then pass to `fits`. Because
// this is such a tight loop, we reuse the same `Vec` to avoid repeated allocations.
tmp.clear();
for (i, v) in vec[s * row_length..(s + 1) * row_length].iter().enumerate() {
if *v != empty_val {
tmp.push((i, v));
}
}

let mut d = 0; // displacement value
loop {
if fits(slice, &r, d, empty_val) {
apply(slice, &mut r, d, empty_val);
if fits(tmp.as_slice(), &r, d, empty_val) {
apply(tmp.as_slice(), &mut r, d);
dv[*s] = d;
break;
} else {
Expand All @@ -196,27 +206,24 @@ fn compress<T: Clone + Copy + PartialEq>(
(r, dv)
}

fn fits<T: PartialEq>(v: &[T], target: &[T], d: usize, empty_val: T) -> bool {
for i in 0..v.len() {
if v[i] != empty_val && target[d + i] != empty_val && target[d + i] != v[i] {
/// `v` is an array of `(index, non-empty_val)` pairs.
fn fits<T: PartialEq>(v: &[(usize, &T)], target: &[T], d: usize, empty_val: T) -> bool {
for (i, x) in v {
if target[d + i] != empty_val && target[d + i] != **x {
return false;
}
}
true
}

fn apply<T: Copy + PartialEq>(v: &[T], target: &mut [T], d: usize, empty_val: T) {
for i in 0..v.len() {
if v[i] != empty_val {
target[d + i] = v[i]
}
/// `v` is an array of `(index, non-empty_val)` pairs.
fn apply<T: Copy + PartialEq>(v: &[(usize, &T)], target: &mut [T], d: usize) {
for (i, x) in v {
target[d + i] = **x;
}
}

fn sort<T: PartialEq>(v: &[T], empty_val: T, row_length: usize) -> Vec<usize>
where
T: PartialEq<T>,
{
fn sort<T: PartialEq>(v: &[T], empty_val: T, row_length: usize) -> Vec<usize> {
let mut o: Vec<usize> = (0..v.len() / row_length).collect();
o.sort_by_key(|x| {
v[(x * row_length)..((x + 1) * row_length)]
Expand Down

0 comments on commit b9823e6

Please sign in to comment.