Skip to content

Commit

Permalink
Avoid Vec::remove with None token
Browse files Browse the repository at this point in the history
Using `Vec::remove(i)` causes the `n` tokens between position `i` and
the end of the vector to be shifted, which is an `O(n)` operation.

When done in a loop, as it is done in our `INSERT INTO ... VALUES`
repeated list sanitisation, it makes the overall performance `O(n^2)`.

To avoid using `Vec::remove`, implement a `Token::None` value that
represents a "non-token", a gap to be ignored in the token list.

Replace calls to `Vec::remove(i)` with replacing the token at
position `i` with `Token::None`.

Replace calls to `Vec::insert` for the same reason -- luckily, when
we insert a token, it's a replacement for a different token, so it's
possible to rewrite them in that manner. These are rarely done in a
loop, though, so the impact on sanitisation performance is much more
limited.
  • Loading branch information
unflxw committed Dec 20, 2024
1 parent f17159a commit 6ac7406
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 8 deletions.
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,12 @@ pub enum Token {
SquareBracketClose,
Colon,
Semicolon,
// Used by the sanitizer to replace values
Placeholder,
// Used by the sanitizer to replace repeated value lists
Ellipsis,
// Used by the sanitizer to remove tokens
None,
Null,
True,
False,
Expand Down
29 changes: 21 additions & 8 deletions src/sanitizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,17 @@ impl SqlSanitizer {
if keep {
kept += 1;
} else {
let mut removed = 1;
self.remove(pos + kept);
while kept > 0 {
kept -= 1;
removed += 1;
self.remove(pos + kept);
}
pos += removed;
}
}
self.sql.tokens.insert(start_pos, Token::Ellipsis);
self.ellipsis(start_pos);
}
Token::ParentheseClose | Token::SquareBracketClose => state = State::Default,
Token::Dot if state == State::JoinOn => (),
Expand Down Expand Up @@ -146,23 +149,28 @@ impl SqlSanitizer {
match self.sql.tokens[pos] {
Token::ParentheseClose => break,
Token::SquareBracketClose => break,
_ => self.remove(pos),
_ => {
self.remove(pos);
pos += 1;
},
}
}
self.sql.tokens.insert(start_pos, Token::Placeholder);
self.placeholder(start_pos);
}
_ => (),
}
}
// Remove comments
Token::Comment(_) => {
self.sql.tokens.remove(pos);
self.remove(pos);
if self.sql.tokens.get(pos - 1) == Some(&Token::Space) {
self.sql.tokens.remove(pos - 1);
self.remove(pos - 1);
}
}
// Spaces don't influence the state by default
Token::Space => (),
// Non-tokens don't influence the state
Token::None => (),
// Keep state the same if we're in a insert values or keyword scope state
_ if state == State::InsertValues || state == State::KeywordScopeStarted => (),
// Reset state to default if there were no matches
Expand All @@ -176,12 +184,17 @@ impl SqlSanitizer {
}

fn remove(&mut self, position: usize) {
self.sql.tokens.remove(position);
self.sql.tokens[position] = Token::None;
}

// Replaces the token at position `position` with a placeholder.
fn placeholder(&mut self, position: usize) {
self.sql.tokens.remove(position);
self.sql.tokens.insert(position, Token::Placeholder);
self.sql.tokens[position] = Token::Placeholder;
}

// Replaces the token at position `position` with an ellipsis.
fn ellipsis(&mut self, position: usize) {
self.sql.tokens[position] = Token::Ellipsis;
}
}

Expand Down
1 change: 1 addition & 0 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ impl SqlWriter {
Token::Semicolon => out.push(';'),
Token::Placeholder => out.push('?'),
Token::Ellipsis => out.push_str("..."),
Token::None => {},
Token::Null => out.push_str("NULL"),
Token::True => out.push_str("TRUE"),
Token::False => out.push_str("FALSE"),
Expand Down

0 comments on commit 6ac7406

Please sign in to comment.