From 6ac74068b8c443ea300e24d02b30406e89de509a Mon Sep 17 00:00:00 2001 From: Noemi <45180344+unflxw@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:24:06 +0100 Subject: [PATCH] Avoid `Vec::remove` with `None` token Using `Vec::remove(i)` causes the `n` tokens between position `i` and the end of the vector to be shifted, which is an `O(n)` operation. When done in a loop, as it is done in our `INSERT INTO ... VALUES` repeated list sanitisation, it makes the overall performance `O(n^2)`. To avoid using `Vec::remove`, implement a `Token::None` value that represents a "non-token", a gap to be ignored in the token list. Replace calls to `Vec::remove(i)` with replacing the token at position `i` with `Token::None`. Replace calls to `Vec::insert` for the same reason -- luckily, when we insert a token, it's a replacement for a different token, so it's possible to rewrite them in that manner. These are rarely done in a loop, though, so the impact on sanitisation performance is much more limited. --- src/lib.rs | 4 ++++ src/sanitizer.rs | 29 +++++++++++++++++++++-------- src/writer.rs | 1 + 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index cae497f..b67f7b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -137,8 +137,12 @@ pub enum Token { SquareBracketClose, Colon, Semicolon, + // Used by the sanitizer to replace values Placeholder, + // Used by the sanitizer to replace repeated value lists Ellipsis, + // Used by the sanitizer to remove tokens + None, Null, True, False, diff --git a/src/sanitizer.rs b/src/sanitizer.rs index b9ff10c..cf241ac 100644 --- a/src/sanitizer.rs +++ b/src/sanitizer.rs @@ -97,14 +97,17 @@ impl SqlSanitizer { if keep { kept += 1; } else { + let mut removed = 1; self.remove(pos + kept); while kept > 0 { kept -= 1; + removed += 1; self.remove(pos + kept); } + pos += removed; } } - self.sql.tokens.insert(start_pos, Token::Ellipsis); + self.ellipsis(start_pos); } Token::ParentheseClose | Token::SquareBracketClose => state = State::Default, Token::Dot if state == State::JoinOn => (), @@ -146,23 +149,28 @@ impl SqlSanitizer { match self.sql.tokens[pos] { Token::ParentheseClose => break, Token::SquareBracketClose => break, - _ => self.remove(pos), + _ => { + self.remove(pos); + pos += 1; + }, } } - self.sql.tokens.insert(start_pos, Token::Placeholder); + self.placeholder(start_pos); } _ => (), } } // Remove comments Token::Comment(_) => { - self.sql.tokens.remove(pos); + self.remove(pos); if self.sql.tokens.get(pos - 1) == Some(&Token::Space) { - self.sql.tokens.remove(pos - 1); + self.remove(pos - 1); } } // Spaces don't influence the state by default Token::Space => (), + // Non-tokens don't influence the state + Token::None => (), // Keep state the same if we're in a insert values or keyword scope state _ if state == State::InsertValues || state == State::KeywordScopeStarted => (), // Reset state to default if there were no matches @@ -176,12 +184,17 @@ impl SqlSanitizer { } fn remove(&mut self, position: usize) { - self.sql.tokens.remove(position); + self.sql.tokens[position] = Token::None; } + // Replaces the token at position `position` with a placeholder. fn placeholder(&mut self, position: usize) { - self.sql.tokens.remove(position); - self.sql.tokens.insert(position, Token::Placeholder); + self.sql.tokens[position] = Token::Placeholder; + } + + // Replaces the token at position `position` with an ellipsis. + fn ellipsis(&mut self, position: usize) { + self.sql.tokens[position] = Token::Ellipsis; } } diff --git a/src/writer.rs b/src/writer.rs index 70aa426..3216f9a 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -171,6 +171,7 @@ impl SqlWriter { Token::Semicolon => out.push(';'), Token::Placeholder => out.push('?'), Token::Ellipsis => out.push_str("..."), + Token::None => {}, Token::Null => out.push_str("NULL"), Token::True => out.push_str("TRUE"), Token::False => out.push_str("FALSE"),