From b51b6b92d7ac69800f1f1cb9c531676409d2e9b0 Mon Sep 17 00:00:00 2001 From: ahiijny Date: Fri, 12 Feb 2021 12:58:03 -0500 Subject: [PATCH 1/2] Fix backslash handling in rows_from_chunks --- pydruid/db/api.py | 21 ++++++++++++++------- tests/db/test_rows_from_chunks.py | 12 ++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pydruid/db/api.py b/pydruid/db/api.py index 86b64e5e..3fbe94ae 100644 --- a/pydruid/db/api.py +++ b/pydruid/db/api.py @@ -394,20 +394,27 @@ def rows_from_chunks(chunks): body = "".join((body, chunk)) # find last complete row + # see also: https://www.json.org/ boundary = 0 brackets = 0 in_string = False + in_escape = False for i, char in enumerate(body): - if char == '"': - if not in_string: - in_string = True - elif body[i - 1] != "\\": - in_string = False - if in_string: + if in_escape: + # we're just looking for string boundaries, so we can + # ignore the trailing X in escapes like \uXXXX, since each + # of those X characters must be alphanumeric anyway + in_escape = False + elif char == '\\': + in_escape = True + elif char == '"': + in_string = False continue - if char == "{": + if char == '"': + in_string = True + elif char == "{": brackets += 1 elif char == "}": brackets -= 1 diff --git a/tests/db/test_rows_from_chunks.py b/tests/db/test_rows_from_chunks.py index 5b811811..e143a1e4 100644 --- a/tests/db/test_rows_from_chunks.py +++ b/tests/db/test_rows_from_chunks.py @@ -36,6 +36,18 @@ def test_rows_from_chunks_quote_in_string(self): result = list(rows_from_chunks(chunks)) self.assertEqual(result, expected) + def test_rows_from_chunks_string_ending_with_backslash(self): + chunks = [r'[{"name": "\\"}]'] + expected = [{"name": "\\"}] + result = list(rows_from_chunks(chunks)) + self.assertEqual(result, expected) + + def test_rows_from_chunks_multiple_rows_ending_with_backslashes(self): + chunks = [r'[{"name": "alice"}, {"name": "bob\\"}, {"name": "charlie\\"}]'] + expected = [{"name": "alice"}, {"name": "bob\\"}, {"name": "charlie\\"}] + result = list(rows_from_chunks(chunks)) + self.assertEqual(result, expected) + if __name__ == "__main__": unittest.main() From 4a3f7cce08de5b870ef4575215624756f125a301 Mon Sep 17 00:00:00 2001 From: ahiijny Date: Fri, 12 Feb 2021 14:42:58 -0500 Subject: [PATCH 2/2] Satisfy the code formatter --- pydruid/db/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydruid/db/api.py b/pydruid/db/api.py index 3fbe94ae..550fed0d 100644 --- a/pydruid/db/api.py +++ b/pydruid/db/api.py @@ -406,7 +406,7 @@ def rows_from_chunks(chunks): # ignore the trailing X in escapes like \uXXXX, since each # of those X characters must be alphanumeric anyway in_escape = False - elif char == '\\': + elif char == "\\": in_escape = True elif char == '"': in_string = False