From a10ecddfac958ac577d8807c434a4be550c24beb Mon Sep 17 00:00:00 2001 From: Nikolai Kondrashov Date: Tue, 15 Sep 2020 14:14:01 +0300 Subject: [PATCH] parser: Accept bytes as input In addition to (Unicode) strings, also accept "bytes" (and corresponding iterators) as input to the parser. This allows skipping the decode/encode step when reading raw data from a file or socket, e.g. with os.read(). This introduces a small but measurable performance increase for such cases. --- jq.pyx | 14 +++++++++----- tests/jq_tests.py | 4 +++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/jq.pyx b/jq.pyx index a8ead27..86c5b93 100644 --- a/jq.pyx +++ b/jq.pyx @@ -163,7 +163,11 @@ cdef class _JSONParser(object): cdef char* cbytes cdef ssize_t clen try: - self._bytes = next(self._text_iter).encode("utf8") + text = next(self._text_iter) + if isinstance(text, bytes): + self._bytes = text + else: + self._bytes = text.encode("utf8") PyBytes_AsStringAndSize(self._bytes, &cbytes, &clen) jv_parser_set_buf(self._parser, cbytes, clen, 1) except StopIteration: @@ -419,9 +423,10 @@ def parse_json(text=_NO_VALUE, text_iter=_NO_VALUE): Either "text" or "text_iter" must be specified. Args: - text: A string containing the JSON stream to parse. - text_iter: An iterator returning strings - pieces of the JSON stream - to parse. + text: A string or bytes object containing the JSON stream to + parse. + text_iter: An iterator returning strings or bytes - pieces of the + JSON stream to parse. Returns: An iterator returning parsed values. @@ -442,7 +447,6 @@ def parse_json_file(fp): Args: fp: The file-like object to read the JSON stream from. - Must be in text mode. Returns: An iterator returning parsed values. 
diff --git a/tests/jq_tests.py b/tests/jq_tests.py index 5a90bba..e90df58 100644 --- a/tests/jq_tests.py +++ b/tests/jq_tests.py @@ -198,9 +198,11 @@ def program_string_can_be_retrieved_from_program(): assert_equal(".", program.program_string) @istest -def parse_json_both_text_and_text_iter_accepted(): +def parse_json_all_inputs_accepted(): assert_equal(True, next(jq.parse_json(text="true"))) assert_equal(True, next(jq.parse_json(text_iter=iter(["true"])))) + assert_equal(True, next(jq.parse_json(text=b"true"))) + assert_equal(True, next(jq.parse_json(text_iter=iter([b"true"])))) @istest def parse_json_file_works():