Skip to content

Commit

Permalink
parser: Accept bytes as input
Browse files Browse the repository at this point in the history
In addition to (Unicode) strings, also accept "bytes" (and corresponding
iterators) as input to the parser. This allows skipping the
decode/encode step when reading raw data from a file or socket, e.g.
with os.read(). This introduces a small but measurable performance
increase for such cases.
  • Loading branch information
spbnick committed Sep 15, 2020
1 parent 6b5af09 commit 6490f6f
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 9 deletions.
39 changes: 31 additions & 8 deletions jq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ cdef object _jv_to_python(jv value):

cdef class _Parser(object):
cdef jv_parser* _parser
cdef object _text_iter
cdef object _bytes_iter
cdef object _bytes

def __dealloc__(self):
jv_parser_free(self._parser)

def __cinit__(self, text_iter):
def __cinit__(self, bytes_iter):
self._parser = jv_parser_new(0)
self._text_iter = text_iter
self._bytes_iter = bytes_iter
self._bytes = None

def __iter__(self):
Expand Down Expand Up @@ -149,7 +149,7 @@ cdef class _Parser(object):
cdef char* cbytes
cdef ssize_t clen
try:
self._bytes = next(self._text_iter).encode("utf8")
self._bytes = next(self._bytes_iter)
PyBytes_AsStringAndSize(self._bytes, &cbytes, &clen)
jv_parser_set_buf(self._parser, cbytes, clen, 1)
except StopIteration:
Expand Down Expand Up @@ -399,10 +399,33 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
return compile(program).input(value, text=text).text()


def parse(text=_NO_VALUE, text_iter=_NO_VALUE):
if (text is _NO_VALUE) == (text_iter is _NO_VALUE):
raise ValueError("Either the text or text_iter argument should be set")
return _Parser(text_iter if text_iter is not _NO_VALUE else _iter((text,)))
def parse(text=_NO_VALUE, text_iter=_NO_VALUE,
          bytes=_NO_VALUE, bytes_iter=_NO_VALUE):
    """
    Parse a text/bytes stream into JSON. Exactly one of the "text",
    "text_iter", "bytes", "bytes_iter" arguments must be supplied.

    Args:
        text: The text to parse.
        text_iter: An iterator returning pieces of the text to parse.
        bytes: The bytes to parse.
        bytes_iter: An iterator returning pieces of the bytes to parse.

    Returns:
        An iterator yielding the parsed JSON values.

    Raises:
        ValueError: If none, or more than one, of the arguments is set.
    """
    # Count supplied arguments by identity with the sentinel rather than
    # by equality: tuple.count() would invoke the inputs' own __eq__,
    # which may be permissive or return a non-boolean result.
    supplied = sum(
        arg is not _NO_VALUE
        for arg in (text, text_iter, bytes, bytes_iter)
    )
    if supplied != 1:
        raise ValueError("Exactly one argument should be set")

    # Text inputs are encoded to UTF-8 and then handled as bytes inputs.
    if text is not _NO_VALUE:
        bytes = text.encode("utf8")
    if text_iter is not _NO_VALUE:
        def encode_text_iter():
            # Encode lazily, one piece at a time, as the parser asks for it.
            for piece in text_iter:
                yield piece.encode("utf8")
        bytes_iter = encode_text_iter()

    # A single bytes object becomes a one-element iterator, so the parser
    # only ever consumes an iterator of bytes.
    if bytes is not _NO_VALUE:
        bytes_iter = _iter((bytes,))
    return _Parser(bytes_iter)


# Support the 0.1.x API for backwards compatibility
Expand Down
4 changes: 3 additions & 1 deletion tests/jq_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,11 @@ def program_string_can_be_retrieved_from_program():
assert_equal(".", program.program_string)

@istest
def parse_both_text_and_text_iter_accepted():
def parse_all_inputs_accepted():
    """Check that jq.parse accepts each of its four input keyword forms."""
    # One keyword-argument set per supported input form; each encodes the
    # JSON literal `true`.
    input_forms = (
        {"text": "true"},
        {"text_iter": iter(["true"])},
        {"bytes": b"true"},
        {"bytes_iter": iter([b"true"])},
    )
    for kwargs in input_forms:
        first_value = next(jq.parse(**kwargs))
        assert_equal(True, first_value)

@istest
def parse_empty_text_iter_stops():
Expand Down

0 comments on commit 6490f6f

Please sign in to comment.