Skip to content

Commit

Permalink
Add parse()
Browse files Browse the repository at this point in the history
Add an implementation of the parse() function, which accepts either text or a
text iterator and returns an iterable that yields the parsed values.

This allows parsing JSON and JSON streams without passing them through a
program.
  • Loading branch information
spbnick committed Sep 8, 2020
1 parent 786a8c8 commit dd73a5a
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 0 deletions.
61 changes: 61 additions & 0 deletions jq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,61 @@ cdef object _jv_unpack(jv v):
raise ValueError("Invalid value kind: " + str(kind))


cdef class _Parser(object):
    """
    Iterator producing the values parsed from an iterator of JSON text chunks.

    Each item of ``text_iter`` is encoded to UTF-8 and fed to a jq parser;
    every complete value the parser produces is unpacked with _jv_unpack()
    and returned from __next__(). A value may be split across several chunks.
    """
    cdef jv_parser* _parser
    cdef object _text_iter
    # The chunk currently handed to the parser, or None if there is none.
    # Holding the reference keeps the buffer behind the C pointer alive.
    cdef object _bytes

    def __dealloc__(self):
        # Cython runs __dealloc__ even when __cinit__ failed before the
        # parser was created, so never free a NULL parser.
        if self._parser != NULL:
            jv_parser_free(self._parser)
            self._parser = NULL

    def __cinit__(self, text_iter):
        self._parser = jv_parser_new(0)
        self._text_iter = text_iter
        self._bytes = None

    def __iter__(self):
        return self

    def __next__(self):
        """
        Return the next parsed value.

        Raises:
            ValueError: the parser reported an error ("parse error: ...").
            StopIteration: the text iterator is exhausted and the parser has
                no more values to produce.
        """
        cdef jv value
        cdef jv error_message
        while True:
            # If we have no bytes to parse
            if self._bytes is None:
                # Ready some more
                self._ready_next_bytes()
            # Parse whatever we've readied, if any
            value = jv_parser_next(self._parser)
            if jv_is_valid(value):
                try:
                    unpacked = _jv_unpack(value)
                finally:
                    jv_free(value)
                return unpacked
            elif jv_invalid_has_msg(jv_copy(value)):
                error_message = jv_invalid_get_msg(value)
                # Release the message even if decoding it fails
                try:
                    message = jv_string_value(error_message).decode("utf8")
                finally:
                    jv_free(error_message)
                raise ValueError(u"parse error: " + message)
            else:
                jv_free(value)
            # If we didn't ready any bytes
            if self._bytes is None:
                raise StopIteration
            # The parser yielded nothing more from the current chunk,
            # request a fresh one on the next pass
            self._bytes = None

    cdef bint _ready_next_bytes(self) except 1:
        """
        Hand the next chunk of the text iterator to the parser.

        If the iterator is exhausted, an empty final (non-partial) buffer is
        set instead and ``self._bytes`` is left as None.

        Returns:
            Always 0; exceptions are signalled through the "except 1" value.
        """
        cdef char* cbytes
        # Only the iterator itself is allowed to signal exhaustion
        try:
            text = next(self._text_iter)
        except StopIteration:
            self._bytes = None
            jv_parser_set_buf(self._parser, "", 0, 0)
            return 0
        self._bytes = text.encode("utf8")
        cbytes = PyBytes_AsString(self._bytes)
        # Take the length from the bytes object: len() of a char* is a
        # strlen(), which would cut the chunk at the first embedded NUL byte
        jv_parser_set_buf(self._parser, cbytes, len(self._bytes), 1)
        return 0


def compile(object program):
    """
    Create a _Program from the program text.

    The text is encoded to UTF-8 before being passed to _Program.
    """
    return _Program(program.encode("utf8"))
Expand Down Expand Up @@ -348,6 +403,12 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
return compile(program).input(value, text=text).text()


def parse(text=_NO_VALUE, text_iter=_NO_VALUE):
    """
    Create a parser iterating over the values found in JSON text.

    Exactly one of the arguments must be supplied:
        text:       The complete text to parse.
        text_iter:  An iterator returning the text to parse piece by piece.

    Returns:
        A _Parser iterator over the parsed values.

    Raises:
        ValueError: neither or both of the arguments were supplied.
    """
    has_text = text is not _NO_VALUE
    has_text_iter = text_iter is not _NO_VALUE
    if has_text == has_text_iter:
        raise ValueError("Either the text or text_iter argument should be set")
    if has_text_iter:
        source = text_iter
    else:
        # Present the single text as a one-item iterator
        source = _iter((text,))
    return _Parser(source)


# Support the 0.1.x API for backwards compatibility
def jq(object program):
    """Old (0.1.x API) name for compile(); returns whatever compile() returns."""
    compiled_program = compile(program)
    return compiled_program
47 changes: 47 additions & 0 deletions tests/jq_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,53 @@ def program_string_can_be_retrieved_from_program():
program = jq.compile(".")
assert_equal(".", program.program_string)

@istest
def parse_both_text_and_text_iter_accepted():
    """parse() takes its input through either ``text`` or ``text_iter``."""
    from_text = jq.parse(text="true")
    assert_equal(True, next(from_text))
    from_text_iter = jq.parse(text_iter=iter(["true"]))
    assert_equal(True, next(from_text_iter))

@istest
def parse_empty_text_iter_stops():
    """Iterators carrying no text at all end the parsing immediately."""
    for chunks in ([], [""], ["", ""]):
        assert_raises(StopIteration, next, jq.parse(text_iter=iter(chunks)))

@istest
def parse_single_complete_text_iter_works():
    """A complete value delivered in a single chunk is parsed."""
    cases = [
        (False, "false"),
        (True, "true"),
        (42, "42"),
        (-42, "-42"),
        ("42", '"42"'),
        ([42], "[42]"),
        ({"a": 42}, '{"a": 42}'),
    ]
    for expected, chunk in cases:
        assert_equal(expected, next(jq.parse(text_iter=iter([chunk]))))

@istest
def parse_multi_complete_text_iter_works():
    """A complete value split across two chunks is parsed."""
    cases = [
        (False, ["fa", "lse"]),
        (True, ["tr", "ue"]),
        (42, ["4", "2"]),
        (-42, ["-4", "2"]),
        ("42", ['"4', '2"']),
        ([42], ["[4", "2]"]),
        ({"a": 42}, ['{"a":', ' 42}']),
    ]
    for expected, chunks in cases:
        assert_equal(expected, next(jq.parse(text_iter=iter(chunks))))

@istest
def parse_single_incomplete_text_iter_breaks():
    """A truncated value delivered in a single chunk raises ValueError."""
    truncated_chunks = ["fals", "tru", "-", '"42', "[42", '{"a": 42']
    for chunk in truncated_chunks:
        assert_raises(ValueError, next, jq.parse(text_iter=iter([chunk])))

@istest
def parse_multi_incomplete_text_iter_breaks():
    """A truncated value split across two chunks raises ValueError."""
    truncated_inputs = [
        ["fa", "ls"],
        ["tr", "u"],
        ['"4', '2'],
        ["[4", "2"],
        ['{"a":', ' 42'],
    ]
    for chunks in truncated_inputs:
        assert_raises(ValueError, next, jq.parse(text_iter=iter(chunks)))

@istest
class ConvenienceFunctions(object):
Expand Down

0 comments on commit dd73a5a

Please sign in to comment.