diff --git a/jq.pyx b/jq.pyx
index d3f9067..43f1bc4 100644
--- a/jq.pyx
+++ b/jq.pyx
@@ -109,6 +109,89 @@ cdef object _jv_to_python(jv value):
     return python_value
 
 
+class JSONParseError(Exception):
+    """A failure to parse JSON"""
+
+
+cdef class _JSONParser(object):
+    """Iterator over the values of a JSON stream supplied in pieces"""
+
+    # Parser state: created in __cinit__, freed in __dealloc__
+    cdef jv_parser* _parser
+    # Iterator returning strings - pieces of the JSON stream to parse
+    cdef object _text_iter
+    # UTF-8 encoded piece currently given to the parser, or None if the
+    # next piece has to be readied. Held here so the bytes object stays
+    # alive while the parser has a pointer to its contents.
+    cdef object _bytes
+
+    def __dealloc__(self):
+        jv_parser_free(self._parser)
+
+    def __cinit__(self, text_iter):
+        self._parser = jv_parser_new(0)
+        self._text_iter = text_iter
+        self._bytes = None
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """
+        Retrieve next parsed JSON value.
+
+        Returns:
+            The next parsed JSON value.
+
+        Raises:
+            JSONParseError: failed parsing the input JSON.
+            StopIteration: no more values available.
+        """
+        cdef jv value
+        while True:
+            # If we have no bytes to parse
+            if self._bytes is None:
+                # Ready some more
+                self._ready_next_bytes()
+            # Parse whatever we've readied, if any
+            value = jv_parser_next(self._parser)
+            if jv_is_valid(value):
+                return _jv_to_python(value)
+            elif jv_invalid_has_msg(jv_copy(value)):
+                error_message = jv_invalid_get_msg(value)
+                message = jv_string_value(error_message).decode("utf8")
+                jv_free(error_message)
+                raise JSONParseError(message)
+            else:
+                jv_free(value)
+            # If we didn't ready any bytes
+            if self._bytes is None:
+                raise StopIteration
+            # No value and no error: drop the current piece so that the
+            # next pass readies another one
+            self._bytes = None
+
+    # Give the next piece from the text iterator to the parser, or tell
+    # the parser the input has ended once the iterator is exhausted.
+    # Always returns 0; with "except 1" a return of 1 signals an exception.
+    cdef bint _ready_next_bytes(self) except 1:
+        cdef char* cbytes
+        # Only next() is expected to raise StopIteration, so guard just that
+        try:
+            text = next(self._text_iter)
+        except StopIteration:
+            self._bytes = None
+            # Empty buffer with the last argument (is_partial) cleared
+            jv_parser_set_buf(self._parser, "", 0, 0)
+            return 0
+        self._bytes = text.encode("utf8")
+        cbytes = PyBytes_AsString(self._bytes)
+        # Take the length from the bytes object: len() of a char* is a
+        # strlen() in Cython and stops at the first NUL byte in the piece
+        jv_parser_set_buf(self._parser, cbytes, len(self._bytes), 1)
+        return 0
+
+
 def compile(object program, args=None):
     cdef object program_bytes = program.encode("utf8")
     return _Program(program_bytes, args=args)
@@ -356,6 +439,46 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
     return compile(program).input(value, text=text).text()
 
 
+def parse_json(text=_NO_VALUE, text_iter=_NO_VALUE):
+    """
+    Parse a JSON stream.
+    Either "text" or "text_iter" must be specified.
+
+    Args:
+        text:       A string containing the JSON stream to parse.
+        text_iter:  An iterator returning strings - pieces of the JSON stream
+                    to parse.
+
+    Returns:
+        An iterator returning parsed values.
+
+    Raises:
+        JSONParseError: failed parsing the input JSON stream.
+    """
+    if (text is _NO_VALUE) == (text_iter is _NO_VALUE):
+        raise ValueError("Either the text or text_iter argument should be set")
+    return _JSONParser(text_iter
+                       if text_iter is not _NO_VALUE
+                       else _iter((text,)))
+
+
+def parse_json_file(fp):
+    """
+    Parse a JSON stream file.
+
+    Args:
+        fp: The file-like object to read the JSON stream from.
+            Must be in text mode.
+
+    Returns:
+        An iterator returning parsed values.
+
+    Raises:
+        JSONParseError: failed parsing the JSON stream.
+    """
+    return parse_json(text=fp.read())
+
+
 # Support the 0.1.x API for backwards compatibility
 def jq(object program):
     return compile(program)
diff --git a/tests/jq_tests.py b/tests/jq_tests.py
index 5b20391..acde6fd 100644
--- a/tests/jq_tests.py
+++ b/tests/jq_tests.py
@@ -4,6 +4,7 @@
 
 from nose.tools import istest, assert_equal, assert_is, assert_raises
 
+import io
 import jq
 
 
@@ -204,6 +205,71 @@ def program_string_can_be_retrieved_from_program():
     program = jq.compile(".")
     assert_equal(".", program.program_string)
 
+@istest
+def parse_json_both_text_and_text_iter_accepted():
+    assert_equal(True, next(jq.parse_json(text="true")))
+    assert_equal(True, next(jq.parse_json(text_iter=iter(["true"]))))
+
+@istest
+def parse_json_file_works():
+    fp = io.StringIO('{"abc": "def"}')
+    assert_equal([dict(abc="def")], list(jq.parse_json_file(fp)))
+
+@istest
+def parse_json_empty_text_iter_stops():
+    assert_raises(StopIteration, next, jq.parse_json(text_iter=iter([])))
+    assert_raises(StopIteration, next, jq.parse_json(text_iter=iter([""])))
+    assert_raises(StopIteration, next, jq.parse_json(text_iter=iter(["", ""])))
+
+@istest
+def parse_json_single_complete_text_iter_works():
+    assert_equal(False, next(jq.parse_json(text_iter=iter(["false"]))))
+    assert_equal(True, next(jq.parse_json(text_iter=iter(["true"]))))
+    assert_equal(42, next(jq.parse_json(text_iter=iter(["42"]))))
+    assert_equal(-42, next(jq.parse_json(text_iter=iter(["-42"]))))
+    assert_equal("42", next(jq.parse_json(text_iter=iter(['"42"']))))
+    assert_equal([42], next(jq.parse_json(text_iter=iter(["[42]"]))))
+    assert_equal(dict(a=42),
+                 next(jq.parse_json(text_iter=iter(['{"a": 42}']))))
+
+@istest
+def parse_json_multi_complete_text_iter_works():
+    assert_equal(False, next(jq.parse_json(text_iter=iter(["fa", "lse"]))))
+    assert_equal(True, next(jq.parse_json(text_iter=iter(["tr", "ue"]))))
+    assert_equal(42, next(jq.parse_json(text_iter=iter(["4", "2"]))))
+    assert_equal(-42, next(jq.parse_json(text_iter=iter(["-4", "2"]))))
+    assert_equal("42", next(jq.parse_json(text_iter=iter(['"4', '2"']))))
+    assert_equal([42], next(jq.parse_json(text_iter=iter(["[4", "2]"]))))
+    assert_equal(dict(a=42),
+                 next(jq.parse_json(text_iter=iter(['{"a":', ' 42}']))))
+
+@istest
+def parse_json_single_incomplete_text_iter_breaks():
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(["fals"])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(["tru"])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(["-"])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(['"42'])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(["[42"])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(['{"a": 42'])))
+
+@istest
+def parse_json_multi_incomplete_text_iter_breaks():
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(["fa", "ls"])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(["tr", "u"])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(['"4', '2'])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(["[4", "2"])))
+    assert_raises(jq.JSONParseError, next,
+                  jq.parse_json(text_iter=iter(['{"a":', ' 42'])))
 
 @istest
 class TestJvToPython(object):