Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parser: Accept bytes as input #51

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 115 additions & 2 deletions jq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import json
import threading

from cpython.bytes cimport PyBytes_AsString
from cpython.bytes cimport PyBytes_AsStringAndSize


cdef extern from "jv.h":
Expand Down Expand Up @@ -109,6 +110,76 @@ cdef object _jv_to_python(jv value):
return python_value


class JSONParseError(Exception):
    """Raised when the input could not be parsed as JSON."""


cdef class _JSONParser(object):
    """
    Incremental JSON stream parser built on jq's jv_parser.

    Iterating over an instance yields the values parsed from the
    concatenation of the chunks supplied by text_iter, so a single JSON
    value may be split across several chunks.

    Args:
        text_iter: An iterable or iterator yielding strings or bytes -
            consecutive pieces of the JSON stream. Anything that is not
            bytes is encoded with .encode("utf8") before parsing.
    """
    # The jq parser; created in __cinit__ and released in __dealloc__.
    cdef jv_parser* _parser
    # Iterator supplying the input chunks that have not been read yet.
    cdef object _text_iter
    # The chunk most recently handed to the parser, or None if there is
    # none. Keeping the reference keeps the buffer given to
    # jv_parser_set_buf alive while the parser may still read from it.
    cdef object _bytes

    def __dealloc__(self):
        # __dealloc__ also runs when construction failed before the body
        # of __cinit__ executed (e.g. a missing argument), in which case
        # the pointer is still NULL and must not be handed to jq.
        if self._parser != NULL:
            jv_parser_free(self._parser)

    def __cinit__(self, text_iter):
        # Create the parser before anything that can raise, so that
        # __dealloc__ always sees either NULL or a valid parser.
        self._parser = jv_parser_new(0)
        self._bytes = None
        # iter() is a no-op for iterators and lets plain iterables
        # (lists, tuples, generators) be passed as well.
        self._text_iter = iter(text_iter)

    def __iter__(self):
        return self

    def __next__(self):
        """
        Retrieve next parsed JSON value.

        Returns:
            The next parsed JSON value.

        Raises:
            JSONParseError: failed parsing the input JSON.
            StopIteration: no more values available.
        """
        cdef jv value
        cdef jv error_message
        while True:
            # Nothing is currently lent to the parser: fetch the next
            # chunk, or signal end of input if the iterator is exhausted.
            if self._bytes is None:
                self._ready_next_bytes()
            # Parse whatever has been readied, if anything.
            value = jv_parser_next(self._parser)
            if jv_is_valid(value):
                return _jv_to_python(value)
            # NOTE: relies on jq's convention that jv functions consume
            # their argument, hence the jv_copy here and the absence of a
            # jv_free(value) after jv_invalid_get_msg - confirm against
            # jv.h when changing this.
            elif jv_invalid_has_msg(jv_copy(value)):
                error_message = jv_invalid_get_msg(value)
                try:
                    message = jv_string_value(error_message).decode("utf8")
                finally:
                    # Release the message even if decoding fails.
                    jv_free(error_message)
                raise JSONParseError(message)
            else:
                # Invalid value without a message: the parser produced
                # nothing from the current buffer.
                jv_free(value)
                # No chunk was readied, i.e. the input iterator is
                # exhausted, so there is nothing more to parse.
                if self._bytes is None:
                    raise StopIteration
                # Drop the current chunk so the next pass fetches more.
                self._bytes = None

    cdef bint _ready_next_bytes(self) except 1:
        """
        Hand the next input chunk to the parser.

        Takes the next item from the text iterator, converts it to bytes
        if necessary, stores it in self._bytes and gives its buffer to
        the parser. When the iterator is exhausted, self._bytes is set to
        None and the parser is given an empty buffer with a final
        argument of 0 instead of 1 (presumably jq's "is partial" flag,
        marking the end of input - confirm against jv.h).

        Returns:
            0 on success; a return value of 1 is reserved for signalling
            a propagating Python exception.
        """
        cdef char* cbytes
        # Py_ssize_t is the length type PyBytes_AsStringAndSize writes to.
        cdef Py_ssize_t clen
        # Only next() is expected to raise StopIteration; keep the try
        # body limited to it.
        try:
            text = next(self._text_iter)
        except StopIteration:
            self._bytes = None
            jv_parser_set_buf(self._parser, "", 0, 0)
            return 0
        if isinstance(text, bytes):
            self._bytes = text
        else:
            self._bytes = text.encode("utf8")
        PyBytes_AsStringAndSize(self._bytes, &cbytes, &clen)
        jv_parser_set_buf(self._parser, cbytes, clen, 1)
        return 0


def compile(object program, args=None):
    """
    Build a _Program from a jq program string.

    Args:
        program: The jq program text; it is encoded as UTF-8 before
            being handed to _Program.
        args: Forwarded unchanged to _Program as its args keyword.

    Returns:
        The _Program created from the encoded program.
    """
    return _Program(program.encode("utf8"), args=args)
Expand Down Expand Up @@ -295,8 +366,10 @@ cdef class _ResultIterator(object):
self._bytes_input = bytes_input
self._ready = False
cdef jv_parser* parser = jv_parser_new(0)
cdef char* cbytes_input = PyBytes_AsString(bytes_input)
jv_parser_set_buf(parser, cbytes_input, len(cbytes_input), 0)
cdef char* cbytes_input
cdef ssize_t clen_input
PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
jv_parser_set_buf(parser, cbytes_input, clen_input, 0)
self._parser = parser

def __iter__(self):
Expand Down Expand Up @@ -356,6 +429,46 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
return compile(program).input(value, text=text).text()


def parse_json(text=_NO_VALUE, text_iter=_NO_VALUE):
    """
    Parse a stream of JSON values.
    Exactly one of "text" and "text_iter" has to be supplied.

    Args:
        text:       The complete JSON stream as a single string or bytes
                    object.
        text_iter:  An iterator producing strings or bytes, each being a
                    consecutive piece of the JSON stream.

    Returns:
        An iterator over the parsed values.

    Raises:
        ValueError: both or neither of the arguments were supplied.
        JSONParseError: the JSON stream could not be parsed.
    """
    has_text = text is not _NO_VALUE
    has_text_iter = text_iter is not _NO_VALUE
    # Reject both "nothing given" and "both given".
    if has_text == has_text_iter:
        raise ValueError("Either the text or text_iter argument should be set")
    if has_text_iter:
        return _JSONParser(text_iter)
    # A single text is parsed as a one-chunk stream.
    return _JSONParser(_iter((text,)))


def parse_json_file(fp):
    """
    Parse the JSON stream contained in a file.

    The whole file is read with a single fp.read() call and the result
    is passed to parse_json() as its "text" argument.

    Args:
        fp: A file-like object providing the JSON stream via read().

    Returns:
        An iterator over the parsed values.

    Raises:
        JSONParseError: the JSON stream could not be parsed.
    """
    contents = fp.read()
    return parse_json(text=contents)


# Kept so that code written against the 0.1.x API continues to work
def jq(object program):
    """Compile a jq program; equivalent to calling compile(program)."""
    compiled = compile(program)
    return compiled
68 changes: 68 additions & 0 deletions tests/jq_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from nose.tools import istest, assert_equal, assert_is, assert_raises

import io
import jq


Expand Down Expand Up @@ -204,6 +205,73 @@ def program_string_can_be_retrieved_from_program():
program = jq.compile(".")
assert_equal(".", program.program_string)

@istest
def parse_json_all_inputs_accepted():
    """parse_json takes str and bytes, both as text and via text_iter."""
    for chunk in ("true", b"true"):
        assert_equal(True, next(jq.parse_json(text=chunk)))
        assert_equal(True, next(jq.parse_json(text_iter=iter([chunk]))))

@istest
def parse_json_file_works():
    """parse_json_file yields the values read from a file-like object."""
    stream = io.StringIO('{"abc": "def"}')
    parsed = list(jq.parse_json_file(stream))
    assert_equal([dict(abc="def")], parsed)

@istest
def parse_json_empty_text_iter_stops():
    """Input without any JSON value ends the iteration immediately."""
    for chunks in ([], [""], ["", ""]):
        parser = jq.parse_json(text_iter=iter(chunks))
        assert_raises(StopIteration, next, parser)

@istest
def parse_json_single_complete_text_iter_works():
    """A complete value delivered in a single chunk is parsed."""
    cases = (
        (False, ["false"]),
        (True, ["true"]),
        (42, ["42"]),
        (-42, ["-42"]),
        ("42", ['"42"']),
        ([42], ["[42]"]),
        (dict(a=42), ['{"a": 42}']),
    )
    for expected, chunks in cases:
        assert_equal(expected, next(jq.parse_json(text_iter=iter(chunks))))

@istest
def parse_json_multi_complete_text_iter_works():
    """A complete value split across two chunks is parsed."""
    cases = (
        (False, ["fa", "lse"]),
        (True, ["tr", "ue"]),
        (42, ["4", "2"]),
        (-42, ["-4", "2"]),
        ("42", ['"4', '2"']),
        ([42], ["[4", "2]"]),
        (dict(a=42), ['{"a":', ' 42}']),
    )
    for expected, chunks in cases:
        assert_equal(expected, next(jq.parse_json(text_iter=iter(chunks))))

@istest
def parse_json_single_incomplete_text_iter_breaks():
    """A truncated value in a single chunk raises JSONParseError."""
    truncated_inputs = (
        ["fals"],
        ["tru"],
        ["-"],
        ['"42'],
        ["[42"],
        ['{"a": 42'],
    )
    for chunks in truncated_inputs:
        parser = jq.parse_json(text_iter=iter(chunks))
        assert_raises(jq.JSONParseError, next, parser)

@istest
def parse_json_multi_incomplete_text_iter_breaks():
    """A truncated value split across two chunks raises JSONParseError."""
    truncated_inputs = (
        ["fa", "ls"],
        ["tr", "u"],
        ['"4', '2'],
        ["[4", "2"],
        ['{"a":', ' 42'],
    )
    for chunks in truncated_inputs:
        parser = jq.parse_json(text_iter=iter(chunks))
        assert_raises(jq.JSONParseError, next, parser)

@istest
class TestJvToPython(object):
Expand Down