From 7002090cff204eae3fa921bb55963a8662aec3ef Mon Sep 17 00:00:00 2001 From: Tristan Crockett Date: Thu, 4 Apr 2019 17:27:01 -0500 Subject: [PATCH] Experimental bytestream --- src/ohio/__init__.py | 4 +- src/ohio/baseio.py | 55 +++++++++++++++++++++---- src/ohio/iterio.py | 11 ++++- test/__init__.py | 5 +++ test/iterio_test.py | 97 +++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 159 insertions(+), 13 deletions(-) diff --git a/src/ohio/__init__.py b/src/ohio/__init__.py index 5c1423c..828de31 100644 --- a/src/ohio/__init__.py +++ b/src/ohio/__init__.py @@ -1,6 +1,6 @@ """I/O extras""" -from .baseio import (IOClosed, StreamTextIOBase) -from .iterio import IteratorTextIO +from .baseio import (IOClosed, StreamTextIOBase, StreamBufferedIOBase) +from .iterio import IteratorTextIO, IteratorBufferedIO from .csvio import (csv_text, CsvWriterTextIO, CsvDictWriterTextIO) from .pipeio import PipeTextIO, pipe_text diff --git a/src/ohio/baseio.py b/src/ohio/baseio.py index 071ca8e..ba0499e 100644 --- a/src/ohio/baseio.py +++ b/src/ohio/baseio.py @@ -12,18 +12,18 @@ def __init__(self, *args): super().__init__(*args) -class StreamTextIOBase(io.TextIOBase): +class StreamIOBase(object): """Readable file-like abstract base class. Concrete classes may implemented method `__next_chunk__` to return chunks (or all) of the text to be read. """ - def __init__(self): - self._remainder = '' - def __next_chunk__(self): - raise NotImplementedError("StreamTextIOBase subclasses must implement __next_chunk__") + raise NotImplementedError("StreamIOBase subclasses must implement __next_chunk__") + + def _get_empty_value(self): + raise NotImplementedError("StreamIOBase subclasses must implement _get_empty_value") def readable(self): if self.closed: @@ -50,7 +50,7 @@ def read(self, size=None): if size is not None and size < 0: size = None - result = '' + result = self._get_empty_value() while size is None or size > 0: content = self._read1(size) @@ -68,10 +68,11 @@ def readline(self): if self.closed: raise IOClosed() - result = '' + result = self._get_empty_value() + newline = self._get_newline() while True: - index = self._remainder.find('\n') + index = self._remainder.find(newline) if index == -1: result += self._remainder try: @@ -85,3 +86,41 @@ def readline(self): break return result + + +class StreamTextIOBase(StreamIOBase, io.TextIOBase): + """Readable file-like abstract base class for text. + + Concrete classes may implemented method `__next_chunk__` to return + chunks (or all) of the text to be read. + + """ + + def __init__(self): + self._remainder = '' + + @staticmethod + def _get_empty_value(): + return '' + + @staticmethod + def _get_newline(): + return '\n' + + +class StreamBufferedIOBase(StreamIOBase, io.BufferedIOBase): + """Readable file-like abstract base class for bytes. + + Concrete classes may implemented method `__next_chunk__` to return + chunks (or all) of the bytes to be read. + """ + def __init__(self): + self._remainder = b'' + + @staticmethod + def _get_empty_value(): + return b'' + + @staticmethod + def _get_newline(): + return b'\n' diff --git a/src/ohio/iterio.py b/src/ohio/iterio.py index 68aac84..8fa7014 100644 --- a/src/ohio/iterio.py +++ b/src/ohio/iterio.py @@ -1,8 +1,8 @@ from . import baseio -class IteratorTextIO(baseio.StreamTextIOBase): - """Readable file-like interface for iterable text streams.""" +class IteratorIO(baseio.StreamIOBase): + """Readable file-like interface for iterable streams.""" def __init__(self, iterable): super().__init__() @@ -10,3 +10,10 @@ def __init__(self, iterable): def __next_chunk__(self): return next(self.__iterator__) + + +class IteratorTextIO(IteratorIO, baseio.StreamTextIOBase): + pass + +class IteratorBufferedIO(IteratorIO, baseio.StreamBufferedIOBase): + pass diff --git a/test/__init__.py b/test/__init__.py index 18bca3f..96b7ee7 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -23,3 +23,8 @@ def ex_csv_stream(): yield '1/4/09 20:11,Product1,1200,Mastercard,Fleur\r\n' yield '1/2/09 20:09,Product1,1200,Mastercard,adam\r\n' yield '1/4/09 13:17,Product1,1200,Mastercard,Renee Elisabeth\r\n' + + +def ex_csv_bytestream(): + for csvline in ex_csv_stream(): + yield csvline.encode('utf-8') diff --git a/test/iterio_test.py b/test/iterio_test.py index 2b8c75f..1854cd9 100644 --- a/test/iterio_test.py +++ b/test/iterio_test.py @@ -5,7 +5,7 @@ import ohio -from . import ex_csv_stream +from . import ex_csv_stream, ex_csv_bytestream class TestIteratorTextIO: @@ -101,3 +101,98 @@ def test_write_methods(self, buffer, method_name, method_args): with pytest.raises(io.UnsupportedOperation): method(*method_args) + + +class TestIteratorBufferedIO: + + @pytest.fixture + def csv_stream(self): + # only necessary to *spy* on iteration (calls to __next__) + iter_mock = unittest.mock.MagicMock(**{ + 'return_value.__next__.side_effect': ex_csv_bytestream(), + }) + return unittest.mock.Mock(__iter__=iter_mock) + + @pytest.fixture + def buffer(self, csv_stream): + return ohio.IteratorBufferedIO(csv_stream) + + def test_context_manager(self, buffer): + assert not buffer.closed + + with buffer as buffer1: + assert buffer is buffer1 + assert not buffer.closed + + assert buffer.closed + + def test_readable(self, buffer): + assert buffer.readable() + + def test_readable_closed(self, buffer): + buffer.close() + + with pytest.raises(ohio.IOClosed): + buffer.readable() + + def test_read(self, buffer): + all_content = b''.join(ex_csv_bytestream()) + assert buffer.read() == all_content + assert buffer.__iterator__.__next__.call_count == 11 + + def test_read_closed(self, buffer): + buffer.close() + + with pytest.raises(ohio.IOClosed): + buffer.read() + + def test_read_parts(self, buffer): + for (iteration, size, chunk) in ( + (1, 5, b'Trans'), + (1, 15, b'action_date,Pro'), + (2, 43, b'duct,Price,Payment_Type,Name\r\n1/2/09 6:17,P'), + ): + assert buffer.read(size) == chunk + assert buffer.__iterator__.__next__.call_count == iteration + + assert buffer.read(None) + assert buffer.__iterator__.__next__.call_count == 11 + + def test_readline(self, buffer): + for (count, line) in enumerate(ex_csv_bytestream(), 1): + assert buffer.readline() == line + assert buffer.__iterator__.__next__.call_count == count + + def test_readline_closed(self, buffer): + buffer.close() + + with pytest.raises(ohio.IOClosed): + buffer.readline() + + def test_readlines(self, buffer): + assert buffer.readlines() == list(ex_csv_bytestream()) + assert buffer.__iterator__.__next__.call_count == 11 + + def test_iter(self, buffer): + for (count, (buffer_line, example_line)) in enumerate(zip(buffer, ex_csv_bytestream()), 1): + assert buffer_line == example_line + assert buffer.__iterator__.__next__.call_count == count + + def test_not_seekable(self, buffer): + assert not buffer.seekable() + + def test_not_writable(self, buffer): + assert not buffer.writable() + + @pytest.mark.parametrize('method_name,method_args', ( + ('seek', ()), + ('tell', ()), + ('truncate', ()), + ('write', ()), + ('writelines', (['hi\n'],)), + )) + def test_write_methods(self, buffer, method_name, method_args): + method = getattr(buffer, method_name) + + with pytest.raises(io.UnsupportedOperation): + method(*method_args)