Skip to content

Commit

Permalink
Merge pull request #6 from dabapps/streaming
Browse files Browse the repository at this point in the history
Streaming
  • Loading branch information
RealOrangeOne authored May 24, 2018
2 parents 42162c7 + 309d58a commit 59cd017
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 18 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,22 @@ Here, our `fetch_records` is just spitting the data straight to the headers for

Note that we specify the type of the header for `headers`. This allows the typechecker to find out quickly if any of your headers are accessing data incorrectly.

Now, we can use it. We have two methods for getting data out - `to_list` and `as_response`. `to_list` will convert the data to a list of lists of strings (allowing you to pass it to whatever other CSV handling options you want) whereas `as_response` will turn it into a prepared HttpResponse for returning from one of your views
Now, we can use it. We have several methods for getting data out:

`to_list` will convert the data to a list of lists of strings (allowing you to pass it to whatever other CSV handling options you want):

```python
LlamaExporter(my_llamas).to_list()
```

whereas `as_response` will turn it into a prepared HttpResponse for returning from one of your views:

```python
LlamaExporter(my_llamas).as_response('my_llamas')
```

If your CSV is large and takes a long time to generate, you should use a generator or stream the response. `to_iter` and `as_streamed_response` are the generator counterparts to the above methods, working in exactly the same way, just returning a generator and a `StreamingHttpResponse` respectively. By default, `to_list` calls `to_iter`, so if you need to do anything custom, it's best to do it in `to_iter`.

You can also provide an ordering to the headers, if you want. Simply assign a list of strings to `header_order` and, when the data is unpacked, those headers whose labels match these will be placed in that order.

```python
Expand Down
59 changes: 42 additions & 17 deletions csv_wrangler/exporter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from abc import ABCMeta, abstractmethod
import csv
from typing import List, Any, NamedTuple, Callable, Dict, TypeVar, Generic
from typing import List, Any, NamedTuple, Callable, Dict, TypeVar, Generic, Generator
from typing import Optional # noqa
from functools import reduce
from django.http import HttpResponse
from django.http import HttpResponse, StreamingHttpResponse


T = TypeVar('T')
Expand All @@ -18,6 +18,17 @@ def __init__(self, label: str, callback: Callable[[T], str]) -> None:
self.callback = callback


class Echo:
    """
    Pseudo-buffer exposing only the ``write`` part of the file-like
    interface.

    Whatever is passed to ``write`` is returned unchanged rather than
    being accumulated anywhere.
    See https://docs.djangoproject.com/en/1.10/howto/outputting-csv/
    """

    def write(self, value):
        """Return ``value`` to the caller instead of storing it in a buffer."""
        written = value
        return written


class BaseExporter(metaclass=ABCMeta):
"""
The root exporter class
Expand All @@ -26,16 +37,28 @@ class BaseExporter(metaclass=ABCMeta):
header_order = None # type: Optional[List[str]]

@abstractmethod
def to_list(self) -> List[List[str]]: # pragma: no cover
def to_iter(self) -> Generator[List[str], None, None]: # pragma: no cover
pass

def to_list(self) -> List[List[str]]:
    """Drain ``to_iter`` and return every row it produced as a single list."""
    return [*self.to_iter()]

def as_response(self, filename: str='export') -> HttpResponse:
    """
    Build a complete CSV download response from the rows of ``to_list``.

    :param filename: base name of the attachment; ``.csv`` is appended.
    :return: an ``HttpResponse`` with content type ``text/csv`` and a
        ``Content-Disposition`` attachment header, whose body holds every
        row written by ``csv.writer``.
    """
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="{}.csv"'.format(filename)
    writer = csv.writer(response)
    # writerows replaces a list comprehension that was run purely for its
    # side effects and built a throwaway list of return values.
    writer.writerows(self.to_list())
    return response

def as_streamed_response(self, filename: str='export') -> StreamingHttpResponse:
    """
    Build a streaming CSV download response fed lazily from ``to_iter``.

    Each row is formatted by a ``csv.writer`` wrapped around an ``Echo``
    pseudo-buffer, so the formatted line is handed straight to the
    response instead of being collected first.

    :param filename: base name of the attachment; ``.csv`` is appended.
    :return: a ``StreamingHttpResponse`` with content type ``text/csv`` and
        a ``Content-Disposition`` attachment header.
    """
    pseudo_buffer = Echo()
    csv_writer = csv.writer(pseudo_buffer)
    # map is lazy: rows are only formatted as the response is consumed.
    formatted_rows = map(csv_writer.writerow, self.to_iter())
    streamed = StreamingHttpResponse(formatted_rows, content_type='text/csv')
    streamed['Content-Disposition'] = 'attachment; filename="{}.csv"'.format(filename)
    return streamed


class Exporter(Generic[T], BaseExporter, metaclass=ABCMeta):

Expand Down Expand Up @@ -63,14 +86,12 @@ def sort_headers(self, headers: List[Header[T]]) -> List[Header[T]]:
self.header_order.index(header.label) if header.label in self.header_order else len(self.header_order)
)

def to_list(self) -> List[List[str]]:
def to_iter(self) -> Generator[List[str], None, None]:
records = self.fetch_records()
headers = self.get_sorted_headers()
lines = [
[header.callback(record) for header in headers]
for record in records
]
return [self.get_header_labels()] + lines
yield self.get_header_labels()
for record in records:
yield [header.callback(record) for header in headers]


class MultiExporter(BaseExporter):
Expand All @@ -84,6 +105,12 @@ def to_list(self) -> List[List[str]]:
exportings = [exporter.to_list() for exporter in self.exporters]
return reduce(lambda memo, exporting: exporting if memo == [] else memo + [[]] + exporting, exportings, [])

def to_iter(self) -> Generator[List[str], None, None]:
    """
    Yield the rows of every exporter in ``self.exporters``, in order,
    with one empty row between consecutive exporters.

    No separator is emitted before the first exporter or after the last.
    """
    for position, exporter in enumerate(self.exporters):
        # Decide on position, not on equality with the last exporter:
        # comparing with ``!=`` drops the separator whenever an earlier
        # exporter equals the last one (e.g. the same instance used twice).
        if position:
            yield []
        yield from exporter.to_iter()


# A (label, callback) pair; the callback is invoked with a record and the
# header's label. The typename matches the bound name so reprs and type
# checkers do not confuse it with the generic ``Header`` class.
SimpleHeader = NamedTuple('SimpleHeader', [('label', str), ('callback', Callable[[Any, str], str])])

Expand All @@ -109,14 +136,12 @@ def get_csv_headers(self) -> List[SimpleHeader]:
def get_csv_header_labels(self) -> List[str]:
return [header.label for header in self.get_csv_headers()]

def to_list(self) -> List[List[str]]:
def to_iter(self) -> Generator[List[str], None, None]:
records = self.fetch_records()
headers = self.get_csv_headers()
lines = [
[header.callback(record, header.label) for header in headers]
for record in records
]
return [self.get_csv_header_labels()] + lines
yield self.get_csv_header_labels()
for record in records:
yield [header.callback(record, header.label) for header in headers]


class PassthroughExporter(BaseExporter):
Expand All @@ -126,5 +151,5 @@ class PassthroughExporter(BaseExporter):
def __init__(self, data: List[List[str]]) -> None:
self.data = data

def to_list(self) -> List[List[str]]:
return self.data
def to_iter(self) -> Generator[List[str], None, None]:
yield from self.data
60 changes: 60 additions & 0 deletions csv_wrangler/test_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import NamedTuple
from typing import List, Any
from csv_wrangler.exporter import Exporter, Header, MultiExporter, SimpleExporter, PassthroughExporter
from django.http import StreamingHttpResponse, HttpResponse


DummyData = NamedTuple('DummyData', [('a', str), ('b', int), ('c', float)])
Expand Down Expand Up @@ -48,6 +49,16 @@ def test_to_list(self) -> None:
self.assertEqual(results[2], ['b', '2', '2.0'])
self.assertEqual(results[3], ['c', '3', '3.0'])

def test_to_iter(self) -> None:
    """The first four rows pulled from ``to_iter`` match the expected rows, in order."""
    row_stream = self.exporter.to_iter()
    expected_rows = [
        ['a', 'b', 'c'],
        ['a', '1', '1.0'],
        ['b', '2', '2.0'],
        ['c', '3', '3.0'],
    ]
    for expected in expected_rows:
        self.assertEqual(next(row_stream), expected)

def test_to_iter_is_same_as_list(self) -> None:
    """Materialising ``to_iter`` gives the same rows as calling ``to_list``."""
    rows_from_iter = list(self.exporter.to_iter())
    rows_from_list = self.exporter.to_list()
    self.assertListEqual(rows_from_iter, rows_from_list)

def test_ordering(self) -> None:
self.exporter.header_order = ['c', 'b', 'a']
results = self.exporter.to_list()
Expand All @@ -67,6 +78,7 @@ def test_partial_ordering(self) -> None:
def test_as_response(self) -> None:
filename = 'hello'
results = self.exporter.as_response(filename)
self.assertIsInstance(results, HttpResponse)
self.assertEqual(results['content-type'], 'text/csv')
self.assertEqual(results['Content-Disposition'], 'attachment; filename="{}.csv"'.format(filename))
self.assertEqual(str(results.content, 'utf-8'), '\r\n'.join([
Expand All @@ -75,6 +87,18 @@ def test_as_response(self) -> None:
in self.exporter.to_list()
]) + '\r\n')

def test_as_streamed_response(self) -> None:
    """``as_streamed_response`` returns a streaming CSV attachment with the expected body."""
    response = self.exporter.as_streamed_response('hello')

    # Response type and headers.
    self.assertIsInstance(response, StreamingHttpResponse)
    self.assertEqual(response['content-type'], 'text/csv')
    self.assertEqual(response['Content-Disposition'], 'attachment; filename="hello.csv"')

    # Body: comma-joined rows, CRLF-separated, with a trailing CRLF.
    actual_body = response.getvalue().decode()
    csv_lines = [','.join(row) for row in self.exporter.to_list()]
    expected_body = '\r\n'.join(csv_lines) + '\r\n'
    self.assertEqual(actual_body, expected_body)


class MultiExporterTestCase(TestCase):

Expand All @@ -97,6 +121,21 @@ def test_multiple_exporters(self) -> None:
self.assertEqual(results[6], ['llama'])
self.assertEqual(results[7], ['drama'])

def test_multiple_exporters_to_iter(self) -> None:
    """``MultiExporter.to_iter`` yields each exporter's rows with an empty row between them."""
    combined = MultiExporter([self.exporter, self.exporter_2])
    row_stream = combined.to_iter()
    expected_rows = (
        ['a', 'b', 'c'],
        ['a', '1', '1.0'],
        ['b', '2', '2.0'],
        ['c', '3', '3.0'],
        [],  # separator between the two exporters
        ['dummy'],
        ['llama'],
        ['drama'],
    )
    for expected in expected_rows:
        self.assertEqual(next(row_stream), expected)


class SimpleExporterTestCase(TestCase):

Expand All @@ -110,6 +149,16 @@ def test_simple_exporter(self) -> None:
self.assertEqual(results[0], ['a', 'b', 'c'])
self.assertEqual(results[1], ['5', '', '15'])

def test_simple_exporter_to_iter(self) -> None:
    """``SimpleExporter.to_iter`` yields the label row, then the record as strings."""
    labels = ['a', 'b', 'c']
    record = {'a': 5, 'b': None, 'c': 15}
    row_stream = SimpleExporter(labels, [record]).to_iter()
    self.assertEqual(next(row_stream), ['a', 'b', 'c'])
    # The None value is expected to come out as an empty string.
    self.assertEqual(next(row_stream), ['5', '', '15'])


class PassthroughExporterTestCase(TestCase):

Expand All @@ -124,6 +173,17 @@ def test_passthrough_to_list(self) -> None:
self.assertEqual(results[1], ['1', '2', '3'])
self.assertEqual(results[2], ['2', '3', '4'])

def test_passthrough_to_iter(self) -> None:
    """``PassthroughExporter.to_iter`` yields the supplied rows unchanged and in order."""
    expected_rows = [
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['2', '3', '4'],
    ]
    # Hand the exporter its own copies so expectations stay independent.
    source_rows = [list(row) for row in expected_rows]
    row_stream = PassthroughExporter(source_rows).to_iter()
    for expected in expected_rows:
        self.assertEqual(next(row_stream), expected)

def test_malformed_passthrough(self) -> None:
exporter = PassthroughExporter([
['a', 'b', 'c'],
Expand Down

0 comments on commit 59cd017

Please sign in to comment.