diff --git a/README.md b/README.md index 8d0cb06..1509ed1 100644 --- a/README.md +++ b/README.md @@ -56,14 +56,22 @@ Here, our `fetch_records` is just spitting the data straight to the headers for Note that we specify the type of the header for `headers`. This allows the typechecker to find out quickly if any of your headers are accessing data incorrectly. -Now, we can use it. We have two methods for getting data out - `to_list` and `as_response`. `to_list` will convert the data to a list of lists of strings (allowing you to pass it to whatever other CSV handling options you want) whereas `as_response` will turn it into a prepared HttpResponse for returning from one of your views +Now, we can use it. We have several methods for getting data out: + +`to_list` will convert the data to a list of lists of strings (allowing you to pass it to whatever other CSV handling options you want): ```python LlamaExporter(my_llamas).to_list() +``` + +whereas `as_response` will turn it into a prepared HttpResponse for returning from one of your views: +```python LlamaExporter.as_response('my_llamas') ``` +If your CSV is large and takes a long time to generate, you should use a generator or stream the response. `to_iter` and `as_streamed_response` are the generator counterparts to the above methods, working in exactly the same way, just returning a generator and a `StreamingHttpResponse` respectively. By default, `to_list` calls `to_iter`, so if you need to do anything custom, it's best to do it in `to_iter`. + You can also provide an ordering to the headers, if you want. Simply assign a list of strings to `header_order` and when the data is unpacked, those headers who's labels match these will be placed in that order. 
```python diff --git a/csv_wrangler/exporter.py b/csv_wrangler/exporter.py index 8a83150..4cc8888 100644 --- a/csv_wrangler/exporter.py +++ b/csv_wrangler/exporter.py @@ -1,9 +1,9 @@ from abc import ABCMeta, abstractmethod import csv -from typing import List, Any, NamedTuple, Callable, Dict, TypeVar, Generic +from typing import List, Any, NamedTuple, Callable, Dict, TypeVar, Generic, Generator from typing import Optional # noqa from functools import reduce -from django.http import HttpResponse +from django.http import HttpResponse, StreamingHttpResponse T = TypeVar('T') @@ -18,6 +18,17 @@ def __init__(self, label: str, callback: Callable[[T], str]) -> None: self.callback = callback +class Echo: + """ + An object that implements just the write method of the file-like + interface. + https://docs.djangoproject.com/en/1.10/howto/outputting-csv/ + """ + def write(self, value): + """Write the value by returning it, instead of storing in a buffer.""" + return value + + class BaseExporter(metaclass=ABCMeta): """ The root exporter class @@ -26,9 +37,12 @@ class BaseExporter(metaclass=ABCMeta): header_order = None # type: Optional[List[str]] @abstractmethod - def to_list(self) -> List[List[str]]: # pragma: no cover + def to_iter(self) -> Generator[List[str], None, None]: # pragma: no cover pass + def to_list(self) -> List[List[str]]: + return list(self.to_iter()) + def as_response(self, filename: str='export') -> HttpResponse: response = HttpResponse(content_type='text/csv') response['Content-Disposition'] = 'attachment; filename="{}.csv"'.format(filename) @@ -36,6 +50,15 @@ def as_response(self, filename: str='export') -> HttpResponse: [writer.writerow(row) for row in self.to_list()] return response + def as_streamed_response(self, filename: str='export') -> StreamingHttpResponse: + writer = csv.writer(Echo()) + response = StreamingHttpResponse( + (writer.writerow(row) for row in self.to_iter()), + content_type='text/csv' + ) + response['Content-Disposition'] = 'attachment; 
filename="{}.csv"'.format(filename) + return response + class Exporter(Generic[T], BaseExporter, metaclass=ABCMeta): @@ -63,14 +86,12 @@ def sort_headers(self, headers: List[Header[T]]) -> List[Header[T]]: self.header_order.index(header.label) if header.label in self.header_order else len(self.header_order) ) - def to_list(self) -> List[List[str]]: + def to_iter(self) -> Generator[List[str], None, None]: records = self.fetch_records() headers = self.get_sorted_headers() - lines = [ - [header.callback(record) for header in headers] - for record in records - ] - return [self.get_header_labels()] + lines + yield self.get_header_labels() + for record in records: + yield [header.callback(record) for header in headers] class MultiExporter(BaseExporter): @@ -84,6 +105,12 @@ def to_list(self) -> List[List[str]]: exportings = [exporter.to_list() for exporter in self.exporters] return reduce(lambda memo, exporting: exporting if memo == [] else memo + [[]] + exporting, exportings, []) + def to_iter(self) -> Generator[List[str], None, None]: + for exporter in self.exporters: + yield from exporter.to_iter() + if exporter != self.exporters[-1]: + yield [] + SimpleHeader = NamedTuple('Header', [('label', str), ('callback', Callable[[Any, str], str])]) @@ -109,14 +136,12 @@ def get_csv_headers(self) -> List[SimpleHeader]: def get_csv_header_labels(self) -> List[str]: return [header.label for header in self.get_csv_headers()] - def to_list(self) -> List[List[str]]: + def to_iter(self) -> Generator[List[str], None, None]: records = self.fetch_records() headers = self.get_csv_headers() - lines = [ - [header.callback(record, header.label) for header in headers] - for record in records - ] - return [self.get_csv_header_labels()] + lines + yield self.get_csv_header_labels() + for record in records: + yield [header.callback(record, header.label) for header in headers] class PassthroughExporter(BaseExporter): @@ -126,5 +151,5 @@ class PassthroughExporter(BaseExporter): def __init__(self, 
data: List[List[str]]) -> None: self.data = data - def to_list(self) -> List[List[str]]: - return self.data + def to_iter(self) -> Generator[List[str], None, None]: + yield from self.data diff --git a/csv_wrangler/test_exporter.py b/csv_wrangler/test_exporter.py index a2220f2..c15756f 100644 --- a/csv_wrangler/test_exporter.py +++ b/csv_wrangler/test_exporter.py @@ -2,6 +2,7 @@ from typing import NamedTuple from typing import List, Any from csv_wrangler.exporter import Exporter, Header, MultiExporter, SimpleExporter, PassthroughExporter +from django.http import StreamingHttpResponse, HttpResponse DummyData = NamedTuple('DummyData', [('a', str), ('b', int), ('c', float)]) @@ -48,6 +49,16 @@ def test_to_list(self) -> None: self.assertEqual(results[2], ['b', '2', '2.0']) self.assertEqual(results[3], ['c', '3', '3.0']) + def test_to_iter(self) -> None: + results = self.exporter.to_iter() + self.assertEqual(next(results), ['a', 'b', 'c']) + self.assertEqual(next(results), ['a', '1', '1.0']) + self.assertEqual(next(results), ['b', '2', '2.0']) + self.assertEqual(next(results), ['c', '3', '3.0']) + + def test_to_iter_is_same_as_list(self) -> None: + self.assertListEqual(list(self.exporter.to_iter()), self.exporter.to_list()) + def test_ordering(self) -> None: self.exporter.header_order = ['c', 'b', 'a'] results = self.exporter.to_list() @@ -67,6 +78,7 @@ def test_partial_ordering(self) -> None: def test_as_response(self) -> None: filename = 'hello' results = self.exporter.as_response(filename) + self.assertIsInstance(results, HttpResponse) self.assertEqual(results['content-type'], 'text/csv') self.assertEqual(results['Content-Disposition'], 'attachment; filename="{}.csv"'.format(filename)) self.assertEqual(str(results.content, 'utf-8'), '\r\n'.join([ @@ -75,6 +87,18 @@ def test_as_response(self) -> None: in self.exporter.to_list() ]) + '\r\n') + def test_as_streamed_response(self) -> None: + filename = 'hello' + results = self.exporter.as_streamed_response(filename) + 
self.assertIsInstance(results, StreamingHttpResponse) + self.assertEqual(results['content-type'], 'text/csv') + self.assertEqual(results['Content-Disposition'], 'attachment; filename="hello.csv"') + self.assertEqual(results.getvalue().decode(), '\r\n'.join([ + ','.join(row) + for row + in self.exporter.to_list() + ]) + '\r\n') + class MultiExporterTestCase(TestCase): @@ -97,6 +121,21 @@ def test_multiple_exporters(self) -> None: self.assertEqual(results[6], ['llama']) self.assertEqual(results[7], ['drama']) + def test_multiple_exporters_to_iter(self) -> None: + multi_exporter = MultiExporter([ + self.exporter, + self.exporter_2 + ]) + results = multi_exporter.to_iter() + self.assertEqual(next(results), ['a', 'b', 'c']) + self.assertEqual(next(results), ['a', '1', '1.0']) + self.assertEqual(next(results), ['b', '2', '2.0']) + self.assertEqual(next(results), ['c', '3', '3.0']) + self.assertEqual(next(results), []) + self.assertEqual(next(results), ['dummy']) + self.assertEqual(next(results), ['llama']) + self.assertEqual(next(results), ['drama']) + class SimpleExporterTestCase(TestCase): @@ -110,6 +149,16 @@ def test_simple_exporter(self) -> None: self.assertEqual(results[0], ['a', 'b', 'c']) self.assertEqual(results[1], ['5', '', '15']) + def test_simple_exporter_to_iter(self) -> None: + exporter = SimpleExporter(['a', 'b', 'c'], [{ + 'a': 5, + 'b': None, + 'c': 15 + }]) + results = exporter.to_iter() + self.assertEqual(next(results), ['a', 'b', 'c']) + self.assertEqual(next(results), ['5', '', '15']) + class PassthroughExporterTestCase(TestCase): @@ -124,6 +173,17 @@ def test_passthrough_to_list(self) -> None: self.assertEqual(results[1], ['1', '2', '3']) self.assertEqual(results[2], ['2', '3', '4']) + def test_passthrough_to_iter(self) -> None: + exporter = PassthroughExporter([ + ['a', 'b', 'c'], + ['1', '2', '3'], + ['2', '3', '4'], + ]) + results = exporter.to_iter() + self.assertEqual(next(results), ['a', 'b', 'c']) + self.assertEqual(next(results), ['1', '2', 
'3']) + self.assertEqual(next(results), ['2', '3', '4']) + def test_malformed_passthrough(self) -> None: exporter = PassthroughExporter([ ['a', 'b', 'c'],