# Reconstructed from the git patch "JSONL writer"
# (commit 7ef09cedb170a897e4e4277403025311a325dd0d, Gary Benson,
# Fri, 31 May 2024 20:33:18 +0100), which creates
# src/dom_tokenizers/internal/jsonl.py.
"""JSONL writer."""

from datetime import datetime
from functools import cached_property
from typing import Optional

from . import json


class Writer:
    """Write records to a JSON Lines file, one JSON object per line.

    The output file is opened lazily, on the first call to `write()`,
    so constructing a `Writer` does not touch the filesystem.

    A `Writer` can be used as a context manager; the file (if it was
    ever opened) is closed on exit::

        with Writer(basename="events", with_timestamp=True) as out:
            out.write(kind="start", ok=True)
    """

    def __init__(
        self,
        filename: Optional[str] = None,
        *,
        mode: str = "a",
        basename: Optional[str] = None,
        ext: str = ".jsonl",
        with_timestamp: bool = False,
    ):
        """Work out the output filename; the file itself is not opened.

        Args:
            filename: Name of the file to write.  When given it is used
                as-is.
            mode: Mode string passed to `open()` when the file is
                first written to.
            basename: Stem used to build the filename when `filename`
                is not given.
            ext: Suffix appended to the name built from `basename`.
            with_timestamp: If true, append `-YYYYmmddHHMMSSffffff`
                (local time at construction) to `basename` before
                `ext` is added.

        Raises:
            ValueError: If neither `filename` nor `basename` is given.
        """
        # NOTE(review): the nesting below was reconstructed from a
        # whitespace-collapsed source; it assumes `with_timestamp` and
        # `ext` only apply to names derived from `basename` -- confirm.
        if filename is None:
            if basename is None:
                # Previously this silently produced a file literally
                # named "None.jsonl".
                raise ValueError("either filename or basename is required")
            filename = basename
            if with_timestamp:
                filename = f"{filename}-{datetime.now():%Y%m%d%H%M%S%f}"
            filename = f"{filename}{ext}"
        self.filename = filename
        self._mode = mode

    @cached_property
    def _fp(self):
        """The output file object, opened on first access and then cached."""
        # JSON text is UTF-8; don't depend on the platform's default
        # encoding.  `open()` rejects `encoding` in binary mode, so only
        # pass it for text modes.
        encoding = None if "b" in self._mode else "utf-8"
        return open(self.filename, self._mode, encoding=encoding)

    def write(self, **fields) -> None:
        """Write `fields` as a single JSON object followed by a newline."""
        fp = self._fp
        json.dump(fields, fp)
        fp.write("\n")

    def close(self) -> None:
        """Close the output file if it was ever opened.

        Safe to call more than once.  The closed file object stays
        cached, so a `write()` after `close()` fails rather than
        silently reopening (and, in mode "w", truncating) the file.
        """
        # `cached_property` stores its value in the instance `__dict__`;
        # looking there avoids opening the file just to close it.
        fp = self.__dict__.get("_fp")
        if fp is not None:
            fp.close()

    def __enter__(self) -> "Writer":
        """Return this writer for use in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the output file on leaving the `with` block."""
        self.close()