diff --git a/README.md b/README.md
index d2314c3..0e49acb 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ from markitdown import MarkItDown
 md = MarkItDown()
 result = md.convert("test.xlsx")
 print(result.text_content)
+result.save("test.md")
 ```
 
 To use Large Language Models for image descriptions, provide `llm_client` and `llm_model`:
diff --git a/src/markitdown/__main__.py b/src/markitdown/__main__.py
index b6cf963..7f662e4 100644
--- a/src/markitdown/__main__.py
+++ b/src/markitdown/__main__.py
@@ -57,6 +57,12 @@ def main():
         "--output",
         help="Output file name. If not provided, output is written to stdout.",
     )
+    parser.add_argument(
+        "-e",
+        "--encoding",
+        help="Encoding of the output file. Defaults to utf-8.",
+        default="utf-8",
+    )
     args = parser.parse_args()
 
     if args.filename is None:
@@ -72,8 +78,7 @@ def main():
 def _handle_output(args, result: DocumentConverterResult):
     """Handle output to stdout or file"""
     if args.output:
-        with open(args.output, "w", encoding="utf-8") as f:
-            f.write(result.text_content)
+        result.save(args.output, encoding=args.encoding)
     else:
         print(result.text_content)
 
diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py
index 789c1e5..2873817 100644
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -147,6 +147,17 @@ def __init__(self, title: Union[str, None] = None, text_content: str = ""):
         self.title: Union[str, None] = title
         self.text_content: str = text_content
 
+    def save(self, file_path: str, encoding: str = "utf-8") -> None:
+        """
+        Save the converted document result `text_content` to a file.
+
+        params:
+            file_path: The path to save the document result to.
+            encoding: The encoding to use when writing the document.
+        """
+        with open(file_path, "w", encoding=encoding) as f:
+            f.write(self.text_content)
+
 
 class DocumentConverter:
     """Abstract superclass of all DocumentConverters."""