From 831c0de97bd86cb3b65a9df1ae8399a0b631ff9c Mon Sep 17 00:00:00 2001 From: Benjoyo Date: Sun, 5 May 2024 12:51:55 +0200 Subject: [PATCH] fix translate images/pdfs --- bpm-ai/bpm_ai/translate/translate.py | 4 ++-- bpm-ai/pyproject.toml | 2 +- bpm-ai/tests/test_translate.py | 24 ++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/bpm-ai/bpm_ai/translate/translate.py b/bpm-ai/bpm_ai/translate/translate.py index 7cf01ce..bf11b4f 100644 --- a/bpm-ai/bpm_ai/translate/translate.py +++ b/bpm-ai/bpm_ai/translate/translate.py @@ -31,8 +31,8 @@ async def translate_llm( else: input_items = await ocr_documents(input_items, ocr) input_items = await transcribe_audio(input_items, asr) - input_data = prepare_text_blobs(input_data) - assert_all_files_processed(input_data) + input_items = prepare_text_blobs(input_items) + assert_all_files_processed(input_items) prompt = Prompt.from_file( "translate", diff --git a/bpm-ai/pyproject.toml b/bpm-ai/pyproject.toml index ca2d0d6..f79f38e 100644 --- a/bpm-ai/pyproject.toml +++ b/bpm-ai/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bpm-ai" -version = "1.7.2" +version = "1.7.3" description = "AI task automation for BPM engines." authors = ["Bennet Krause "] repository = "https://github.com/holunda-io/bpm-ai" diff --git a/bpm-ai/tests/test_translate.py b/bpm-ai/tests/test_translate.py index 3f946f4..4f659e9 100644 --- a/bpm-ai/tests/test_translate.py +++ b/bpm-ai/tests/test_translate.py @@ -80,6 +80,30 @@ async def test_translate_none(llm): assert result["subject"] is None +async def test_translate_image(llm): + input_data = { + "doc": "files/invoice.png", + } + llm = llm or FakeLLM( + name="openai", + supports_images=True, + responses=[ + AssistantMessage( + content={"doc": "Rechnung\n\nVon:\nDEMO - Sliced Invoices\nSuite 5A-1204\n123 Somewhere Street\nYour City AZ 12345\nadmin@slicedinvoices.com\n\nRechnungsnummer: INV-3337"} + ) + ] + ) + result = await translate_llm( + llm=llm, + input_data=input_data, + target_language="German", + ) + #if isinstance(llm, FakeLLM): + # llm.assert_last_request_contains("admin@slicedinvoices.com") + + assert "Rechnung" in result["doc"] + + async def test_translate_empty(llm): input_data = {} llm = llm or FakeLLM(name="openai")