Skip to content

Commit

Permalink
fix text blob urls not replaced with file contents for non-LLM models
Browse files Browse the repository at this point in the history
  • Loading branch information
Benjoyo committed May 3, 2024
1 parent 5aa20f2 commit ea79c84
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 74 deletions.
9 changes: 9 additions & 0 deletions bpm-ai/bpm_ai/common/multimodal.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from bpm_ai_core.llm.common.blob import Blob
from bpm_ai_core.ocr.ocr import OCR
from bpm_ai_core.speech_recognition.asr import ASRModel
from bpm_ai_core.util.file import is_supported_audio_file, is_file, is_supported_text_file
Expand Down Expand Up @@ -45,6 +46,14 @@ def prepare_text_blobs(input_data: dict):
}


async def replace_text_blobs(input_data: dict):
    """Return a copy of ``input_data`` with text-file references inlined.

    Each string value for which ``is_supported_text_file`` returns true is
    loaded through ``Blob.from_path_or_url`` and replaced by its bytes decoded
    as UTF-8. Every other value is carried over unchanged, and the key order
    of the input is kept.
    """
    replaced = {}
    for key, value in input_data.items():
        # Anything that is not a supported text-file string passes through.
        if not (isinstance(value, str) and is_supported_text_file(value)):
            replaced[key] = value
            continue
        raw = await Blob.from_path_or_url(value).as_bytes()
        replaced[key] = raw.decode("utf-8")
    return replaced


def assert_all_files_processed(input_data: dict):
for v in input_data.values():
if v and isinstance(v, str) and is_file(v):
Expand Down
4 changes: 2 additions & 2 deletions bpm-ai/bpm_ai/decide/decide.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from bpm_ai.common.errors import MissingParameterError
from bpm_ai.common.multimodal import transcribe_audio, prepare_images_for_llm_prompt, ocr_documents, prepare_text_blobs, \
assert_all_files_processed
assert_all_files_processed, replace_text_blobs
from bpm_ai.decide.schema import get_cot_decision_output_schema, get_decision_output_schema, remove_order_prefix_from_keys


Expand Down Expand Up @@ -118,7 +118,7 @@ async def decide_classifier(
else: # text classifier
input_data = await ocr_documents(input_data, ocr)
input_data = await transcribe_audio(input_data, asr)
input_data = prepare_text_blobs(input_data)
input_data = await replace_text_blobs(input_data)
assert_all_files_processed(input_data)

input_md = dict_to_md(input_data).strip()
Expand Down
6 changes: 3 additions & 3 deletions bpm-ai/bpm_ai/extract/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from bpm_ai.common.errors import MissingParameterError
from bpm_ai.common.multimodal import transcribe_audio, prepare_images_for_llm_prompt, ocr_documents, prepare_text_blobs, \
assert_all_files_processed
assert_all_files_processed, replace_text_blobs
from bpm_ai.extract.util import merge_dicts, strip_non_numeric_chars, create_json_object


Expand Down Expand Up @@ -94,7 +94,7 @@ async def extract_qa(
else:
input_data = await ocr_documents(input_data, ocr)
input_data = await transcribe_audio(input_data, asr)
input_data = prepare_text_blobs(input_data)
input_data = await replace_text_blobs(input_data)
assert_all_files_processed(input_data)

if not output_schema:
Expand Down Expand Up @@ -131,7 +131,7 @@ async def extract_value(text: str, field_name: str, field_type: str, description

if field_type == "integer":
try:
return int(strip_non_numeric_chars(qa_result.answer))
return int(strip_non_numeric_chars(qa_result.answer)) # todo should also accept and round floats
except ValueError:
return None
elif field_type == "number":
Expand Down
4 changes: 2 additions & 2 deletions bpm-ai/bpm_ai/translate/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from bpm_ai.common.errors import MissingParameterError, LanguageNotFoundError
from bpm_ai.common.multimodal import transcribe_audio, prepare_images_for_llm_prompt, ocr_documents, prepare_text_blobs, \
assert_all_files_processed
assert_all_files_processed, replace_text_blobs
from bpm_ai.translate.util import get_translation_output_schema, get_lang_code


Expand Down Expand Up @@ -69,7 +69,7 @@ async def translate_nmt(

input_items = await ocr_documents(input_items, ocr)
input_items = await transcribe_audio(input_items, asr)
input_data = prepare_text_blobs(input_data)
input_data = await replace_text_blobs(input_data)
assert_all_files_processed(input_data)

try:
Expand Down
110 changes: 46 additions & 64 deletions bpm-ai/poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions bpm-ai/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bpm-ai"
version = "1.7.1"
version = "1.7.2"
description = "AI task automation for BPM engines."
authors = ["Bennet Krause <[email protected]>"]
repository = "https://github.com/holunda-io/bpm-ai"
Expand All @@ -22,7 +22,7 @@ azure-ai-documentintelligence = "^1.0.0b2"
av = "^11.0.0"

[tool.poetry.group.dev.dependencies]
bpm-ai-inference = "0.3.0"
bpm-ai-inference = "0.3.3"
ctranslate2 = "4.1.0"


Expand Down
3 changes: 2 additions & 1 deletion bpm-ai/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from bpm_ai_core.llm.anthropic_chat.anthropic_chat import ChatAnthropic
from bpm_ai_core.llm.openai_chat.openai_chat import ChatOpenAI
from bpm_ai_core.util.rpc import remote_object
from bpm_ai_inference.llm.llama_cpp.llama_chat import ChatLlamaCpp
from bpm_ai_inference.util.hf import hf_home


Expand All @@ -19,7 +20,7 @@ def local_llm():

@pytest.fixture
def llm():
return None #remote_object("ChatLlamaCpp", "0.0.0.0", 6666, model="QuantFactory/Phi-3-mini-4k-instruct-GGUF")
return None #ChatLlamaCpp(model="NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF")


@pytest.fixture(autouse=True, scope="module")
Expand Down
13 changes: 13 additions & 0 deletions bpm-ai/tests/test_decide.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,16 @@ async def test_decide_classifier_none():

assert result["decision"] is None
assert result["reasoning"] == "No input values present."


async def test_decide_classifier_textfile():
    """Check that ``decide_classifier`` handles a text-file input value.

    The only input value is the path ``files/document.txt``; the expected
    decision among ``["invoice", "letter"]`` is ``"invoice"``.
    """
    classifier = TransformersClassifier()

    result = await decide_classifier(
        classifier=classifier,
        input_data={"doc": "files/document.txt"},
        possible_values=["invoice", "letter"],
        output_type="string"
    )

    # Compare by value: `is` tests object identity, which for strings depends
    # on interning and raises a SyntaxWarning when used with a literal.
    assert result["decision"] == "invoice"

0 comments on commit ea79c84

Please sign in to comment.