From 7d2a0027d19aa05629d8f8a3a6d99848ab376726 Mon Sep 17 00:00:00 2001 From: fzowl Date: Mon, 16 Dec 2024 20:06:02 +0100 Subject: [PATCH 1/3] Multimodal embeddings, correction --- .../llama_index/embeddings/voyageai/base.py | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py index 87c225e81bbb3..bdffdbae755e4 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py @@ -1,5 +1,6 @@ """Voyage embeddings file.""" import logging +import os from typing import Any, List, Optional, Union from llama_index.core.base.embeddings.base import Embedding @@ -8,7 +9,6 @@ import voyageai from llama_index.core.embeddings import MultiModalEmbedding -import base64 from io import BytesIO from pathlib import Path from llama_index.core.schema import ImageType @@ -83,36 +83,31 @@ def _validate_image_format(file_type: str) -> bool: """Validate image format.""" return file_type.lower() in SUPPORTED_IMAGE_FORMATS - def _text_to_content(self, input_str: str) -> dict: - return {"type": "text", "text": input_str} - - def _texts_to_content(self, input_strs: List[str]) -> List[dict]: + @classmethod + def _texts_to_content(cls, input_strs: List[str]) -> List[dict]: return [{"content": [{"type": "text", "text": x}]} for x in input_strs] - def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> dict: + def _image_to_content( + self, image_input: Union[str, Path, BytesIO] + ) -> Union[str, Image]: """Convert an image to a base64 Data URL.""" if isinstance(image_input, (str, Path)): + image = Image.open(str(image_input)) # If it's a string or Path, assume it's a file path - content = {"type": "image_url", "image_url": image_input} + image_path = str(image_input) + file_extension = os.path.splitext(image_path)[1][1:].lower() elif isinstance(image_input, BytesIO): # If it's a BytesIO, use it directly image = Image.open(image_input) file_extension = image.format.lower() image_input.seek(0) # Reset the BytesIO stream to the beginning - image_data = image_input.read() - - if self._validate_image_format(file_extension): - enc_img = base64.b64encode(image_data).decode("utf-8") - content = { - "type": "image_base64", - "image_base64": f"data:image/{file_extension};base64,{enc_img}", - } - else: - raise ValueError(f"Unsupported image format: {file_extension}") else: raise ValueError("Unsupported input type. Must be a file path or BytesIO.") - return {"content": [content]} + if self._validate_image_format(file_extension): + return image + else: + raise ValueError(f"Unsupported image format: {file_extension}") def _embed_image( self, image_path: ImageType, input_type: Optional[str] = None @@ -125,7 +120,7 @@ def _embed_image( processed_image = self._image_to_content(image_path) return self._client.multimodal_embed( model=self.model_name, - inputs=[processed_image], + inputs=[[processed_image]], input_type=input_type, truncation=self.truncation, ).embeddings[0] @@ -142,7 +137,7 @@ async def _aembed_image( return ( await self._aclient.multimodal_embed( model=self.model_name, - inputs=[processed_image], + inputs=[[processed_image]], input_type=input_type, truncation=self.truncation, ) From 34e63981bc6c91ab4d37ad8b027a8b40ac5c08b1 Mon Sep 17 00:00:00 2001 From: fzowl Date: Mon, 16 Dec 2024 20:15:42 +0100 Subject: [PATCH 2/3] Multimodal embeddings, correction --- .../embeddings/llama-index-embeddings-voyageai/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml index 4290b8063ff43..09b4b3cb06d8b 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml +++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-embeddings-voyageai" readme = "README.md" -version = "0.3.2" +version = "0.3.3" [tool.poetry.dependencies] python = ">=3.9,<4.0" From 81f3fd0aa45970fa8e5a69dfd8f8d49fad24f1b6 Mon Sep 17 00:00:00 2001 From: fzowl Date: Mon, 16 Dec 2024 22:51:08 +0100 Subject: [PATCH 3/3] Multimodal embeddings, correction --- .../llama_index/embeddings/voyageai/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py index bdffdbae755e4..860ad1994e84f 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py @@ -87,9 +87,7 @@ def _validate_image_format(file_type: str) -> bool: def _texts_to_content(cls, input_strs: List[str]) -> List[dict]: return [{"content": [{"type": "text", "text": x}]} for x in input_strs] - def _image_to_content( - self, image_input: Union[str, Path, BytesIO] - ) -> Union[str, Image]: + def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image: """Convert an image to a base64 Data URL.""" if isinstance(image_input, (str, Path)): image = Image.open(str(image_input))