VoyageAI multimodal embedding, correction (#17284)

run-llama · Dec 16, 2024 · 5e9808b · 5e9808b
1 parent a853a83
commit 5e9808b
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 21 deletions.
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py
@@ -1,5 +1,6 @@
 """Voyage embeddings file."""
 import logging
+import os
 from typing import Any, List, Optional, Union
 
 from llama_index.core.base.embeddings.base import Embedding
@@ -8,7 +9,6 @@
 
 import voyageai
 from llama_index.core.embeddings import MultiModalEmbedding
-import base64
 from io import BytesIO
 from pathlib import Path
 from llama_index.core.schema import ImageType
@@ -83,36 +83,29 @@ def _validate_image_format(file_type: str) -> bool:
         """Validate image format."""
         return file_type.lower() in SUPPORTED_IMAGE_FORMATS
 
-    def _text_to_content(self, input_str: str) -> dict:
-        return {"type": "text", "text": input_str}
-
-    def _texts_to_content(self, input_strs: List[str]) -> List[dict]:
+    @classmethod
+    def _texts_to_content(cls, input_strs: List[str]) -> List[dict]:
         return [{"content": [{"type": "text", "text": x}]} for x in input_strs]
 
-    def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> dict:
+    def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image:
         """Convert an image to a base64 Data URL."""
         if isinstance(image_input, (str, Path)):
+            image = Image.open(str(image_input))
             # If it's a string or Path, assume it's a file path
-            content = {"type": "image_url", "image_url": image_input}
+            image_path = str(image_input)
+            file_extension = os.path.splitext(image_path)[1][1:].lower()
         elif isinstance(image_input, BytesIO):
             # If it's a BytesIO, use it directly
             image = Image.open(image_input)
             file_extension = image.format.lower()
             image_input.seek(0)  # Reset the BytesIO stream to the beginning
-            image_data = image_input.read()
-
-            if self._validate_image_format(file_extension):
-                enc_img = base64.b64encode(image_data).decode("utf-8")
-                content = {
-                    "type": "image_base64",
-                    "image_base64": f"data:image/{file_extension};base64,{enc_img}",
-                }
-            else:
-                raise ValueError(f"Unsupported image format: {file_extension}")
         else:
             raise ValueError("Unsupported input type. Must be a file path or BytesIO.")
 
-        return {"content": [content]}
+        if self._validate_image_format(file_extension):
+            return image
+        else:
+            raise ValueError(f"Unsupported image format: {file_extension}")
 
     def _embed_image(
         self, image_path: ImageType, input_type: Optional[str] = None
@@ -125,7 +118,7 @@ def _embed_image(
         processed_image = self._image_to_content(image_path)
         return self._client.multimodal_embed(
             model=self.model_name,
-            inputs=[processed_image],
+            inputs=[[processed_image]],
             input_type=input_type,
             truncation=self.truncation,
         ).embeddings[0]
@@ -142,7 +135,7 @@ async def _aembed_image(
         return (
             await self._aclient.multimodal_embed(
                 model=self.model_name,
-                inputs=[processed_image],
+                inputs=[[processed_image]],
                 input_type=input_type,
                 truncation=self.truncation,
             )

diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-embeddings-voyageai"
 readme = "README.md"
-version = "0.3.2"
+version = "0.3.3"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"