From 5e9808b286d76cceb63baa8928c64babff7e040b Mon Sep 17 00:00:00 2001 From: fzowl <160063452+fzowl@users.noreply.github.com> Date: Tue, 17 Dec 2024 00:28:21 +0100 Subject: [PATCH] VoyageAI multimodal embedding, correction (#17284) --- .../llama_index/embeddings/voyageai/base.py | 33 ++++++++----------- .../pyproject.toml | 2 +- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py index 87c225e81bbb3..860ad1994e84f 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py @@ -1,5 +1,6 @@ """Voyage embeddings file.""" import logging +import os from typing import Any, List, Optional, Union from llama_index.core.base.embeddings.base import Embedding @@ -8,7 +9,6 @@ import voyageai from llama_index.core.embeddings import MultiModalEmbedding -import base64 from io import BytesIO from pathlib import Path from llama_index.core.schema import ImageType @@ -83,36 +83,29 @@ def _validate_image_format(file_type: str) -> bool: """Validate image format.""" return file_type.lower() in SUPPORTED_IMAGE_FORMATS - def _text_to_content(self, input_str: str) -> dict: - return {"type": "text", "text": input_str} - - def _texts_to_content(self, input_strs: List[str]) -> List[dict]: + @classmethod + def _texts_to_content(cls, input_strs: List[str]) -> List[dict]: return [{"content": [{"type": "text", "text": x}]} for x in input_strs] - def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> dict: + def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image: """Convert an image to a base64 Data URL.""" if isinstance(image_input, (str, Path)): + image = Image.open(str(image_input)) # If it's a string or Path, assume it's a file path - content = {"type": "image_url", "image_url": image_input} + image_path = str(image_input) + file_extension = os.path.splitext(image_path)[1][1:].lower() elif isinstance(image_input, BytesIO): # If it's a BytesIO, use it directly image = Image.open(image_input) file_extension = image.format.lower() image_input.seek(0) # Reset the BytesIO stream to the beginning - image_data = image_input.read() - - if self._validate_image_format(file_extension): - enc_img = base64.b64encode(image_data).decode("utf-8") - content = { - "type": "image_base64", - "image_base64": f"data:image/{file_extension};base64,{enc_img}", - } - else: - raise ValueError(f"Unsupported image format: {file_extension}") else: raise ValueError("Unsupported input type. Must be a file path or BytesIO.") - return {"content": [content]} + if self._validate_image_format(file_extension): + return image + else: + raise ValueError(f"Unsupported image format: {file_extension}") def _embed_image( self, image_path: ImageType, input_type: Optional[str] = None @@ -125,7 +118,7 @@ def _embed_image( processed_image = self._image_to_content(image_path) return self._client.multimodal_embed( model=self.model_name, - inputs=[processed_image], + inputs=[[processed_image]], input_type=input_type, truncation=self.truncation, ).embeddings[0] @@ -142,7 +135,7 @@ async def _aembed_image( return ( await self._aclient.multimodal_embed( model=self.model_name, - inputs=[processed_image], + inputs=[[processed_image]], input_type=input_type, truncation=self.truncation, ) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml index 4290b8063ff43..09b4b3cb06d8b 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml +++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-embeddings-voyageai" readme = "README.md" -version = "0.3.2" +version = "0.3.3" [tool.poetry.dependencies] python = ">=3.9,<4.0"