Skip to content

Commit

Permalink
VoyageAI multimodal embedding, correction (#17284)
Browse files Browse the repository at this point in the history
  • Loading branch information
fzowl authored Dec 16, 2024
1 parent a853a83 commit 5e9808b
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Voyage embeddings file."""
import logging
import os
from typing import Any, List, Optional, Union

from llama_index.core.base.embeddings.base import Embedding
Expand All @@ -8,7 +9,6 @@

import voyageai
from llama_index.core.embeddings import MultiModalEmbedding
import base64
from io import BytesIO
from pathlib import Path
from llama_index.core.schema import ImageType
Expand Down Expand Up @@ -83,36 +83,29 @@ def _validate_image_format(file_type: str) -> bool:
"""Validate image format."""
return file_type.lower() in SUPPORTED_IMAGE_FORMATS

def _text_to_content(self, input_str: str) -> dict:
return {"type": "text", "text": input_str}

def _texts_to_content(self, input_strs: List[str]) -> List[dict]:
@classmethod
def _texts_to_content(cls, input_strs: List[str]) -> List[dict]:
return [{"content": [{"type": "text", "text": x}]} for x in input_strs]

def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> dict:
def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image:
"""Convert an image to a base64 Data URL."""
if isinstance(image_input, (str, Path)):
image = Image.open(str(image_input))
# If it's a string or Path, assume it's a file path
content = {"type": "image_url", "image_url": image_input}
image_path = str(image_input)
file_extension = os.path.splitext(image_path)[1][1:].lower()
elif isinstance(image_input, BytesIO):
# If it's a BytesIO, use it directly
image = Image.open(image_input)
file_extension = image.format.lower()
image_input.seek(0) # Reset the BytesIO stream to the beginning
image_data = image_input.read()

if self._validate_image_format(file_extension):
enc_img = base64.b64encode(image_data).decode("utf-8")
content = {
"type": "image_base64",
"image_base64": f"data:image/{file_extension};base64,{enc_img}",
}
else:
raise ValueError(f"Unsupported image format: {file_extension}")
else:
raise ValueError("Unsupported input type. Must be a file path or BytesIO.")

return {"content": [content]}
if self._validate_image_format(file_extension):
return image
else:
raise ValueError(f"Unsupported image format: {file_extension}")

def _embed_image(
self, image_path: ImageType, input_type: Optional[str] = None
Expand All @@ -125,7 +118,7 @@ def _embed_image(
processed_image = self._image_to_content(image_path)
return self._client.multimodal_embed(
model=self.model_name,
inputs=[processed_image],
inputs=[[processed_image]],
input_type=input_type,
truncation=self.truncation,
).embeddings[0]
Expand All @@ -142,7 +135,7 @@ async def _aembed_image(
return (
await self._aclient.multimodal_embed(
model=self.model_name,
inputs=[processed_image],
inputs=[[processed_image]],
input_type=input_type,
truncation=self.truncation,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-embeddings-voyageai"
readme = "README.md"
version = "0.3.2"
version = "0.3.3"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
Expand Down

0 comments on commit 5e9808b

Please sign in to comment.