Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VoyageAI multimodal embedding, correction #17284

Merged
merged 3 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Voyage embeddings file."""
import logging
import os
from typing import Any, List, Optional, Union

from llama_index.core.base.embeddings.base import Embedding
Expand All @@ -8,7 +9,6 @@

import voyageai
from llama_index.core.embeddings import MultiModalEmbedding
import base64
from io import BytesIO
from pathlib import Path
from llama_index.core.schema import ImageType
Expand Down Expand Up @@ -83,36 +83,29 @@ def _validate_image_format(file_type: str) -> bool:
"""Validate image format."""
return file_type.lower() in SUPPORTED_IMAGE_FORMATS

def _text_to_content(self, input_str: str) -> dict:
return {"type": "text", "text": input_str}

def _texts_to_content(self, input_strs: List[str]) -> List[dict]:
@classmethod
def _texts_to_content(cls, input_strs: List[str]) -> List[dict]:
return [{"content": [{"type": "text", "text": x}]} for x in input_strs]

def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> dict:
def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image:
"""Convert an image to a base64 Data URL."""
if isinstance(image_input, (str, Path)):
image = Image.open(str(image_input))
# If it's a string or Path, assume it's a file path
content = {"type": "image_url", "image_url": image_input}
image_path = str(image_input)
file_extension = os.path.splitext(image_path)[1][1:].lower()
elif isinstance(image_input, BytesIO):
# If it's a BytesIO, use it directly
image = Image.open(image_input)
file_extension = image.format.lower()
image_input.seek(0) # Reset the BytesIO stream to the beginning
image_data = image_input.read()

if self._validate_image_format(file_extension):
enc_img = base64.b64encode(image_data).decode("utf-8")
content = {
"type": "image_base64",
"image_base64": f"data:image/{file_extension};base64,{enc_img}",
}
else:
raise ValueError(f"Unsupported image format: {file_extension}")
else:
raise ValueError("Unsupported input type. Must be a file path or BytesIO.")

return {"content": [content]}
if self._validate_image_format(file_extension):
return image
else:
raise ValueError(f"Unsupported image format: {file_extension}")

def _embed_image(
self, image_path: ImageType, input_type: Optional[str] = None
Expand All @@ -125,7 +118,7 @@ def _embed_image(
processed_image = self._image_to_content(image_path)
return self._client.multimodal_embed(
model=self.model_name,
inputs=[processed_image],
inputs=[[processed_image]],
input_type=input_type,
truncation=self.truncation,
).embeddings[0]
Expand All @@ -142,7 +135,7 @@ async def _aembed_image(
return (
await self._aclient.multimodal_embed(
model=self.model_name,
inputs=[processed_image],
inputs=[[processed_image]],
input_type=input_type,
truncation=self.truncation,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-embeddings-voyageai"
readme = "README.md"
version = "0.3.2"
version = "0.3.3"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
Expand Down
Loading