From 7d2a0027d19aa05629d8f8a3a6d99848ab376726 Mon Sep 17 00:00:00 2001
From: fzowl <zoltan@voyageai.com>
Date: Mon, 16 Dec 2024 20:06:02 +0100
Subject: [PATCH 1/3] Multimodal embeddings: pass opened images as nested inputs to multimodal_embed

---
 .../llama_index/embeddings/voyageai/base.py   | 35 ++++++++-----------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py
index 87c225e81bbb3..bdffdbae755e4 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py
@@ -1,5 +1,6 @@
 """Voyage embeddings file."""
 import logging
+import os
 from typing import Any, List, Optional, Union
 
 from llama_index.core.base.embeddings.base import Embedding
@@ -8,7 +9,6 @@
 
 import voyageai
 from llama_index.core.embeddings import MultiModalEmbedding
-import base64
 from io import BytesIO
 from pathlib import Path
 from llama_index.core.schema import ImageType
@@ -83,36 +83,31 @@ def _validate_image_format(file_type: str) -> bool:
         """Validate image format."""
         return file_type.lower() in SUPPORTED_IMAGE_FORMATS
 
-    def _text_to_content(self, input_str: str) -> dict:
-        return {"type": "text", "text": input_str}
-
-    def _texts_to_content(self, input_strs: List[str]) -> List[dict]:
+    @classmethod
+    def _texts_to_content(cls, input_strs: List[str]) -> List[dict]:
         return [{"content": [{"type": "text", "text": x}]} for x in input_strs]
 
-    def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> dict:
+    def _image_to_content(
+        self, image_input: Union[str, Path, BytesIO]
+    ) -> Union[str, Image]:
         """Convert an image to a base64 Data URL."""
         if isinstance(image_input, (str, Path)):
+            image = Image.open(str(image_input))
             # If it's a string or Path, assume it's a file path
-            content = {"type": "image_url", "image_url": image_input}
+            image_path = str(image_input)
+            file_extension = os.path.splitext(image_path)[1][1:].lower()
         elif isinstance(image_input, BytesIO):
             # If it's a BytesIO, use it directly
             image = Image.open(image_input)
             file_extension = image.format.lower()
             image_input.seek(0)  # Reset the BytesIO stream to the beginning
-            image_data = image_input.read()
-
-            if self._validate_image_format(file_extension):
-                enc_img = base64.b64encode(image_data).decode("utf-8")
-                content = {
-                    "type": "image_base64",
-                    "image_base64": f"data:image/{file_extension};base64,{enc_img}",
-                }
-            else:
-                raise ValueError(f"Unsupported image format: {file_extension}")
         else:
             raise ValueError("Unsupported input type. Must be a file path or BytesIO.")
 
-        return {"content": [content]}
+        if self._validate_image_format(file_extension):
+            return image
+        else:
+            raise ValueError(f"Unsupported image format: {file_extension}")
 
     def _embed_image(
         self, image_path: ImageType, input_type: Optional[str] = None
@@ -125,7 +120,7 @@ def _embed_image(
         processed_image = self._image_to_content(image_path)
         return self._client.multimodal_embed(
             model=self.model_name,
-            inputs=[processed_image],
+            inputs=[[processed_image]],
             input_type=input_type,
             truncation=self.truncation,
         ).embeddings[0]
@@ -142,7 +137,7 @@ async def _aembed_image(
         return (
             await self._aclient.multimodal_embed(
                 model=self.model_name,
-                inputs=[processed_image],
+                inputs=[[processed_image]],
                 input_type=input_type,
                 truncation=self.truncation,
             )

From 34e63981bc6c91ab4d37ad8b027a8b40ac5c08b1 Mon Sep 17 00:00:00 2001
From: fzowl <zoltan@voyageai.com>
Date: Mon, 16 Dec 2024 20:15:42 +0100
Subject: [PATCH 2/3] Multimodal embeddings: bump llama-index-embeddings-voyageai to 0.3.3

---
 .../embeddings/llama-index-embeddings-voyageai/pyproject.toml   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml
index 4290b8063ff43..09b4b3cb06d8b 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-embeddings-voyageai"
 readme = "README.md"
-version = "0.3.2"
+version = "0.3.3"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"

From 81f3fd0aa45970fa8e5a69dfd8f8d49fad24f1b6 Mon Sep 17 00:00:00 2001
From: fzowl <zoltan@voyageai.com>
Date: Mon, 16 Dec 2024 22:51:08 +0100
Subject: [PATCH 3/3] Multimodal embeddings: simplify _image_to_content return annotation

---
 .../llama_index/embeddings/voyageai/base.py                   | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py
index bdffdbae755e4..860ad1994e84f 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py
@@ -87,9 +87,7 @@ def _validate_image_format(file_type: str) -> bool:
     def _texts_to_content(cls, input_strs: List[str]) -> List[dict]:
         return [{"content": [{"type": "text", "text": x}]} for x in input_strs]
 
-    def _image_to_content(
-        self, image_input: Union[str, Path, BytesIO]
-    ) -> Union[str, Image]:
+    def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image:
         """Convert an image to a base64 Data URL."""
         if isinstance(image_input, (str, Path)):
             image = Image.open(str(image_input))