diff --git a/CHANGELOG.md b/CHANGELOG.md index b227b7db41ae0..6ff3df05c2473 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,135 @@ # ChangeLog +## [2024-10-31] + +### `llama-index-core` [0.11.21] + +- Fixed issue with default value set as None for workflow `ctx.get()` (#16756) +- fix various issues with react agent streaming (#16755) +- add unit test for query pipeline (#16749) +- Fix \_merge_ref_doc_kv_pairs duped for-loop (#16739) +- bugfix: determine if nodes is none when creating index (#16703) +- fixes LLMRerank default_parse_choice_select_answer_fn parsing issue (#16736) +- fix return type check on workflows (#16724) +- Fixing a verbose issue and making sql errors more informative (#16686) + +### `llama-index-embeddings-siliconflow` [0.1.0] + +- add siliconflow embedding class (#16753) + +### `llama-index-graph-stores-falkordb` [0.2.4] + +- Multi-Graph-Supported-FalkorDB (#16482) + +### `llama-index-llms-anthropic` [0.3.8] + +- adding additional claude model name, for vertex AI (#16692) + +### `llama-index-llms-bedrock-converse` [0.3.6] + +- Added mistral large2 model id in bedrock (#16742) +- Improve Bedrock Tool Calling (#16723) +- add new sonnet3.5 to function calling bedrock converse models (#16702) +- update bedrock models (#16698) + +### `llama-index-llms-bedrock` [0.2.5] + +- Added mistral large2 model id in bedrock (#16742) +- add new sonnet3.5 to function calling bedrock converse models (#16702) +- update bedrock models (#16698) + +### `llama-index-llms-cohere` [0.3.2] + +- Adding support to the new Aya-Expanse models from Cohere (#16733) + +### `llama-index-llms-dashscope` [0.2.3] + +- DashScope llm support async (#16711) + +### `llama-index-llms-nvidia` [0.3.4] + +- add nvidia/llama-3.2-nv-embedqa-1b-v1 to set of supported models (#16694) + +### `llama-index-llms-pipeshift` [0.1.0] + +- Pipeshift llama index integration (#16610) + +### `llama-index-memory-mem0` [0.1.0] + +- add Mem0 as a memory (#16708) + +### `llama-index-multi-modal-llms-anthropic` [0.2.4] + +- Fix anthropic multimodal deps conflict, update models (#16699) + +### `llama-index-node-parser-docling` [0.2.0] + +- feat: update Docling reader & node parser to Docling v2 (#16677) + +### `llama-index-postprocessor-nvidia-rerank` [0.3.3] + +- add nvidia/llama-3.2-nv-rerankqa-1b-v1 to set of supported models (#16695) + +### `llama-index-postprocessor-siliconflow-rerank` [0.1.0] + +- add siliconflow rerank class (#16737) + +### `llama-index-readers-docling` [0.2.0] + +- feat: update Docling reader & node parser to Docling v2 (#16677) + +### `llama-index-readers-microsoft-onedrive` [0.2.1] + +- feat: add permissions to one drive metadata (#16646) + +### `llama-index-storage-chat-store-azure` [0.2.4] + +- Add Managed Identity authentication support for Azure storage components (#16710) +- Add missing awaits in azure chat store (#16645) + +### `llama-index-storage-docstore-azure` [0.2.1] + +- Add Managed Identity authentication support for Azure storage components (#16710) + +### `llama-index-storage-index-store-azure` [0.3.1] + +- Add Managed Identity authentication support for Azure storage components (#16710) + +### `llama-index-storage-kvstore-azure` [0.2.1] + +- Add Managed Identity authentication support for Azure storage components (#16710) + +### `llama-index-tools-openai-image-generation` [0.3.0] + +- Makes the tool more compatible with the options, also for the future (#16676) + +### `llama-index-tools-vectara-query` [0.1.0] + +- Add Vectara Query Tool (#16722) + +### 
`llama-index-vector-stores-azureaisearch` [0.2.6] + +- Allow defining retrievable fields in Azure Vector Store (#16766) +- feat: add get_nodes azureai search (#16761) +- Added get_nodes() function in AISearch vector store (#16653) +- Fix querying for ID in AzureAISearchVectorStore (fixes delete_nodes by node_ids) (#16769) + +### `llama-index-vector-stores-hnswlib` [0.2.0] + +- Fixed issue with persistence, rearranged and added new options to construction of HnswlibVectorStore (#16673) + +### `llama-index-vector-stores-opensearch` [0.4.1] + +- Init OpensearchVectorClient with `os_async_client` (#16767) + +### `llama-index-vector-stores-qdrant` [0.3.3] + +- chore: add embeddings on qdrant get_nodes return (#16760) + +### `llama-index-vector-stores-weaviate` [1.1.3] + +- add default ID if node ID is not provided (#16671) + ## [2024-10-24] ### `llama-index-core` [0.11.20] diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index b227b7db41ae0..966d9a9540eff 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -1,5 +1,135 @@ # ChangeLog +## [2024-10-31] + +### `llama-index-core` [0.11.21] + +- Fixed issue with default value set as None for workflow `ctx.get()` (#16756) +- fix various issues with react agent streaming (#16755) +- add unit test for query pipeline (#16749) +- Fix _merge_ref_doc_kv_pairs duped for-loop (#16739) +- bugfix: determine if nodes is none when creating index (#16703) +- fixes LLMRerank default_parse_choice_select_answer_fn parsing issue (#16736) +- fix return type check on workflows (#16724) +- Fixing a verbose issue and making sql errors more informative (#16686) + +### `llama-index-embeddings-siliconflow` [0.1.0] + +- add siliconflow embedding class (#16753) + +### `llama-index-graph-stores-falkordb` [0.2.4] + +- Multi-Graph-Supported-FalkorDB (#16482) + +### `llama-index-llms-anthropic` [0.3.8] + +- adding additional claude model name, for vertex AI (#16692) + +### `llama-index-llms-bedrock-converse` [0.3.6] + +- Added mistral large2 model id in bedrock (#16742) +- Improve Bedrock Tool Calling (#16723) +- add new sonnet3.5 to function calling bedrock converse models (#16702) +- update bedrock models (#16698) + +### `llama-index-llms-bedrock` [0.2.5] + +- Added mistral large2 model id in bedrock (#16742) +- add new sonnet3.5 to function calling bedrock converse models (#16702) +- update bedrock models (#16698) + +### `llama-index-llms-cohere` [0.3.2] + +- Adding support to the new Aya-Expanse models from Cohere (#16733) + +### `llama-index-llms-dashscope` [0.2.3] + +- DashScope llm support async (#16711) + +### `llama-index-llms-nvidia` [0.3.4] + +- add nvidia/llama-3.2-nv-embedqa-1b-v1 to set of supported models (#16694) + +### `llama-index-llms-pipeshift` [0.1.0] + +- Pipeshift llama index integration (#16610) + +### `llama-index-memory-mem0` [0.1.0] + +- add Mem0 as a memory (#16708) + +### `llama-index-multi-modal-llms-anthropic` [0.2.4] + +- Fix anthropic multimodal deps conflict, update models (#16699) + +### `llama-index-node-parser-docling` [0.2.0] + +- feat: update Docling reader & node parser to Docling v2 (#16677) + +### `llama-index-postprocessor-nvidia-rerank` [0.3.3] + +- add nvidia/llama-3.2-nv-rerankqa-1b-v1 to set of supported models (#16695) + +### `llama-index-postprocessor-siliconflow-rerank` [0.1.0] + +- add siliconflow rerank class (#16737) + +### `llama-index-readers-docling` [0.2.0] + +- feat: update Docling reader & node parser to Docling v2 (#16677) + +### `llama-index-readers-microsoft-onedrive` [0.2.1] + +- feat: add 
permissions to one drive metadata (#16646) + +### `llama-index-storage-chat-store-azure` [0.2.4] + +- Add Managed Identity authentication support for Azure storage components (#16710) +- Add missing awaits in azure chat store (#16645) + +### `llama-index-storage-docstore-azure` [0.2.1] + +- Add Managed Identity authentication support for Azure storage components (#16710) + +### `llama-index-storage-index-store-azure` [0.3.1] + +- Add Managed Identity authentication support for Azure storage components (#16710) + +### `llama-index-storage-kvstore-azure` [0.2.1] + +- Add Managed Identity authentication support for Azure storage components (#16710) + +### `llama-index-tools-openai-image-generation` [0.3.0] + +- Makes the tool more compatible with the options, also for the future (#16676) + +### `llama-index-tools-vectara-query` [0.1.0] + +- Add Vectara Query Tool (#16722) + +### `llama-index-vector-stores-azureaisearch` [0.2.6] + +- Allow defining retrievable fields in Azure Vector Store (#16766) +- feat: add get_nodes azureai search (#16761) +- Added get_nodes() function in AISearch vector store (#16653) +- Fix querying for ID in AzureAISearchVectorStore (fixes delete_nodes by node_ids) (#16769) + +### `llama-index-vector-stores-hnswlib` [0.2.0] + +- Fixed issue with persistence, rearranged and added new options to construction of HnswlibVectorStore (#16673) + +### `llama-index-vector-stores-opensearch` [0.4.1] + +- Init OpensearchVectorClient with `os_async_client` (#16767) + +### `llama-index-vector-stores-qdrant` [0.3.3] + +- chore: add embeddings on qdrant get_nodes return (#16760) + +### `llama-index-vector-stores-weaviate` [1.1.3] + +- add default ID if node ID is not provided (#16671) + ## [2024-10-24] ### `llama-index-core` [0.11.20] diff --git a/docs/docs/api_reference/embeddings/siliconflow.md b/docs/docs/api_reference/embeddings/siliconflow.md new file mode 100644 index 0000000000000..de30729c5d011 --- /dev/null +++ b/docs/docs/api_reference/embeddings/siliconflow.md @@ -0,0 +1,4 @@ +::: llama_index.embeddings.siliconflow + options: + members: + - SiliconFlowEmbedding diff --git a/docs/docs/api_reference/memory/mem0.md b/docs/docs/api_reference/memory/mem0.md new file mode 100644 index 0000000000000..d2cb09fe03057 --- /dev/null +++ b/docs/docs/api_reference/memory/mem0.md @@ -0,0 +1,4 @@ +::: llama_index.memory.mem0 + options: + members: + - Mem0Memory diff --git a/docs/docs/api_reference/postprocessor/siliconflow_rerank.md b/docs/docs/api_reference/postprocessor/siliconflow_rerank.md new file mode 100644 index 0000000000000..df2db9a228d93 --- /dev/null +++ b/docs/docs/api_reference/postprocessor/siliconflow_rerank.md @@ -0,0 +1,4 @@ +::: llama_index.postprocessor.siliconflow_rerank + options: + members: + - SiliconFlowRerank diff --git a/docs/docs/api_reference/tools/vectara_query.md b/docs/docs/api_reference/tools/vectara_query.md new file mode 100644 index 0000000000000..a808c09e5e700 --- /dev/null +++ b/docs/docs/api_reference/tools/vectara_query.md @@ -0,0 +1,4 @@ +::: llama_index.tools.vectara_query + options: + members: + - VectaraQueryToolSpec diff --git a/docs/docs/examples/llm/fireworks.ipynb b/docs/docs/examples/llm/fireworks.ipynb index e67751e68dcb0..4d4fbeb66bf9c 100644 --- a/docs/docs/examples/llm/fireworks.ipynb +++ b/docs/docs/examples/llm/fireworks.ipynb @@ -31,102 +31,30 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-llms-fireworks" + "%pip install llama-index llama-index-llms-fireworks" ] }, { - "cell_type": "code", - 
"execution_count": null, - "id": "83ea30ee", + "cell_type": "markdown", + "id": "b007403c-6b7a-420c-92f1-4171d05ed9bb", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-index in /home/bennyfirebase/conda/lib/python3.9/site-packages (0.10.6)\n", - "Requirement already satisfied: llama-index-embeddings-openai<0.2.0,>=0.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.1.5)\n", - "Requirement already satisfied: llama-index-multi-modal-llms-openai<0.2.0,>=0.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.1.3)\n", - "Requirement already satisfied: llama-index-readers-file<0.2.0,>=0.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.1.4)\n", - "Requirement already satisfied: llama-index-llms-openai<0.2.0,>=0.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.1.5)\n", - "Requirement already satisfied: llama-index-question-gen-openai<0.2.0,>=0.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.1.2)\n", - "Requirement already satisfied: llama-index-core<0.11.0,>=0.10.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.10.9)\n", - "Requirement already satisfied: llama-index-program-openai<0.2.0,>=0.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.1.3)\n", - "Requirement already satisfied: llama-index-legacy<0.10.0,>=0.9.48 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.9.48)\n", - "Requirement already satisfied: llama-index-agent-openai<0.2.0,>=0.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index) (0.1.4)\n", - "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (1.6.0)\n", - "Requirement already satisfied: httpx in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (0.26.0)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (4.66.2)\n", - "Requirement already satisfied: numpy in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (1.26.4)\n", - "Requirement already satisfied: pillow>=9.0.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (10.2.0)\n", - "Requirement already satisfied: dataclasses-json in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (0.6.4)\n", - "Requirement already satisfied: tenacity<9.0.0,>=8.2.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (8.2.3)\n", - "Requirement already satisfied: networkx>=3.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (3.2.1)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (3.9.3)\n", - "Requirement already satisfied: PyYAML>=6.0.1 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (6.0.1)\n", - "Requirement already 
satisfied: SQLAlchemy[asyncio]>=1.4.49 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (2.0.27)\n", - "Requirement already satisfied: tiktoken>=0.3.3 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (0.6.0)\n", - "Requirement already satisfied: openai>=1.1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (1.12.0)\n", - "Requirement already satisfied: pandas in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (2.2.0)\n", - "Requirement already satisfied: deprecated>=1.2.9.3 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (1.2.14)\n", - "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (1.0.8)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (2024.2.0)\n", - "Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (3.8.1)\n", - "Requirement already satisfied: typing-inspect>=0.8.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (0.9.0)\n", - "Requirement already satisfied: llamaindex-py-client<0.2.0,>=0.1.13 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (0.1.13)\n", - "Requirement already satisfied: requests>=2.31.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (2.31.0)\n", - "Requirement already satisfied: typing-extensions>=4.5.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-core<0.11.0,>=0.10.0->llama-index) (4.9.0)\n", - "Requirement already satisfied: bs4<0.0.3,>=0.0.2 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-readers-file<0.2.0,>=0.1.0->llama-index) (0.0.2)\n", - "Requirement already satisfied: pymupdf<2.0.0,>=1.23.21 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-readers-file<0.2.0,>=0.1.0->llama-index) (1.23.25)\n", - "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-readers-file<0.2.0,>=0.1.0->llama-index) (4.12.3)\n", - "Requirement already satisfied: pypdf<5.0.0,>=4.0.1 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llama-index-readers-file<0.2.0,>=0.1.0->llama-index) (4.0.2)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.0->llama-index) (4.0.3)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.0->llama-index) (6.0.5)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from 
aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.0->llama-index) (23.2.0)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.9.4)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.4.1)\n", - "Requirement already satisfied: soupsieve>1.2 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.2.0,>=0.1.0->llama-index) (2.5)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from deprecated>=1.2.9.3->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.16.0)\n", - "Requirement already satisfied: pydantic>=1.10 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from llamaindex-py-client<0.2.0,>=0.1.13->llama-index-core<0.11.0,>=0.10.0->llama-index) (2.6.1)\n", - "Requirement already satisfied: anyio in /home/bennyfirebase/conda/lib/python3.9/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.0->llama-index) (4.3.0)\n", - "Requirement already satisfied: httpcore==1.* in /home/bennyfirebase/conda/lib/python3.9/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.0.3)\n", - "Requirement already satisfied: sniffio in /home/bennyfirebase/conda/lib/python3.9/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.3.0)\n", - "Requirement already satisfied: idna in /home/bennyfirebase/conda/lib/python3.9/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.0->llama-index) (3.6)\n", - "Requirement already satisfied: certifi in /home/bennyfirebase/conda/lib/python3.9/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.0->llama-index) (2024.2.2)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from httpcore==1.*->httpx->llama-index-core<0.11.0,>=0.10.0->llama-index) (0.14.0)\n", - "Requirement already satisfied: joblib in /home/bennyfirebase/conda/lib/python3.9/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.3.2)\n", - "Requirement already satisfied: regex>=2021.8.3 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core<0.11.0,>=0.10.0->llama-index) (2023.12.25)\n", - "Requirement already satisfied: click in /home/bennyfirebase/conda/lib/python3.9/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core<0.11.0,>=0.10.0->llama-index) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from openai>=1.1.0->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.9.0)\n", - "Requirement already satisfied: PyMuPDFb==1.23.22 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from pymupdf<2.0.0,>=1.23.21->llama-index-readers-file<0.2.0,>=0.1.0->llama-index) (1.23.22)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from requests>=2.31.0->llama-index-core<0.11.0,>=0.10.0->llama-index) (2.2.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from requests>=2.31.0->llama-index-core<0.11.0,>=0.10.0->llama-index) (3.3.2)\n", - "Requirement already satisfied: greenlet!=0.4.17 in 
/home/bennyfirebase/conda/lib/python3.9/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.11.0,>=0.10.0->llama-index) (3.0.3)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from typing-inspect>=0.8.0->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.0.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from dataclasses-json->llama-index-core<0.11.0,>=0.10.0->llama-index) (3.20.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from pandas->llama-index-core<0.11.0,>=0.10.0->llama-index) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from pandas->llama-index-core<0.11.0,>=0.10.0->llama-index) (2024.1)\n", - "Requirement already satisfied: tzdata>=2022.7 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from pandas->llama-index-core<0.11.0,>=0.10.0->llama-index) (2024.1)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from anyio->httpx->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.2.0)\n", - "Requirement already satisfied: packaging>=17.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.11.0,>=0.10.0->llama-index) (23.2)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from pydantic>=1.10->llamaindex-py-client<0.2.0,>=0.1.13->llama-index-core<0.11.0,>=0.10.0->llama-index) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.16.2 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from pydantic>=1.10->llamaindex-py-client<0.2.0,>=0.1.13->llama-index-core<0.11.0,>=0.10.0->llama-index) (2.16.2)\n", - "Requirement already satisfied: six>=1.5 in /home/bennyfirebase/conda/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas->llama-index-core<0.11.0,>=0.10.0->llama-index) (1.16.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], "source": [ - "%pip install llama-index" + "## Basic Usage" ] }, { - "cell_type": "markdown", - "id": "b007403c-6b7a-420c-92f1-4171d05ed9bb", + "cell_type": "code", + "execution_count": null, + "id": "83bfb4fa", "metadata": {}, + "outputs": [], "source": [ - "## Basic Usage" + "from llama_index.llms.fireworks import Fireworks\n", + "\n", + "llm = Fireworks(\n", + " model=\"accounts/fireworks/models/firefunction-v1\",\n", + " # api_key=\"some key\", # uses FIREWORKS_API_KEY env var by default\n", + ")" ] }, { @@ -144,9 +72,7 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.fireworks import Fireworks\n", - "\n", - "resp = Fireworks().complete(\"Paul Graham is \")" + "resp = llm.complete(\"Paul Graham is \")" ] }, { @@ -187,7 +113,6 @@ "outputs": [], "source": [ "from llama_index.core.llms import ChatMessage\n", - "from llama_index.llms.fireworks import Fireworks\n", "\n", "messages = [\n", " ChatMessage(\n", @@ -195,7 +120,7 @@ " ),\n", " ChatMessage(role=\"user\", content=\"What is your name\"),\n", "]\n", - "resp = Fireworks().chat(messages)" + "resp = llm.chat(messages)" ] }, { @@ -239,9 +164,6 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.fireworks import Fireworks\n", - "\n", - "llm = Fireworks()\n", "resp = 
llm.stream_complete(\"Paul Graham is \")" ] }, @@ -279,10 +201,8 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.fireworks import Fireworks\n", "from llama_index.core.llms import ChatMessage\n", "\n", - "llm = Fireworks()\n", "messages = [\n", " ChatMessage(\n", " role=\"system\", content=\"You are a pirate with a colorful personality\"\n", @@ -310,121 +230,6 @@ "for r in resp:\n", " print(r.delta, end=\"\")" ] - }, - { - "cell_type": "markdown", - "id": "009d3f1c-ef35-4126-ae82-0b97adb746e3", - "metadata": {}, - "source": [ - "## Configure Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e973e3d1-a3c9-43b9-bee1-af3e57946ac3", - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.llms.fireworks import Fireworks\n", - "\n", - "llm = Fireworks(model=\"accounts/fireworks/models/firefunction-v1\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2c9bcf6-c950-4dfc-abdc-598d5bdedf40", - "metadata": {}, - "outputs": [], - "source": [ - "resp = llm.complete(\"Paul Graham is \")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2edc85ca-df17-4774-a3ea-e80109fa1811", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Paul Graham is an English-American computer scientist, entrepreneur, venture capitalist, author, and blogger. He is known for co-founding the web-based application platform Viaweb, which was acquired by Yahoo! in 1998. He is also the founder of the startup accelerator Y Combinator.\n" - ] - } - ], - "source": [ - "print(resp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "026fdb77-b61f-4571-8eaf-4a51e8415458", - "metadata": {}, - "outputs": [], - "source": [ - "messages = [\n", - " ChatMessage(\n", - " role=\"system\", content=\"You are a pirate with a colorful personality\"\n", - " ),\n", - " ChatMessage(role=\"user\", content=\"What is your name\"),\n", - "]\n", - "resp = llm.chat(messages)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2286a16c-188b-437f-a1a3-4efe299b759d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "assistant: My name is Captain Redbeard, but you can call me Red for short.\n" - ] - } - ], - "source": [ - "print(resp)" - ] - }, - { - "cell_type": "markdown", - "id": "a2782f06", - "metadata": {}, - "source": [ - "## Set API Key at a per-instance level\n", - "If desired, you can have separate LLM instances use separate API keys." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "015c2d39", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a well-known essayist, programmer, and venture capitalist. He is best known for co-founding the startup incubator and venture capital firm, Y Combinator, which has funded and helped grow many successful tech companies including Dropbox, Airbnb, and Reddit. Graham is also known for his influential essays on startups, technology, and programming, which he publishes on his website, www.paulgraham.com. 
Prior to his work in venture capital, Graham was a successful entrepreneur and programmer, co-founding the company Viaweb (later acquired by Yahoo) and writing the programming language Arc.\n" - ] - } - ], - "source": [ - "from llama_index.llms.fireworks import Fireworks\n", - "\n", - "llm = Fireworks(\n", - " model=\"accounts/fireworks/models/firefunction-v1\", api_key=\"BAD_KEY\"\n", - ")\n", - "resp = Fireworks().complete(\"Paul Graham is \")\n", - "print(resp)" - ] } ], "metadata": { diff --git a/docs/docs/examples/llm/gemini.ipynb b/docs/docs/examples/llm/gemini.ipynb index 1334789ed56aa..4c62954d51176 100644 --- a/docs/docs/examples/llm/gemini.ipynb +++ b/docs/docs/examples/llm/gemini.ipynb @@ -29,26 +29,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-llms-gemini" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "!pip install -q llama-index google-generativeai" + "%pip install llama-index-llms-gemini llama-index" ] }, { @@ -89,6 +70,20 @@ "os.environ[\"GOOGLE_API_KEY\"] = GOOGLE_API_KEY" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.gemini import Gemini\n", + "\n", + "llm = Gemini(\n", + " model=\"models/gemini-1.5-flash\",\n", + " # api_key=\"some key\", # uses GOOGLE_API_KEY env var by default\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -145,7 +140,7 @@ "source": [ "from llama_index.llms.gemini import Gemini\n", "\n", - "resp = Gemini().complete(\"Write a poem about a magic backpack\")\n", + "resp = llm.complete(\"Write a poem about a magic backpack\")\n", "print(resp)" ] }, @@ -206,7 +201,6 @@ ], "source": [ "from llama_index.core.llms import ChatMessage\n", - "from llama_index.llms.gemini import Gemini\n", "\n", "messages = [\n", " ChatMessage(role=\"user\", content=\"Hello friend!\"),\n", @@ -215,7 +209,7 @@ " role=\"user\", content=\"Help me decide what to have for dinner.\"\n", " ),\n", "]\n", - "resp = Gemini().chat(messages)\n", + "resp = llm.chat(messages)\n", "print(resp)" ] }, @@ -239,9 +233,6 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.gemini import Gemini\n", - "\n", - "llm = Gemini()\n", "resp = llm.stream_complete(\n", " \"The story of Sourcrust, the bread creature, is really interesting. 
It all started when...\"\n", ")" @@ -294,32 +285,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[parts {\n", - " text: \"Hello friend!\"\n", - "}\n", - "role: \"user\"\n", - ", parts {\n", - " text: \"Yarr what is shakin\\' matey?\"\n", - "}\n", - "role: \"model\"\n", - ", parts {\n", - " text: \"Help me decide what to have for dinner.\"\n", - "}\n", - "role: \"user\"\n", - "]\n" - ] - } - ], + "outputs": [], "source": [ - "from llama_index.llms.gemini import Gemini\n", "from llama_index.core.llms import ChatMessage\n", "\n", - "llm = Gemini()\n", "messages = [\n", " ChatMessage(role=\"user\", content=\"Hello friend!\"),\n", " ChatMessage(role=\"assistant\", content=\"Yarr what is shakin' matey?\"),\n", diff --git a/docs/docs/examples/llm/openai.ipynb b/docs/docs/examples/llm/openai.ipynb index 9de457ed9ca99..8b4d5185c503f 100644 --- a/docs/docs/examples/llm/openai.ipynb +++ b/docs/docs/examples/llm/openai.ipynb @@ -31,17 +31,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-llms-openai" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83ea30ee", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install llama-index" + "%pip install llama-index llama-index-llms-openai" ] }, { @@ -64,6 +54,21 @@ "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b79b0d9", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.openai import OpenAI\n", + "\n", + "llm = OpenAI(\n", + " model=\"gpt-4o-mini\",\n", + " # api_key=\"some key\", # uses OPENAI_API_KEY env var by default\n", + ")" + ] + }, { "cell_type": "markdown", "id": "8ead155e-b8bd-46f9-ab9b-28fc009361dd", @@ -81,7 +86,7 @@ "source": [ "from llama_index.llms.openai import OpenAI\n", "\n", - "resp = OpenAI().complete(\"Paul Graham is \")" + "resp = llm.complete(\"Paul Graham is \")" ] }, { @@ -118,7 +123,6 @@ "outputs": [], "source": [ "from llama_index.core.llms import ChatMessage\n", - "from llama_index.llms.openai import OpenAI\n", "\n", "messages = [\n", " ChatMessage(\n", @@ -126,7 +130,7 @@ " ),\n", " ChatMessage(role=\"user\", content=\"What is your name\"),\n", "]\n", - "resp = OpenAI().chat(messages)" + "resp = llm.chat(messages)" ] }, { @@ -170,9 +174,6 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.openai import OpenAI\n", - "\n", - "llm = OpenAI()\n", "resp = llm.stream_complete(\"Paul Graham is \")" ] }, @@ -210,10 +211,8 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.openai import OpenAI\n", "from llama_index.core.llms import ChatMessage\n", "\n", - "llm = OpenAI()\n", "messages = [\n", " ChatMessage(\n", " role=\"system\", content=\"You are a pirate with a colorful personality\"\n", @@ -259,7 +258,7 @@ "source": [ "from llama_index.llms.openai import OpenAI\n", "\n", - "llm = OpenAI(model=\"gpt-3.5-turbo\")" + "llm = OpenAI(model=\"gpt-4o\")" ] }, { @@ -344,7 +343,6 @@ "outputs": [], "source": [ "from pydantic import BaseModel\n", - "from llama_index.llms.openai.utils import to_openai_tool\n", "from llama_index.core.tools import FunctionTool\n", "\n", "\n", @@ -436,6 +434,119 @@ " print(f\"Name: {s.tool_name}, Input: {s.raw_input}, Output: {str(s)}\")" ] }, + { + "cell_type": "markdown", + "id": "7552118f", + "metadata": {}, + "source": [ + "### Manual Tool Calling" + ] + }, + { + "cell_type": "markdown", + "id": "dade92c4", + "metadata": {}, + 
"source": [ + "If you want to control how a tool is called, you can also split the tool calling and tool selection into their own steps.\n", + "\n", + "First, lets select a tool." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a76226a", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.llms import ChatMessage\n", + "\n", + "chat_history = [ChatMessage(role=\"user\", content=\"Pick a random song for me\")]\n", + "\n", + "resp = llm.chat_with_tools([tool], chat_history=chat_history)" + ] + }, + { + "cell_type": "markdown", + "id": "b9eb38e2", + "metadata": {}, + "source": [ + "Now, lets call the tool the LLM selected (if any).\n", + "\n", + "If there was a tool call, we should send the results to the LLM to generate the final response (or another tool call!)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43163a59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calling generate_song with {'name': 'Random Vibes', 'artist': 'DJ Chill'}\n" + ] + } + ], + "source": [ + "tools_by_name = {t.metadata.name: t for t in [tool]}\n", + "tool_calls = llm.get_tool_calls_from_response(\n", + " resp, error_on_no_tool_call=False\n", + ")\n", + "\n", + "while tool_calls:\n", + " # add the LLM's response to the chat history\n", + " chat_history.append(resp.message)\n", + "\n", + " for tool_call in tool_calls:\n", + " tool_name = tool_call.tool_name\n", + " tool_kwargs = tool_call.tool_kwargs\n", + "\n", + " print(f\"Calling {tool_name} with {tool_kwargs}\")\n", + " tool_output = tool(**tool_kwargs)\n", + " chat_history.append(\n", + " ChatMessage(\n", + " role=\"tool\",\n", + " content=str(tool_output),\n", + " # most LLMs like OpenAI need to know the tool call id\n", + " additional_kwargs={\"tool_call_id\": tool_call.tool_id},\n", + " )\n", + " )\n", + "\n", + " resp = llm.chat_with_tools([tool], chat_history=chat_history)\n", + " tool_calls = llm.get_tool_calls_from_response(\n", + " resp, error_on_no_tool_call=False\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "78d611c5", + "metadata": {}, + "source": [ + "Now, we should have a final response!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c5864e3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here's a random song for you: **\"Random Vibes\"** by **DJ Chill**. 
Enjoy!\n" + ] + } + ], + "source": [ + "print(resp.message.content)" + ] + }, { "cell_type": "markdown", "id": "7ede8d94-524b-4a51-8150-552df952f1bf", @@ -700,7 +811,7 @@ "from llama_index.llms.openai import OpenAI\n", "\n", "llm = OpenAI(model=\"gpt-3.5-turbo\", api_key=\"BAD_KEY\")\n", - "resp = OpenAI().complete(\"Paul Graham is \")\n", + "resp = llm.complete(\"Paul Graham is \")\n", "print(resp)" ] }, @@ -723,7 +834,7 @@ "from llama_index.llms.openai import OpenAI\n", "\n", "llm = OpenAI(model=\"gpt-3.5-turbo\", additional_kwargs={\"user\": \"your_user_id\"})\n", - "resp = OpenAI().complete(\"Paul Graham is \")\n", + "resp = llm.complete(\"Paul Graham is \")\n", "print(resp)" ] }, @@ -1121,9 +1232,9 @@ "provenance": [] }, "kernelspec": { - "display_name": "llamacloud", + "display_name": "llama-index-caVs7DDe-py3.11", "language": "python", - "name": "llamacloud" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/docs/docs/examples/llm/sambanova.ipynb b/docs/docs/examples/llm/sambanova.ipynb index 26544c49b0b61..b093259844708 100644 --- a/docs/docs/examples/llm/sambanova.ipynb +++ b/docs/docs/examples/llm/sambanova.ipynb @@ -4,21 +4,50 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## SambaNova\n" + "# SambaNova Cloud\n", + "\n", + "This will help you getting started with **[SambaNova](https://sambanova.ai/)'s** [SambaNova Cloud](https://cloud.sambanova.ai/), which is a platform for performing inference with open-source models.\n", + "\n", + "## Setup\n", + "\n", + "To access SambaNova Cloud model you will need to create a [SambaNovaCloud](https://cloud.sambanova.ai/) account, get an API key, install the `llama-index-llms-sambanova` integration package, and install the `SSEClient` Package.\n", + "\n", + "```bash\n", + "pip install llama-index-llms-sambanovacloud\n", + "pip install sseclient-py\n", + "```\n", + "\n", + "### Credentials\n", + "\n", + "Get an API Key from [cloud.sambanova.ai](https://cloud.sambanova.ai/apis) and add it to your environment variables:\n", + "\n", + "``` bash\n", + "export SAMBANOVA_API_KEY=\"your-api-key-here\"\n", + "```" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "#### Example notebook on how to use Sambaverse and SambaStudio offerings from SambaNova\n" + "import getpass\n", + "import os\n", + "\n", + "if not os.getenv(\"SAMBANOVA_API_KEY\"):\n", + " os.environ[\"SAMBANOVA_API_KEY\"] = getpass.getpass(\n", + " \"Enter your SambaNova Cloud API key: \"\n", + " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.\n" + "### Installation\n", + "\n", + "The Llama-Index __SambaNovaCloud__ integration lives in the `langchain-index-integrations` package, and it can be installed with the following commands:" ] }, { @@ -27,7 +56,17 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-llms-sambanova" + "%pip install \"llama-index-llms-sambanovacloud\"\n", + "%pip install sseclient-py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" ] }, { @@ -36,32 +75,54 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install llama-index" + "from llama_index.llms.sambanovacloud import SambaNovaCloud\n", + "\n", + "llm = SambaNovaCloud(\n", + " model=\"Meta-Llama-3.1-70B-Instruct\",\n", + " 
max_tokens=1024,\n", + " temperature=0.7,\n", + " top_k=1,\n", + " top_p=0.01,\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Sambaverse\n" + "## Invocation\n", + "\n", + "Given the following system and user messages, let's explore different ways of calling a SambaNova Cloud model. " ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "1. Setup on account on SambaNova\n", + "from llama_index.core.base.llms.types import (\n", + " ChatMessage,\n", + " MessageRole,\n", + ")\n", "\n", - "2. Generate a new API token by clicking on the profile\n", + "system_msg = ChatMessage(\n", + " role=MessageRole.SYSTEM,\n", + " content=\"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n", + ")\n", + "user_msg = ChatMessage(role=MessageRole.USER, content=\"I love programming.\")\n", "\n", - "3. Identify the model name from the playground\n" + "messages = [\n", + " system_msg,\n", + " user_msg,\n", + "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Import the Sambaverse and use the LLM\n" + "### Chat" ] }, { @@ -70,14 +131,8 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.sambanova import Sambaverse" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Configure the environment variables\n" + "ai_msg = llm.chat(messages)\n", + "ai_msg.message" ] }, { @@ -86,19 +141,14 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", - "os.environ[\"SAMBAVERSE_API_KEY\"] = \"you sambaverse api key\"\n", - "os.environ[\"SAMBAVERSE_MODEL_NAME\"] = \"you sambaverse model name\"\n", - "\n", - "# Example model name = Meta/Meta-Llama-3-8B" + "print(ai_msg.message.content)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Create an LLM instance\n" + "### Complete" ] }, { @@ -107,44 +157,31 @@ "metadata": {}, "outputs": [], "source": [ - "llm = Sambaverse(\n", - " streaming=False,\n", - " model_kwargs={\n", - " \"do_sample\": False,\n", - " \"process_prompt\": False,\n", - " \"select_expert\": \"Meta-Llama-3-8B\",\n", - " \"stop_sequences\": \"\",\n", - " },\n", - ")" + "ai_msg = llm.complete(user_msg.content)\n", + "ai_msg" ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To know more about the model kwargs, kindly refer [here](https://docs.sambanova.ai/sambastudio/latest/api-reference.html)\n" - ] - }, - { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Completion response\n" + "print(ai_msg.text)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "response = llm.complete(\"What is the capital of India?\")\n", - "print(response)\n" + "## Streaming" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Stream complete response\n" + "### Chat" ] }, { @@ -153,34 +190,38 @@ "metadata": {}, "outputs": [], "source": [ - "stream_response = llm.stream_complete(\"What is the capital of India\")\n", - "for response in stream_response:\n", - " print(response)" + "ai_stream_msgs = []\n", + "for stream in llm.stream_chat(messages):\n", + " ai_stream_msgs.append(stream)\n", + "ai_stream_msgs" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "### SambaStudio\n" + "print(ai_stream_msgs[-1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "1. 
Setup on account on SambaNova for SambaStudio\n", - "\n", - "2. Create a project.\n", - "\n", - "3. Configure the model name and endpoint.\n" + "### Complete" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Import SambaStudio and use the LLM\n" + "ai_stream_msgs = []\n", + "for stream in llm.stream_complete(user_msg.content):\n", + " ai_stream_msgs.append(stream)\n", + "ai_stream_msgs" ] }, { @@ -189,36 +230,21 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.sambanova import SambaStudio" + "print(ai_stream_msgs[-1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Configure the environment variables\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"SAMBAVERSE_API_KEY\"] = \"you sambastudio api key\"\n", - "os.environ[\"SAMBASTUDIO_BASE_URL\"] = \"you sambastudio base_url\"\n", - "os.environ[\"SAMBASTUDIO_BASE_URI\"] = \"you sambastudio base_uri\"\n", - "os.environ[\"SAMBASTUDIO_PROJECT_ID\"] = \"you sambastudio project_id\"\n", - "os.environ[\"SAMBASTUDIO_ENDPOINT_ID\"] = \"you sambastudio endpoint_id\"" + "## Async" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Create a SambaStudio instance\n" + "### Chat" ] }, { @@ -227,29 +253,24 @@ "metadata": {}, "outputs": [], "source": [ - "llm = Sambaverse(\n", - " streaming=False,\n", - " model_kwargs={\n", - " \"do_sample\": True,\n", - " \"process_prompt\": True,\n", - " \"max_tokens_to_generate\": 1000,\n", - " \"temperature\": 0.8,\n", - " },\n", - ")" + "ai_msg = await llm.achat(messages)\n", + "ai_msg" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "To know more about the model kwargs, kindly refer [here](https://docs.sambanova.ai/sambastudio/latest/api-reference.html)\n" + "print(ai_msg.message.content)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Complete response\n" + "### Complete" ] }, { @@ -258,32 +279,45 @@ "metadata": {}, "outputs": [], "source": [ - "response = llm.complete(\"What is the capital of India?\")\n", - "print(response)" + "ai_msg = await llm.acomplete(user_msg.content)\n", + "ai_msg" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Stream complete response\n" + "print(ai_msg.text)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "stream_response = llm.stream_complete(\"What is the capital of India\")\n", - "for response in stream_response:\n", - " print(response)" + "## Async Streaming\n", + "\n", + "Not supported yet. Coming soon!" 
] } ], "metadata": { + "kernelspec": { + "display_name": "llamaindex_venv", + "language": "python", + "name": "llamaindex_venv" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/docs/docs/examples/llm/upstage.ipynb b/docs/docs/examples/llm/upstage.ipynb index a9d4f99f4107b..a9ce18114c22d 100644 --- a/docs/docs/examples/llm/upstage.ipynb +++ b/docs/docs/examples/llm/upstage.ipynb @@ -31,17 +31,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-llms-upstage==0.1.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83ea30ee", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install llama-index" + "%pip install llama-index-llms-upstage llama-index" ] }, { @@ -81,7 +71,12 @@ "source": [ "from llama_index.llms.upstage import Upstage\n", "\n", - "resp = Upstage().complete(\"Paul Graham is \")" + "llm = Upstage(\n", + " model=\"solar-1-mini-chat\",\n", + " # api_key=\"YOUR_API_KEY\" # uses UPSTAGE_API_KEY env var by default\n", + ")\n", + "\n", + "resp = llm.complete(\"Paul Graham is \")" ] }, { @@ -118,7 +113,6 @@ "outputs": [], "source": [ "from llama_index.core.llms import ChatMessage\n", - "from llama_index.llms.upstage import Upstage\n", "\n", "messages = [\n", " ChatMessage(\n", @@ -126,7 +120,7 @@ " ),\n", " ChatMessage(role=\"user\", content=\"What is your name\"),\n", "]\n", - "resp = Upstage().chat(messages)" + "resp = llm.chat(messages)" ] }, { @@ -170,9 +164,6 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.upstage import Upstage\n", - "\n", - "llm = Upstage()\n", "resp = llm.stream_complete(\"Paul Graham is \")" ] }, @@ -211,9 +202,7 @@ "outputs": [], "source": [ "from llama_index.core.llms import ChatMessage\n", - "from llama_index.llms.upstage import Upstage\n", "\n", - "llm = Upstage()\n", "messages = [\n", " ChatMessage(\n", " role=\"system\", content=\"You are a pirate with a colorful personality\"\n", @@ -242,88 +231,6 @@ " print(r.delta, end=\"\")" ] }, - { - "cell_type": "markdown", - "id": "009d3f1c-ef35-4126-ae82-0b97adb746e3", - "metadata": {}, - "source": [ - "## Configure Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e973e3d1-a3c9-43b9-bee1-af3e57946ac3", - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.llms.upstage import Upstage\n", - "\n", - "llm = Upstage()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2c9bcf6-c950-4dfc-abdc-598d5bdedf40", - "metadata": {}, - "outputs": [], - "source": [ - "resp = llm.complete(\"Paul Graham is \")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2edc85ca-df17-4774-a3ea-e80109fa1811", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Paul Graham is a computer scientist, entrepreneur, and essayist. He is best known for co-founding the startup accelerator Y Combinator, which has helped launch some of the most successful tech companies in the world, including Airbnb, Dropbox, and Stripe. 
He is also the author of several influential essays on entrepreneurship, startups, and technology, including \"How to Start a Startup\" and \"Hackers & Painters.\"\n" - ] - } - ], - "source": [ - "print(resp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "026fdb77-b61f-4571-8eaf-4a51e8415458", - "metadata": {}, - "outputs": [], - "source": [ - "messages = [\n", - " ChatMessage(\n", - " role=\"system\", content=\"You are a pirate with a colorful personality\"\n", - " ),\n", - " ChatMessage(role=\"user\", content=\"What is your name\"),\n", - "]\n", - "resp = llm.chat(messages)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2286a16c-188b-437f-a1a3-4efe299b759d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "assistant: I am Captain Redbeard, the fearless pirate!\n" - ] - } - ], - "source": [ - "print(resp)" - ] - }, { "cell_type": "markdown", "id": "90f07f7e-927f-47a2-9797-de5a86d61e1f", @@ -519,37 +426,6 @@ "response = await llm.apredict_and_call([tool], \"Generate a song\")\n", "print(str(response))" ] - }, - { - "cell_type": "markdown", - "id": "a2782f06", - "metadata": {}, - "source": [ - "## Set API Key at a per-instance level\n", - "If desired, you can have separate LLM instances using separate API keys." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "015c2d39", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Paul Graham is a computer scientist, entrepreneur, and essayist. He is best known as the co-founder of the startup accelerator Y Combinator, which has helped launch and fund many successful tech companies. He is also the author of several influential essays on startup culture and technology, including \"How to Start a Startup\" and \"Hackers & Painters.\"\n" - ] - } - ], - "source": [ - "from llama_index.llms.upstage import Upstage\n", - "\n", - "llm = Upstage(api_key=\"BAD_KEY\")\n", - "resp = Upstage().complete(\"Paul Graham is \")\n", - "print(resp)" - ] } ], "metadata": { diff --git a/docs/docs/examples/memory/Mem0Memory.ipynb b/docs/docs/examples/memory/Mem0Memory.ipynb new file mode 100644 index 0000000000000..ee5aecdb3d2e7 --- /dev/null +++ b/docs/docs/examples/memory/Mem0Memory.ipynb @@ -0,0 +1,449 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using `from_client` (for Mem0 platform API): " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.memory.mem0 import Mem0Memory\n", + "\n", + "context = {\"user_id\": \"test_user_1\"}\n", + "memory_from_client = Mem0Memory.from_client(\n", + " context=context,\n", + " api_key=\"\",\n", + " search_msg_limit=4, # Default is 5\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using `from_config` (for Mem0 OSS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "config = {\n", + " \"vector_store\": {\n", + " \"provider\": \"qdrant\",\n", + " \"config\": {\n", + " \"collection_name\": \"test_9\",\n", + " \"host\": \"localhost\",\n", + " \"port\": 6333,\n", + " \"embedding_model_dims\": 1536, # Change this according to your local model's dimensions\n", + " },\n", + " },\n", + " \"llm\": {\n", + " \"provider\": \"openai\",\n", + " \"config\": {\n", + " \"model\": \"gpt-4o\",\n", + 
" \"temperature\": 0.2,\n", + " \"max_tokens\": 1500,\n", + " },\n", + " },\n", + " \"embedder\": {\n", + " \"provider\": \"openai\",\n", + " \"config\": {\"model\": \"text-embedding-3-small\"},\n", + " },\n", + " \"version\": \"v1.1\",\n", + "}\n", + "memory_from_config = Mem0Memory.from_config(\n", + " context=context,\n", + " config=config,\n", + " search_msg_limit=4, # Default is 5\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize LLM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.openai import OpenAI\n", + "\n", + "llm = OpenAI(model=\"gpt-4o\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mem0 for Function Calling Agents\n", + "\n", + "Use `Mem0` as memory for `FunctionCallingAgents`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.tools import FunctionTool\n", + "from llama_index.core.agent import FunctionCallingAgent\n", + "import nest_asyncio\n", + "\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize Tools" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def call_fn(name: str):\n", + " \"\"\"Call the provided name.\n", + " Args:\n", + " name: str (Name of the person)\n", + " \"\"\"\n", + " print(f\"Calling... {name}\")\n", + "\n", + "\n", + "def email_fn(name: str):\n", + " \"\"\"Email the provided name.\n", + " Args:\n", + " name: str (Name of the person)\n", + " \"\"\"\n", + " print(f\"Emailing... {name}\")\n", + "\n", + "\n", + "call_tool = FunctionTool.from_defaults(fn=call_fn)\n", + "email_tool = FunctionTool.from_defaults(fn=email_fn)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent = FunctionCallingAgent.from_tools(\n", + " [call_tool, email_tool],\n", + " llm=llm,\n", + " memory=memory_from_client, # can be memory_from_config\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step 47f7d617-4756-4f7c-8858-baf6d8e0ce3a. Step input: Hi, My name is Mayank.\n", + "Added user message to memory: Hi, My name is Mayank.\n", + "=== LLM Response ===\n", + "Hello Mayank! How can I assist you today?\n" + ] + } + ], + "source": [ + "response = agent.chat(\"Hi, My name is Mayank.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step f08de932-bd3d-4ded-a701-5b2c6bc62788. Step input: My preferred way of communication would be Email.\n", + "Added user message to memory: My preferred way of communication would be Email.\n", + "=== LLM Response ===\n", + "Got it, Mayank! Your preferred way of communication is email. How can I assist you further?\n" + ] + } + ], + "source": [ + "response = agent.chat(\"My preferred way of communication would be Email.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step ccf8aae2-4e0e-459e-be6c-ead62d330c96. 
Step input: Send me an update of your product.\n", + "Added user message to memory: Send me an update of your product.\n", + "=== Calling Function ===\n", + "Calling function: email_fn with args: {\"name\": \"Mayank\"}\n", + "Emailing... Mayank\n", + "=== Function Output ===\n", + "None\n", + "> Running step e26553f6-6b3f-46d1-aa84-9ba34879461f. Step input: None\n", + "=== LLM Response ===\n", + "I've sent you an update of our product via email. If you have any other questions or need further assistance, feel free to let me know!\n" + ] + } + ], + "source": [ + "response = agent.chat(\"Send me an update of your product.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mem0 for Chat Engines\n", + "\n", + "Use `Mem0` as memory to `SimpleChatEngine`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize chat engine\n", + "from llama_index.core.chat_engine.simple import SimpleChatEngine\n", + "\n", + "agent = SimpleChatEngine.from_defaults(\n", + " llm=llm, memory=memory_from_client # can be memory_from_config\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello Mayank! How can I assist you today?\n" + ] + } + ], + "source": [ + "response = agent.chat(\"Hi, My name is mayank\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "That sounds exciting! San Francisco has a lot to offer. If you need any recommendations on places to visit or things to do, feel free to ask. Safe travels!\n" + ] + } + ], + "source": [ + "response = agent.chat(\"I am planning to visit SF tommorow.\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Since you're planning to visit San Francisco tomorrow, it might be best to schedule meetings either in the morning before you start exploring or in the late afternoon or evening after you've had some time to enjoy the city. This way, you can make the most of your visit without feeling rushed. Let me know if you need help with anything else!\n" + ] + } + ], + "source": [ + "response = agent.chat(\n", + " \"What would be a suitable time to schedule a meeting tommorow?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mem0 for ReAct Agents\n", + "\n", + "Use `Mem0` as memory for `ReActAgent`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.agent import ReActAgent\n", + "\n", + "agent = ReActAgent.from_tools(\n", + " [call_tool, email_tool],\n", + " llm=llm,\n", + " memory=memory_from_client, # can be memory_from_config\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step fb3acef3-f806-493e-838f-eae950df54f4. Step input: Hi, My name is Mayank.\n", + "\u001b[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!\n", + "Answer: Hello Mayank! 
How can I assist you today?\n", + "\u001b[0m" + ] + } + ], + "source": [ + "response = agent.chat(\"Hi, My name is Mayank.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step e1dfafe0-7fb1-4f37-a3bd-a4d93ef7e19f. Step input: My preferred way of communication would be Email.\n", + "\u001b[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!\n", + "Answer: Got it, Mayank! If you need to communicate or schedule anything, I'll make sure to use email as your preferred method. Let me know if there's anything specific you need help with!\n", + "\u001b[0m" + ] + } + ], + "source": [ + "response = agent.chat(\"My preferred way of communication would be Email.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step e4a6f24c-0008-41d7-8fe9-53d41f26cd87. Step input: Send me an update of your product.\n", + "\u001b[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me send an update via email.\n", + "Action: email_fn\n", + "Action Input: {'name': 'Mayank'}\n", + "\u001b[0mEmailing... Mayank\n", + "\u001b[1;3;34mObservation: None\n", + "\u001b[0m> Running step aec65a36-eabf-473b-8572-0cb02b25969b. Step input: None\n", + "\u001b[1;3;38;5;200mThought: I have sent the email to Mayank with the product update. I can now confirm this action.\n", + "Answer: I have sent you an update of our product via email. Please check your inbox. Let me know if there's anything else you need!\n", + "\u001b[0m" + ] + } + ], + "source": [ + "response = agent.chat(\"Send me an update of your product.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step c3d61bd9-6415-49b3-94ba-8cb0ee717e3a. Step input: First call me and then communicate me requirements.\n", + "\u001b[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.\n", + "Action: call_fn\n", + "Action Input: {'name': 'Mayank'}\n", + "\u001b[0mCalling... Mayank\n", + "\u001b[1;3;34mObservation: None\n", + "\u001b[0m> Running step 6a594cb7-af31-48e3-b965-7fef88b03084. Step input: None\n", + "\u001b[1;3;38;5;200mThought: Since the call did not go through, I will proceed with the next step, which is to communicate via email as per your preference.\n", + "Action: email_fn\n", + "Action Input: {'name': 'Mayank'}\n", + "\u001b[0mEmailing... Mayank\n", + "\u001b[1;3;34mObservation: None\n", + "\u001b[0m> Running step 72e96d48-afcc-48fc-83d7-5c0232dd7a92. Step input: None\n", + "\u001b[1;3;38;5;200mThought: I have attempted to call and email you, but there seems to be no response from the tools. I will provide the information here instead.\n", + "Answer: I attempted to call you, but it seems there was an issue. I'll proceed with providing the requirements update here. 
Please let me know if you have any specific requirements or updates you need, and I'll be happy to assist you!\n", + "\u001b[0m" + ] + } + ], + "source": [ + "response = agent.chat(\"First call me and then communicate me requirements.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 760298507b1de..cc0d380d76cb6 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -161,6 +161,7 @@ nav: - ./examples/cookbooks/oreilly_course_cookbooks/Module-7/Multi_Modal_RAG_System.ipynb - ./examples/cookbooks/oreilly_course_cookbooks/Module-8/Advanced_RAG_with_LlamaParse.ipynb - ./examples/cookbooks/prometheus2_cookbook.ipynb + - ./examples/cookbooks/toolhouse_llamaindex.ipynb - Customization: - ./examples/customization/llms/AzureOpenAI.ipynb - ./examples/customization/llms/SimpleIndexDemo-ChatGPT.ipynb @@ -909,6 +910,7 @@ nav: - ./api_reference/embeddings/oracleai.md - ./api_reference/embeddings/premai.md - ./api_reference/embeddings/sagemaker_endpoint.md + - ./api_reference/embeddings/siliconflow.md - ./api_reference/embeddings/text_embeddings_inference.md - ./api_reference/embeddings/textembed.md - ./api_reference/embeddings/together.md @@ -1122,6 +1124,7 @@ nav: - Memory: - ./api_reference/memory/chat_memory_buffer.md - ./api_reference/memory/index.md + - ./api_reference/memory/mem0.md - ./api_reference/memory/simple_composable_memory.md - ./api_reference/memory/vector_memory.md - Metadata Extractors: @@ -1194,6 +1197,7 @@ nav: - ./api_reference/postprocessor/rankllm_rerank.md - ./api_reference/postprocessor/sbert_rerank.md - ./api_reference/postprocessor/sentence_optimizer.md + - ./api_reference/postprocessor/siliconflow_rerank.md - ./api_reference/postprocessor/similarity.md - ./api_reference/postprocessor/tei_rerank.md - ./api_reference/postprocessor/time_weighted.md @@ -1631,6 +1635,7 @@ nav: - ./api_reference/tools/tavily_research.md - ./api_reference/tools/text_to_image.md - ./api_reference/tools/tool_spec.md + - ./api_reference/tools/vectara_query.md - ./api_reference/tools/vector_db.md - ./api_reference/tools/waii.md - ./api_reference/tools/weather.md @@ -2291,6 +2296,10 @@ plugins: - ../llama-index-integrations/storage/docstore/llama-index-storage-docstore-couchbase - ../llama-index-integrations/storage/index_store/llama-index-storage-index-store-couchbase - ../llama-index-integrations/llms/llama-index-llms-reka + - ../llama-index-integrations/tools/llama-index-tools-vectara-query + - ../llama-index-integrations/embeddings/llama-index-embeddings-siliconflow + - ../llama-index-integrations/memory/llama-index-memory-mem0 + - ../llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank - redirects: redirect_maps: ./api/llama_index.vector_stores.MongoDBAtlasVectorSearch.html: api_reference/storage/vector_store/mongodb.md diff --git a/docs/scripts/serve.py b/docs/scripts/serve.py index f2636fe2fae60..3a67a78884923 100644 --- a/docs/scripts/serve.py +++ b/docs/scripts/serve.py @@ -1,5 +1,8 @@ import argparse +import os import subprocess +import sys + import yaml @@ -11,13 +14,23 @@ def _skip_notebook_conversion(): p for p in config["plugins"] if "mkdocs-jupyter" not in p 
] + tmp_config_path = ".mkdocs.tmp.yml" + with open(tmp_config_path, "w") as f: + yaml.safe_dump(config, f) + try: - subprocess.run( - ["mkdocs", "serve", "-f", "-"], - input=yaml.safe_dump(config).encode("utf-8"), + process = subprocess.Popen( + ["mkdocs", "serve", "--dirty", "-f", tmp_config_path], + stdout=sys.stdout, + stderr=sys.stderr, ) - except KeyboardInterrupt: - pass + + process.wait() + + finally: + os.unlink(tmp_config_path) # Clean up the temporary file + if process.poll() is None: + process.terminate() def _serve(): diff --git a/llama-index-core/llama_index/core/__init__.py b/llama-index-core/llama_index/core/__init__.py index 6be6e568b0b3f..0cccfb08d3b3b 100644 --- a/llama-index-core/llama_index/core/__init__.py +++ b/llama-index-core/llama_index/core/__init__.py @@ -1,6 +1,6 @@ """Init file of LlamaIndex.""" -__version__ = "0.11.20" +__version__ = "0.11.21" import logging from logging import NullHandler diff --git a/llama-index-core/llama_index/core/agent/react/step.py b/llama-index-core/llama_index/core/agent/react/step.py index c43b729029d30..b1da61e19d174 100644 --- a/llama-index-core/llama_index/core/agent/react/step.py +++ b/llama-index-core/llama_index/core/agent/react/step.py @@ -494,7 +494,7 @@ def _infer_stream_chunk_is_final( missed_chunks_storage.append(chunk) elif not latest_content.startswith("Thought"): return True - elif "Answer: " in latest_content: + elif "Answer:" in latest_content: missed_chunks_storage.clear() return True return False @@ -682,11 +682,22 @@ def _run_step_stream( ) ) else: - # Get the response in a separate thread so we can yield the response + # remove "Answer: " from the response, and anything before it + start_idx = (latest_chunk.message.content or "").find("Answer:") + if start_idx != -1 and latest_chunk.message.content: + latest_chunk.message.content = latest_chunk.message.content[ + start_idx + len("Answer:") : + ].strip() + + # set delta to the content, minus the "Answer: " + latest_chunk.delta = latest_chunk.message.content + + # add back the chunks that were missed response_stream = self._add_back_chunk_to_stream( chunks=[*missed_chunks_storage, latest_chunk], chat_stream=chat_stream ) + # Get the response in a separate thread so we can yield the response agent_response_stream = StreamingAgentChatResponse( chat_stream=response_stream, sources=task.extra_state["sources"], @@ -764,7 +775,17 @@ async def _arun_step_stream( ) ) else: - # Get the response in a separate thread so we can yield the response + # remove "Answer: " from the response, and anything before it + start_idx = (latest_chunk.message.content or "").find("Answer:") + if start_idx != -1 and latest_chunk.message.content: + latest_chunk.message.content = latest_chunk.message.content[ + start_idx + len("Answer:") : + ].strip() + + # set delta to the content, minus the "Answer: " + latest_chunk.delta = latest_chunk.message.content + + # add back the chunks that were missed response_stream = self._async_add_back_chunk_to_stream( chunks=[*missed_chunks_storage, latest_chunk], chat_stream=chat_stream ) diff --git a/llama-index-core/llama_index/core/chat_engine/types.py b/llama-index-core/llama_index/core/chat_engine/types.py index f0f77bda15d1a..62644e5cab414 100644 --- a/llama-index-core/llama_index/core/chat_engine/types.py +++ b/llama-index-core/llama_index/core/chat_engine/types.py @@ -34,7 +34,10 @@ def is_function(message: ChatMessage) -> bool: """Utility for ChatMessage responses from OpenAI models.""" - return "tool_calls" in message.additional_kwargs + return ( + 
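The `react/step.py` changes above strip the ReAct `Thought`/`Answer:` scaffolding from the final streamed chunk, so consumers of the stream only see the answer text. A rough sketch of the consuming side, reusing the `call_tool`, `email_tool`, and `llm` objects defined in the Mem0 notebook earlier purely as stand-ins:

```python
from llama_index.core.agent import ReActAgent

# Sketch only: call_tool, email_tool and llm are assumed to be the objects
# created in the Mem0 notebook above.
agent = ReActAgent.from_tools([call_tool, email_tool], llm=llm, verbose=False)

# stream_chat returns a StreamingAgentChatResponse; with the change above,
# the streamed deltas begin after the "Answer:" marker instead of echoing
# the ReAct scaffolding.
streaming_response = agent.stream_chat("What is my preferred way of communication?")
for token in streaming_response.response_gen:
    print(token, end="", flush=True)
```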
"tool_calls" in message.additional_kwargs + and len(message.additional_kwargs["tool_calls"]) > 0 + ) class ChatResponseMode(str, Enum): diff --git a/llama-index-core/llama_index/core/indices/base.py b/llama-index-core/llama_index/core/indices/base.py index 3564139b1d051..60ffbea10b520 100644 --- a/llama-index-core/llama_index/core/indices/base.py +++ b/llama-index-core/llama_index/core/indices/base.py @@ -46,7 +46,7 @@ def __init__( """Initialize with parameters.""" if index_struct is None and nodes is None and objects is None: raise ValueError("One of nodes, objects, or index_struct must be provided.") - if index_struct is not None and nodes is not None: + if index_struct is not None and nodes is not None and len(nodes) >= 1: raise ValueError("Only one of nodes or index_struct can be provided.") # This is to explicitly make sure that the old UX is not used if nodes is not None and len(nodes) >= 1 and not isinstance(nodes[0], BaseNode): diff --git a/llama-index-core/llama_index/core/indices/struct_store/json_query.py b/llama-index-core/llama_index/core/indices/struct_store/json_query.py index 0b8164a5c3620..40b5049b90bfd 100644 --- a/llama-index-core/llama_index/core/indices/struct_store/json_query.py +++ b/llama-index-core/llama_index/core/indices/struct_store/json_query.py @@ -65,8 +65,8 @@ def default_output_processor(llm_output: str, json_value: JSONType) -> Dict[str, expressions = [expr.strip() for expr in llm_output.split(",")] try: - from jsonpath_ng.ext import parse - from jsonpath_ng.jsonpath import DatumInContext + from jsonpath_ng.ext import parse # pants: no-infer-dep + from jsonpath_ng.jsonpath import DatumInContext # pants: no-infer-dep except ImportError as exc: IMPORT_ERROR_MSG = "You need to install jsonpath-ng to use this function!" raise ImportError(IMPORT_ERROR_MSG) from exc diff --git a/llama-index-core/llama_index/core/indices/utils.py b/llama-index-core/llama_index/core/indices/utils.py index 2aed8534944e3..4580da5c80909 100644 --- a/llama-index-core/llama_index/core/indices/utils.py +++ b/llama-index-core/llama_index/core/indices/utils.py @@ -101,7 +101,17 @@ def default_parse_choice_select_answer_fn( "Answer line must be of the form: " "answer_num: , answer_relevance: " ) - answer_num = int(line_tokens[0].split(":")[1].strip()) + try: + answer_num = int(line_tokens[0].split(":")[1].strip()) + except (IndexError, ValueError) as e: + if not raise_error: + continue + else: + raise ValueError( + f"Invalid answer line: {answer_line}. 
" + "Answer line must be of the form: " + "answer_num: , answer_relevance: " + ) if answer_num > num_choices: continue answer_nums.append(answer_num) diff --git a/llama-index-core/llama_index/core/response_synthesizers/generation.py b/llama-index-core/llama_index/core/response_synthesizers/generation.py index 72c51acf02176..cc9d444818b5c 100644 --- a/llama-index-core/llama_index/core/response_synthesizers/generation.py +++ b/llama-index-core/llama_index/core/response_synthesizers/generation.py @@ -1,15 +1,31 @@ -from typing import Any, Optional, Sequence +from typing import Any, List, Optional, Sequence +from llama_index.core.base.response.schema import RESPONSE_TYPE from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.callbacks.schema import CBEventType, EventPayload from llama_index.core.indices.prompt_helper import PromptHelper +from llama_index.core.instrumentation.events.synthesis import ( + SynthesizeStartEvent, + SynthesizeEndEvent, +) +import llama_index.core.instrumentation as instrument from llama_index.core.llms import LLM from llama_index.core.prompts import BasePromptTemplate from llama_index.core.prompts.default_prompts import DEFAULT_SIMPLE_INPUT_PROMPT from llama_index.core.prompts.mixin import PromptDictType from llama_index.core.response_synthesizers.base import BaseSynthesizer +from llama_index.core.schema import ( + MetadataMode, + NodeWithScore, + QueryBundle, + QueryType, +) from llama_index.core.types import RESPONSE_TEXT_TYPE +dispatcher = instrument.get_dispatcher(__name__) + + class Generation(BaseSynthesizer): def __init__( self, @@ -52,7 +68,7 @@ async def aget_response( **response_kwargs, ) else: - return self._llm.stream( + return await self._llm.astream( self._input_prompt, query_str=query_str, **response_kwargs, @@ -79,3 +95,94 @@ def get_response( query_str=query_str, **response_kwargs, ) + + # NOTE: synthesize and asynthesize are copied from the base class, + # but modified to return when zero nodes are provided + + @dispatcher.span + def synthesize( + self, + query: QueryType, + nodes: List[NodeWithScore], + additional_source_nodes: Optional[Sequence[NodeWithScore]] = None, + **response_kwargs: Any, + ) -> RESPONSE_TYPE: + dispatcher.event( + SynthesizeStartEvent( + query=query, + ) + ) + + if isinstance(query, str): + query = QueryBundle(query_str=query) + + with self._callback_manager.event( + CBEventType.SYNTHESIZE, + payload={EventPayload.QUERY_STR: query.query_str}, + ) as event: + response_str = self.get_response( + query_str=query.query_str, + text_chunks=[ + n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes + ], + **response_kwargs, + ) + + additional_source_nodes = additional_source_nodes or [] + source_nodes = list(nodes) + list(additional_source_nodes) + + response = self._prepare_response_output(response_str, source_nodes) + + event.on_end(payload={EventPayload.RESPONSE: response}) + + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=response, + ) + ) + return response + + @dispatcher.span + async def asynthesize( + self, + query: QueryType, + nodes: List[NodeWithScore], + additional_source_nodes: Optional[Sequence[NodeWithScore]] = None, + **response_kwargs: Any, + ) -> RESPONSE_TYPE: + dispatcher.event( + SynthesizeStartEvent( + query=query, + ) + ) + + if isinstance(query, str): + query = QueryBundle(query_str=query) + + with self._callback_manager.event( + CBEventType.SYNTHESIZE, + payload={EventPayload.QUERY_STR: query.query_str}, + ) as event: + response_str = await 
self.aget_response( + query_str=query.query_str, + text_chunks=[ + n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes + ], + **response_kwargs, + ) + + additional_source_nodes = additional_source_nodes or [] + source_nodes = list(nodes) + list(additional_source_nodes) + + response = self._prepare_response_output(response_str, source_nodes) + + event.on_end(payload={EventPayload.RESPONSE: response}) + + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=response, + ) + ) + return response diff --git a/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py b/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py index e9d9a8c3e2dd5..102b845a8fc5f 100644 --- a/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py +++ b/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py @@ -125,19 +125,16 @@ def _get_kv_pairs_for_insert( def _merge_ref_doc_kv_pairs(self, ref_doc_kv_pairs: dict) -> List[Tuple[str, dict]]: merged_ref_doc_kv_pairs: List[Tuple[str, dict]] = [] for key, kv_pairs in ref_doc_kv_pairs.items(): - for key, kv_pairs in ref_doc_kv_pairs.items(): - merged_node_ids: List[str] = [] - metadata: Dict[str, Any] = {} - for kv_pair in kv_pairs: - nodes = kv_pair[1].get("node_ids", []) - new_nodes = set(nodes).difference(set(merged_node_ids)) - merged_node_ids.extend( - [node for node in nodes if node in new_nodes] - ) - metadata.update(kv_pair[1].get("metadata", {})) - merged_ref_doc_kv_pairs.append( - (key, {"node_ids": merged_node_ids, "metadata": metadata}) - ) + merged_node_ids: List[str] = [] + metadata: Dict[str, Any] = {} + for kv_pair in kv_pairs: + nodes = kv_pair[1].get("node_ids", []) + new_nodes = set(nodes).difference(set(merged_node_ids)) + merged_node_ids.extend([node for node in nodes if node in new_nodes]) + metadata.update(kv_pair[1].get("metadata", {})) + merged_ref_doc_kv_pairs.append( + (key, {"node_ids": merged_node_ids, "metadata": metadata}) + ) return merged_ref_doc_kv_pairs diff --git a/llama-index-core/llama_index/core/workflow/context.py b/llama-index-core/llama_index/core/workflow/context.py index 79c00c7a9a15f..7273f36d22160 100644 --- a/llama-index-core/llama_index/core/workflow/context.py +++ b/llama-index-core/llama_index/core/workflow/context.py @@ -156,7 +156,7 @@ async def set(self, key: str, value: Any, make_private: bool = False) -> None: async with self.lock: self._globals[key] = value - async def get(self, key: str, default: Optional[Any] = None) -> Any: + async def get(self, key: str, default: Optional[Any] = Ellipsis) -> Any: """Get the value corresponding to `key` from the Context. 
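With the `synthesize`/`asynthesize` overrides above, the `Generation` synthesizer can produce an answer even when no nodes are passed in, and the async streaming path now awaits `astream` instead of calling the sync `stream`. A minimal sketch using the same `MockLLM` stand-in as the new unit tests (swap in a real LLM for actual generation):

```python
from llama_index.core.llms import MockLLM
from llama_index.core.response_synthesizers.generation import Generation

# MockLLM echoes the prompt, which keeps the sketch self-contained and offline.
synthesizer = Generation(llm=MockLLM())

# Zero retrieved nodes is now a supported path: the query alone drives generation.
response = synthesizer.synthesize(query="test", nodes=[])
print(str(response))  # "test" with MockLLM, per the new tests
```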
Args: @@ -169,7 +169,7 @@ async def get(self, key: str, default: Optional[Any] = None) -> Any: async with self.lock: if key in self._globals: return self._globals[key] - elif default is not None: + elif default is not Ellipsis: return default msg = f"Key '{key}' not found in Context" diff --git a/llama-index-core/llama_index/core/workflow/workflow.py b/llama-index-core/llama_index/core/workflow/workflow.py index 4b1449df06e16..711c65de17823 100644 --- a/llama-index-core/llama_index/core/workflow/workflow.py +++ b/llama-index-core/llama_index/core/workflow/workflow.py @@ -396,7 +396,7 @@ def is_done(self) -> bool: @step async def _done(self, ctx: Context, ev: StopEvent) -> None: """Tears down the whole workflow and stop execution.""" - ctx._retval = ev.result or None + ctx._retval = ev.result ctx.write_event_to_stream(ev) # Signal we want to stop the workflow diff --git a/llama-index-core/pyproject.toml b/llama-index-core/pyproject.toml index eb225a7e301f1..ae6877ff51af9 100644 --- a/llama-index-core/pyproject.toml +++ b/llama-index-core/pyproject.toml @@ -46,7 +46,7 @@ name = "llama-index-core" packages = [{include = "llama_index"}] readme = "README.md" repository = "https://github.com/run-llama/llama_index" -version = "0.11.20" +version = "0.11.21" [tool.poetry.dependencies] SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"} diff --git a/llama-index-core/tests/agent/custom/test_query_pipeline.py b/llama-index-core/tests/agent/custom/test_query_pipeline.py index 4d7ba56df6b42..b017587de1877 100644 --- a/llama-index-core/tests/agent/custom/test_query_pipeline.py +++ b/llama-index-core/tests/agent/custom/test_query_pipeline.py @@ -1,7 +1,7 @@ """Test query pipeline worker.""" from typing import Any, Dict, Set, Tuple - +import pytest from llama_index.core.agent.custom.pipeline_worker import ( QueryPipelineAgentWorker, ) @@ -63,6 +63,26 @@ def mock_agent_output_fn( return AgentChatResponse(response=str(output)), is_done +class MyCustomAgentComponent(CustomAgentComponent): + """Custom agent component.""" + + separator: str = Field(default=":", description="Separator") + + def _run_component(self, **kwargs: Any) -> Dict[str, Any]: + """Run component.""" + return {"output": kwargs["a"] + self.separator + kwargs["a"]} + + @property + def _input_keys(self) -> Set[str]: + """Input keys.""" + return {"a"} + + @property + def _output_keys(self) -> Set[str]: + """Output keys.""" + return {"output"} + + def test_qp_agent_fn() -> None: """Test query pipeline agent. @@ -90,29 +110,34 @@ def test_qp_agent_fn() -> None: assert step_output.is_last is True -class MyCustomAgentComponent(CustomAgentComponent): - """Custom agent component.""" +@pytest.mark.asyncio() +async def test_qp_agent_async_fn() -> None: + """ + Test query pipeline agent with async function components. 
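Switching the `Context.get` sentinel from `None` to `Ellipsis` means `await ctx.get(key, default=None)` now genuinely returns `None` for a missing key instead of raising, and dropping the `or None` in `_done` keeps falsy `StopEvent` results (such as `0` or `[]`) intact. A rough sketch of a workflow step relying on both behaviours:

```python
from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step


class CounterWorkflow(Workflow):
    @step
    async def count(self, ctx: Context, ev: StartEvent) -> StopEvent:
        # A missing key with default=None now returns None instead of raising.
        previous = await ctx.get("count", default=None)
        current = 0 if previous is None else previous + 1
        await ctx.set("count", current)
        # A falsy result (0 on the first run) is preserved rather than coerced to None.
        return StopEvent(result=current)


# result = await CounterWorkflow().run()  # -> 0, not None
```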
+ """ + agent_input = AgentInputComponent(fn=mock_agent_input_fn) + fn_component = FnComponent(fn=mock_fn) + agent_output = AgentFnComponent(fn=mock_agent_output_fn) + qp = QueryPipeline(chain=[agent_input, fn_component, agent_output]) - separator: str = Field(default=":", description="Separator") + agent_worker = QueryPipelineAgentWorker(pipeline=qp) + agent_runner = AgentRunner(agent_worker=agent_worker) - def _run_component(self, **kwargs: Any) -> Dict[str, Any]: - """Run component.""" - return {"output": kwargs["a"] + self.separator + kwargs["a"]} + # test create_task + task = agent_runner.create_task("foo") + assert task.input == "foo" - @property - def _input_keys(self) -> Set[str]: - """Input keys.""" - return {"a"} + first_step_output = await agent_runner.arun_step(task.task_id) + assert str(first_step_output.output) == "foo3" + assert first_step_output.is_last is False - @property - def _output_keys(self) -> Set[str]: - """Output keys.""" - return {"output"} + second_step_output = await agent_runner.arun_step(task.task_id) + assert str(second_step_output.output) == "foo33" + assert second_step_output.is_last is True def test_qp_agent_custom() -> None: """Test query pipeline agent. - Implement via `AgentCustomQueryComponent` subclass. """ diff --git a/llama-index-core/tests/response_synthesizers/test_generate.py b/llama-index-core/tests/response_synthesizers/test_generate.py new file mode 100644 index 0000000000000..c14c1a1eea69e --- /dev/null +++ b/llama-index-core/tests/response_synthesizers/test_generate.py @@ -0,0 +1,40 @@ +import pytest + +from llama_index.core.llms import MockLLM +from llama_index.core.response_synthesizers.generation import Generation + + +def test_synthesize() -> None: + synthesizer = Generation(llm=MockLLM()) + response = synthesizer.synthesize(query="test", nodes=[]) + assert str(response) == "test" + + +def test_synthesize_stream() -> None: + synthesizer = Generation(llm=MockLLM(), streaming=True) + response = synthesizer.synthesize(query="test", nodes=[]) + + gold = "test" + i = 0 + for chunk in response.response_gen: + assert chunk == gold[i] + i += 1 + + +@pytest.mark.asyncio() +async def test_asynthesize() -> None: + synthesizer = Generation(llm=MockLLM()) + response = await synthesizer.asynthesize(query="test", nodes=[]) + assert str(response) == "test" + + +@pytest.mark.asyncio() +async def test_asynthesize_stream() -> None: + synthesizer = Generation(llm=MockLLM(), streaming=True) + response = await synthesizer.asynthesize(query="test", nodes=[]) + + gold = "test" + i = 0 + async for chunk in response.async_response_gen(): + assert chunk == gold[i] + i += 1 diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-gemini/llama_index/embeddings/gemini/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-gemini/llama_index/embeddings/gemini/base.py index 5782eb72d1a74..2c5a0baa208bf 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-gemini/llama_index/embeddings/gemini/base.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-gemini/llama_index/embeddings/gemini/base.py @@ -109,11 +109,11 @@ def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: async def _aget_query_embedding(self, query: str) -> List[float]: """The asynchronous version of _get_query_embedding.""" - return self._aget_text_embeddings(query) + return (await self._aget_text_embeddings([query]))[0] async def _aget_text_embedding(self, text: str) -> List[float]: """Asynchronously get text embedding.""" - 
return self._aget_text_embeddings(text) + return (await self._aget_text_embeddings([text]))[0] async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]: """Asynchronously get text embeddings.""" diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-gemini/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-gemini/pyproject.toml index 083ba8479a767..e043d643d1773 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-gemini/pyproject.toml +++ b/llama-index-integrations/embeddings/llama-index-embeddings-gemini/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-embeddings-gemini" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/.gitignore b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/.gitignore similarity index 100% rename from llama-index-integrations/llms/llama-index-llms-sambanova/.gitignore rename to llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/.gitignore diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/BUILD similarity index 100% rename from llama-index-integrations/llms/llama-index-llms-sambanova/BUILD rename to llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/BUILD diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/Makefile similarity index 100% rename from llama-index-integrations/llms/llama-index-llms-sambanova/Makefile rename to llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/Makefile diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/README.md new file mode 100644 index 0000000000000..a18d2a0fb4478 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/README.md @@ -0,0 +1,41 @@ +# LlamaIndex Embeddings Integration: SiliconFlow + +## 1. Product Introduction + +SiliconCloud provides cost-effective GenAI services based on an excellent open-source foundation model. +introduction: https://docs.siliconflow.cn/introduction + +## 2. Product features + +- As a one-stop cloud service platform that integrates top large models, SiliconCloud is committed to providing developers with faster, cheaper, more comprehensive, and smoother model APIs. + + - SiliconCloud has been listed on Qwen2.5-72B, DeepSeek-V2.5, Qwen2, InternLM2.5-20B-Chat, BCE, BGE, SenseVoice-Small, Llama-3.1, FLUX.1, DeepSeek-Coder-V2, SD3 Medium, GLM-4-9B-Chat, A variety of open-source large language models, image generation models, code generation models, vector and reordering models, and multimodal large models, including InstantID. + + - Among them, Qwen 2.5 (7B), Llama 3.1 (8B) and other large model APIs are free to use, so that developers and product managers do not need to worry about the computing power costs caused by the R&D stage and large-scale promotion, and realize "token freedom". + +- Provide out-of-the-box large model inference acceleration services to bring a more efficient user experience to your GenAI applications. + +## 3. Installation + +```shell +pip install llama-index-embeddings-siliconflow +``` + +## 4. 
Usage + +```python +import asyncio +import os +from llama_index.embeddings.siliconflow import SiliconFlowEmbedding + +embedding = SiliconFlowEmbedding( + model="BAAI/bge-m3", + api_key=os.getenv("SILICONFLOW_API_KEY"), +) + +response = embedding.get_query_embedding("...") +print(response) + +response = asyncio.run(embedding.aget_query_embedding("...")) +print(response) +``` diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/BUILD similarity index 100% rename from llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/BUILD rename to llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/BUILD diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/__init__.py new file mode 100644 index 0000000000000..a43bad1244a7e --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/__init__.py @@ -0,0 +1,3 @@ +from llama_index.embeddings.siliconflow.base import SiliconFlowEmbedding + +__all__ = ["SiliconFlowEmbedding"] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/base.py new file mode 100644 index 0000000000000..6440d95b5332d --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/llama_index/embeddings/siliconflow/base.py @@ -0,0 +1,146 @@ +"""SiliconFLow embeddings file.""" + +import aiohttp +import base64 +import requests +import struct +from typing import Any, List, Optional +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.embeddings import BaseEmbedding + + +DEFAULT_SILICONFLOW_API_URL = "https://api.siliconflow.cn/v1/embeddings" + +VALID_ENCODING = ["float", "base64"] + +AVAILABLE_OPTIONS = [ + ("Pro/BAAI/bge-m3", 1024), ## 8192 tokens + ("BAAI/bge-m3", 1024), ## 8192 tokens + ("BAAI/bge-large-zh-v1.5", 1024), ## 512 tokens + ("BAAI/bge-large-en-v1.5", 1024), ## 512 tokens + ("netease-youdao/bce-embedding-base_v1", 768), ## 512 tokens +] + + +def base64_to_float_list(encoded_str: str) -> List[float]: + byte_data = base64.b64decode(encoded_str) + float_count = len(byte_data) // 4 + float_list = struct.unpack(f"{float_count}f", byte_data) + return list(float_list) + + +class SiliconFlowEmbedding(BaseEmbedding): + """SiliconFlow class for embeddings.""" + + model: str = Field( + default="BAAI/bge-m3", + description="""\ + The name of the embedding model to use. + 512 tokens for all models input except `bge-m3` which is 8192. + """, + ) + api_key: Optional[str] = Field( + default=None, + description="The SiliconFlow API key.", + ) + base_url: str = Field( + default=DEFAULT_SILICONFLOW_API_URL, + description="The base URL for the SiliconFlow API.", + ) + encoding_format: str = Field( + default="float", + description="The format to return the embeddings in. Can be either float or base64.", + ) # TODO: Consider whether to fix the encoding format as float. 
+ + _headers: Any = PrivateAttr() + + def __init__( + self, + model: str = "BAAI/bge-m3", + api_key: Optional[str] = None, + base_url: str = DEFAULT_SILICONFLOW_API_URL, + encoding_format: Optional[str] = "float", + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + super().__init__( + model=model, + api_key=api_key, + base_url=base_url, + encoding_format=encoding_format, + callback_manager=callback_manager, + **kwargs, + ) + assert ( + self.encoding_format in VALID_ENCODING + ), f"""\ + Encoding_format parameter {self.encoding_format} not supported. + Please choose one of {VALID_ENCODING}". + """ + + self._headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + @classmethod + def class_name(cls) -> str: + return "SiliconFlowEmbedding" + + def _data_formatting(self, response: list) -> List[List[float]]: + results = sorted(response["data"], key=lambda e: e["index"]) + if self.encoding_format == "base64": + return [base64_to_float_list(data["embedding"]) for data in results] + else: + return [data["embedding"] for data in results] + + def _get_query_embedding(self, query: str) -> List[float]: + """Get query embedding.""" + return self._get_text_embeddings([query])[0] + + async def _aget_query_embedding(self, query: str) -> List[float]: + """The asynchronous version of _get_query_embedding.""" + result = await self._aget_text_embeddings([query]) + return result[0] + + def _get_text_embedding(self, text: str) -> List[float]: + """Get text embedding.""" + return self._get_text_embeddings([text])[0] + + async def _aget_text_embedding(self, text: str) -> List[float]: + """Asynchronously get text embedding.""" + result = await self._aget_text_embeddings([text]) + return result[0] + + def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: + with requests.Session() as session: + input_json = { + "model": self.model, + "input": texts, + "encoding_format": self.encoding_format, + } + response = session.post( + self.base_url, json=input_json, headers=self._headers + ).json() + if "data" not in response: + raise RuntimeError(response) + return self._data_formatting(response) + + async def _aget_text_embeddings( + self, + texts: List[str], + ) -> List[List[float]]: + async with aiohttp.ClientSession() as session: + input_json = { + "input": texts, + "model": self.model, + "encoding_format": self.encoding_format, + } + + async with session.post( + self.base_url, json=input_json, headers=self._headers + ) as response: + response_json = await response.json() + response.raise_for_status() + return self._data_formatting(response_json) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/pyproject.toml new file mode 100644 index 0000000000000..fff927e601285 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/pyproject.toml @@ -0,0 +1,63 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.embeddings.siliconflow" + +[tool.llamahub.class_authors] +SiliconFlowEmbedding = "nightosong" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = 
"3.8" + +[tool.poetry] +authors = ["nightosong "] +description = "llama-index embeddings siliconflow integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-embeddings-siliconflow" +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.11.0" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-asyncio = "^0.23.7" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/tests/BUILD similarity index 100% rename from llama-index-integrations/indices/llama-index-indices-managed-bge-m3/tests/BUILD rename to llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/tests/BUILD diff --git a/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/tests/__init__.py similarity index 100% rename from llama-index-integrations/indices/llama-index-indices-managed-bge-m3/tests/__init__.py rename to llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/tests/__init__.py diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/tests/test_embeddings_siliconflow.py b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/tests/test_embeddings_siliconflow.py new file mode 100644 index 0000000000000..10d00c01cb469 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-siliconflow/tests/test_embeddings_siliconflow.py @@ -0,0 +1,133 @@ +import json +import pytest +import types +from requests import Response +from unittest import mock +from typing import Optional, Type +from llama_index.core.embeddings import BaseEmbedding +from llama_index.embeddings.siliconflow import SiliconFlowEmbedding + + +class MockAsyncResponse: + def __init__(self, json_data) -> None: + self._json_data = json_data + + def raise_for_status(self) -> None: + ... + + async def __aenter__(self) -> "MockAsyncResponse": + return self + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + tb: Optional[types.TracebackType], + ) -> None: + pass + + async def json(self) -> dict: + return self._json_data + + +def test_embedding_class(): + emb = SiliconFlowEmbedding() + assert isinstance(emb, BaseEmbedding) + + +def test_float_format_embedding(): + input_text = "..." 
+ mock_response = Response() + mock_response._content = json.dumps( + { + "model": "", + "data": [{"object": "embedding", "embedding": [123], "index": 0}], + "usage": { + "prompt_tokens": 123, + "completion_tokens": 123, + "total_tokens": 123, + }, + } + ).encode("utf-8") + embedding = SiliconFlowEmbedding(api_key="...") + with mock.patch("requests.Session.post", return_value=mock_response) as mock_post: + actual_result = embedding.get_query_embedding(input_text) + expected_result = [123] + + assert actual_result == expected_result + + mock_post.assert_called_once_with( + embedding.base_url, + json={ + "model": embedding.model, + "input": [input_text], + "encoding_format": "float", + }, + headers=embedding._headers, + ) + + +def test_base64_format_embedding(): + input_text = "..." + mock_response = Response() + mock_response._content = json.dumps( + { + "model": "", + "data": [{"object": "embedding", "embedding": "AAD2Qg==", "index": 0}], + "usage": { + "prompt_tokens": 123, + "completion_tokens": 123, + "total_tokens": 123, + }, + } + ).encode("utf-8") + embedding = SiliconFlowEmbedding(api_key="...", encoding_format="base64") + with mock.patch("requests.Session.post", return_value=mock_response) as mock_post: + actual_result = embedding.get_query_embedding(input_text) + expected_result = [123] + + assert actual_result == expected_result + + mock_post.assert_called_once_with( + embedding.base_url, + json={ + "model": embedding.model, + "input": [input_text], + "encoding_format": "base64", + }, + headers=embedding._headers, + ) + + +@pytest.mark.asyncio() +async def test_float_format_embedding_async(): + input_text = "..." + mock_response = MockAsyncResponse( + json_data={ + "model": "", + "data": [{"object": "embedding", "embedding": [123], "index": 0}], + "usage": { + "prompt_tokens": 123, + "completion_tokens": 123, + "total_tokens": 123, + }, + } + ) + embedding = SiliconFlowEmbedding(api_key="...") + with mock.patch( + "aiohttp.ClientSession.post", return_value=mock_response + ) as mock_post: + actual_result = await embedding.aget_query_embedding(input_text) + expected_result = [123] + + assert actual_result == expected_result + + mock_post.assert_called_once_with( + embedding.base_url, + json={ + "model": embedding.model, + "input": [input_text], + "encoding_format": "float", + }, + headers=embedding._headers, + ) diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/base.py b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/base.py index ba53a7c4639fa..4e75b5de053c3 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/base.py +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/base.py @@ -31,10 +31,11 @@ def __init__( """Initialize params.""" self._node_label = node_label - self._driver = FalkorDB.from_url(url).select_graph(database) + self._driver = FalkorDB.from_url(url) + self._graph = self._driver.select_graph(database) try: - self._driver.query(f"CREATE INDEX FOR (n:`{self._node_label}`) ON (n.id)") + self._graph.query(f"CREATE INDEX FOR (n:`{self._node_label}`) ON (n.id)") except redis.ResponseError as e: # TODO: to find an appropriate way to handle this issue. 
logger.warning("Create index failed: %s", e) @@ -49,13 +50,11 @@ def __init__( @property def client(self) -> None: - return self._driver + return self._graph def get(self, subj: str) -> List[List[str]]: """Get triplets.""" - result = self._driver.query( - self.get_query, params={"subj": subj}, read_only=True - ) + result = self._graph.query(self.get_query, params={"subj": subj}) return result.result_set def get_rel_map( @@ -123,7 +122,7 @@ def upsert_triplet(self, subj: str, rel: str, obj: str) -> None: ) # Call FalkorDB with prepared statement - self._driver.query(prepared_statement, params={"subj": subj, "obj": obj}) + self._graph.query(prepared_statement, params={"subj": subj, "obj": obj}) def delete(self, subj: str, rel: str, obj: str) -> None: """Delete triplet.""" @@ -136,13 +135,13 @@ def delete_rel(subj: str, obj: str, rel: str) -> None: """ # Call FalkorDB with prepared statement - self._driver.query(query, params={"subj": subj, "obj": obj}) + self._graph.query(query, params={"subj": subj, "obj": obj}) def delete_entity(entity: str) -> None: query = f"MATCH (n:`{self._node_label}`) WHERE n.id = $entity DELETE n" # Call FalkorDB with prepared statement - self._driver.query(query, params={"entity": entity}) + self._graph.query(query, params={"entity": entity}) def check_edges(entity: str) -> bool: query = f""" @@ -151,9 +150,7 @@ def check_edges(entity: str) -> bool: """ # Call FalkorDB with prepared statement - result = self._driver.query( - query, params={"entity": entity}, read_only=True - ) + result = self._graph.query(query, params={"entity": entity}) return bool(result.result_set) delete_rel(subj, obj, rel) @@ -183,5 +180,21 @@ def get_schema(self, refresh: bool = False) -> str: return self.schema def query(self, query: str, params: Optional[Dict[str, Any]] = None) -> Any: - result = self._driver.query(query, params=params) + result = self._graph.query(query, params=params) return result.result_set + + def switch_graph(self, graph_name: str) -> None: + """Switch to the given graph name (`graph_name`). + + This method allows users to change the active graph within the same + database connection. + + Args: + graph_name (str): The name of the graph to switch to. + """ + self._graph = self._driver.select_graph(graph_name) + + try: + self.refresh_schema() + except Exception as e: + raise ValueError(f"Could not refresh schema. 
Error: {e}") diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/falkordb_property_graph.py b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/falkordb_property_graph.py index 60dc12f97e03f..4882b5c3b4529 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/falkordb_property_graph.py +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/llama_index/graph_stores/falkordb/falkordb_property_graph.py @@ -127,7 +127,8 @@ def __init__( **falkordb_kwargs: Any, ) -> None: self.sanitize_query_output = sanitize_query_output - self._driver = FalkorDB.from_url(url).select_graph(database) + self._driver = FalkorDB.from_url(url) + self._graph = self._driver.select_graph(database) self._database = database self.structured_schema = {} if refresh_schema: @@ -135,7 +136,7 @@ def __init__( @property def client(self): - return self._driver + return self._graph def refresh_schema(self) -> None: """Refresh the schema.""" @@ -459,7 +460,7 @@ def structured_query( ) -> Any: param_map = param_map or {} - result = self._driver.query(query, param_map) + result = self._graph.query(query, param_map) full_result = [ {h[1]: d[i] for i, h in enumerate(result.header)} for d in result.result_set ] @@ -591,5 +592,21 @@ def get_schema_str(self, refresh: bool = False) -> str: ] ) + def switch_graph(self, graph_name: str) -> None: + """Switch to the given graph name (`graph_name`). + + This method allows users to change the active graph within the same + database connection. + + Args: + graph_name (str): The name of the graph to switch to. + """ + self._graph = self._driver.select_graph(graph_name) + + try: + self.refresh_schema() + except Exception as e: + raise ValueError(f"Could not refresh schema. 
Error: {e}") + FalkorDBPGStore = FalkorDBPropertyGraphStore diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/pyproject.toml b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/pyproject.toml index 8c4d84fd989ad..ce8bf89ece7bb 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/pyproject.toml +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/pyproject.toml @@ -28,7 +28,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-graph-stores-falkordb" readme = "README.md" -version = "0.2.3" +version = "0.2.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" @@ -36,6 +36,7 @@ falkordb = "^1.0.8" llama-index-core = "^0.11.0" [tool.poetry.group.dev.dependencies] +docker = "^7.1.0" ipython = "8.10.0" jupyter = "^1.0.0" mypy = "0.991" diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_graph_stores_falkordb.py b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_graph_stores_falkordb.py index a137d40706215..3e6f8c1c1a4b4 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_graph_stores_falkordb.py +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_graph_stores_falkordb.py @@ -1,9 +1,64 @@ -from unittest.mock import patch - -from llama_index.core.graph_stores.types import GraphStore +import time +import docker +import unittest from llama_index.graph_stores.falkordb.base import FalkorDBGraphStore +# Set up Docker client +docker_client = docker.from_env() + + +class TestFalkorDBGraphStore(unittest.TestCase): + @classmethod + def setUpClass(cls): + """Setup method called once for the entire test class.""" + # Start FalkorDB container + try: + cls.container = docker_client.containers.run( + "falkordb/falkordb:latest", + detach=True, + name="falkordb_test_instance", + ports={"6379/tcp": 6379}, + ) + time.sleep(2) # Allow time for the container to initialize + except Exception as e: + print(f"Error starting FalkorDB container: {e}") + raise + + # Set up the FalkorDB Graph store + cls.graph_store = FalkorDBGraphStore(url="redis://localhost:6379") + + @classmethod + def tearDownClass(cls): + """Teardown method called once after all tests are done.""" + try: + cls.container.stop() + cls.container.remove() + except Exception as e: + print(f"Error stopping/removing container: {e}") + + def test_base_graph(self): + self.graph_store.upsert_triplet("node1", "related_to", "node2") + + # Check if the data has been inserted correctly + result = self.graph_store.get("node1") + expected_result = [ + "RELATED_TO", + "node2", + ] # Expected data + self.assertIn(expected_result, result) + + result = self.graph_store.get_rel_map(["node1"], 1) + self.assertIn(expected_result, result["node1"]) + + self.graph_store.delete("node1", "related_to", "node2") + + result = self.graph_store.get("node1") + expected_result = [] # Expected data + self.assertEqual(expected_result, result) + + self.graph_store.switch_graph("new_graph") + self.graph_store.refresh_schema() + -@patch("llama_index.graph_stores.falkordb.base.FalkorDBGraphStore") -def test_falkordb_class(mock_db: FalkorDBGraphStore): - assert isinstance(mock_db, GraphStore) +if __name__ == "__main__": + unittest.main() diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_pg_stores_falkordb.py 
b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_pg_stores_falkordb.py index b603c9d03dcdc..c0fe523c9ed6b 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_pg_stores_falkordb.py +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-falkordb/tests/test_pg_stores_falkordb.py @@ -1,106 +1,130 @@ -import os -import pytest - +import time +import unittest +import docker from llama_index.graph_stores.falkordb import FalkorDBPropertyGraphStore from llama_index.core.graph_stores.types import Relation, EntityNode from llama_index.core.schema import TextNode -falkordb_url = os.environ.get("FALKORDB_TEST_URL") - -if not falkordb_url: - falkordb_available = False -else: - falkordb_available = True - - -@pytest.fixture() -def pg_store() -> FalkorDBPropertyGraphStore: - if not falkordb_available: - pytest.skip("No falkordb credentials provided") - pg_store = FalkorDBPropertyGraphStore(url=falkordb_url) - pg_store.structured_query("MATCH (n) DETACH DELETE n") - return pg_store - - -def test_falkordb_pg_store(pg_store: FalkorDBPropertyGraphStore) -> None: - # Create a two entity nodes - entity1 = EntityNode(label="PERSON", name="Logan", properties={"age": 28}) - entity2 = EntityNode(label="ORGANIZATION", name="LlamaIndex") - - # Create a relation - relation = Relation( - label="WORKS_FOR", - source_id=entity1.id, - target_id=entity2.id, - properties={"since": 2023}, - ) - - pg_store.upsert_nodes([entity1, entity2]) - pg_store.upsert_relations([relation]) - - source_node = TextNode(text="Logan (age 28), works for LlamaIndex since 2023.") - relations = [ - Relation( - label="MENTIONS", - target_id=entity1.id, - source_id=source_node.node_id, - ), - Relation( - label="MENTIONS", +# Set up Docker client +docker_client = docker.from_env() + + +class TestFalkorDBPropertyGraphStore(unittest.TestCase): + @classmethod + def setUpClass(cls): + """Setup method called once for the entire test class.""" + # Start FalkorDB container + try: + cls.container = docker_client.containers.run( + "falkordb/falkordb:latest", + detach=True, + name="falkordb_test_instance_pg", + ports={"6379/tcp": 6380}, + ) + time.sleep(2) # Allow time for the container to initialize + except Exception as e: + print(f"Error starting FalkorDB container: {e}") + raise + + # Set up the property graph store and clear database + cls.pg_store = FalkorDBPropertyGraphStore(url="redis://localhost:6380") + cls.pg_store.structured_query("MATCH (n) DETACH DELETE n") # Clear the database + + @classmethod + def tearDownClass(cls): + """Teardown method called once after all tests are done.""" + try: + cls.container.stop() + cls.container.remove() + except Exception as e: + print(f"Error stopping/removing container: {e}") + + def test_pg_graph(self): + # Create two entity nodes + entity1 = EntityNode(label="PERSON", name="Logan", properties={"age": 28}) + entity2 = EntityNode(label="ORGANIZATION", name="LlamaIndex") + + # Create a relation + relation = Relation( + label="WORKS_FOR", + source_id=entity1.id, target_id=entity2.id, - source_id=source_node.node_id, - ), - ] - - pg_store.upsert_llama_nodes([source_node]) - pg_store.upsert_relations(relations) - - kg_nodes = pg_store.get(ids=[entity1.id]) - assert len(kg_nodes) == 1 - assert kg_nodes[0].label == "PERSON" - assert kg_nodes[0].name == "Logan" - - kg_nodes = pg_store.get(properties={"age": 28}) - assert len(kg_nodes) == 1 - assert kg_nodes[0].label == "PERSON" - assert kg_nodes[0].name == "Logan" - - # get 
paths from a node - paths = pg_store.get_rel_map(kg_nodes, depth=1) - for path in paths: - assert path[0].id == entity1.id - assert path[2].id == entity2.id - assert path[1].id == relation.id - - query = "match (n:`__Entity__`) return n" - result = pg_store.structured_query(query) - assert len(result) == 2 - - # get the original text node back - llama_nodes = pg_store.get_llama_nodes([source_node.node_id]) - assert len(llama_nodes) == 1 - assert llama_nodes[0].text == source_node.text - - # Upsert a new node - new_node = EntityNode( - label="PERSON", name="Logan", properties={"age": 28, "location": "Canada"} - ) - pg_store.upsert_nodes([new_node]) - kg_nodes = pg_store.get(properties={"age": 28}) - assert len(kg_nodes) == 1 - assert kg_nodes[0].label == "PERSON" - assert kg_nodes[0].name == "Logan" - assert kg_nodes[0].properties["location"] == "Canada" - - # deleting - # delete our entities - pg_store.delete(ids=[entity1.id, entity2.id]) - - # delete our text nodes - pg_store.delete(ids=[source_node.node_id]) - - nodes = pg_store.get(ids=[entity1.id, entity2.id]) - assert len(nodes) == 0 - - text_nodes = pg_store.get_llama_nodes([source_node.node_id]) - assert len(text_nodes) == 0 + properties={"since": 2023}, + ) + + self.pg_store.upsert_nodes([entity1, entity2]) + self.pg_store.upsert_relations([relation]) + + source_node = TextNode(text="Logan (age 28), works for LlamaIndex since 2023.") + relations = [ + Relation( + label="MENTIONS", + target_id=entity1.id, + source_id=source_node.node_id, + ), + Relation( + label="MENTIONS", + target_id=entity2.id, + source_id=source_node.node_id, + ), + ] + + self.pg_store.upsert_llama_nodes([source_node]) + self.pg_store.upsert_relations(relations) + + kg_nodes = self.pg_store.get(ids=[entity1.id]) + self.assertEqual(len(kg_nodes), 1) + self.assertEqual(kg_nodes[0].label, "PERSON") + self.assertEqual(kg_nodes[0].name, "Logan") + + kg_nodes = self.pg_store.get(properties={"age": 28}) + self.assertEqual(len(kg_nodes), 1) + self.assertEqual(kg_nodes[0].label, "PERSON") + self.assertEqual(kg_nodes[0].name, "Logan") + + # Get paths from a node + paths = self.pg_store.get_rel_map(kg_nodes, depth=1) + for path in paths: + self.assertEqual(path[0].id, entity1.id) + self.assertEqual(path[2].id, entity2.id) + self.assertEqual(path[1].id, relation.id) + + query = "MATCH (n:`__Entity__`) RETURN n" + result = self.pg_store.structured_query(query) + self.assertEqual(len(result), 2) + + # Get the original text node back + llama_nodes = self.pg_store.get_llama_nodes([source_node.node_id]) + self.assertEqual(len(llama_nodes), 1) + self.assertEqual(llama_nodes[0].text, source_node.text) + + # Upsert a new node + new_node = EntityNode( + label="PERSON", name="Logan", properties={"age": 28, "location": "Canada"} + ) + self.pg_store.upsert_nodes([new_node]) + kg_nodes = self.pg_store.get(properties={"age": 28}) + self.assertEqual(len(kg_nodes), 1) + self.assertEqual(kg_nodes[0].label, "PERSON") + self.assertEqual(kg_nodes[0].name, "Logan") + self.assertEqual(kg_nodes[0].properties["location"], "Canada") + + # Deleting + # Delete our entities + self.pg_store.delete(ids=[entity1.id, entity2.id]) + + # Delete our text nodes + self.pg_store.delete(ids=[source_node.node_id]) + + nodes = self.pg_store.get(ids=[entity1.id, entity2.id]) + self.assertEqual(len(nodes), 0) + + text_nodes = self.pg_store.get_llama_nodes([source_node.node_id]) + self.assertEqual(len(text_nodes), 0) + + self.pg_store.switch_graph("new_graph") + self.pg_store.refresh_schema() + + +if __name__ == 
"__main__": + unittest.main() diff --git a/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/pyproject.toml index b354d64f2fdfe..4809243f25eba 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/pyproject.toml +++ b/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/pyproject.toml @@ -30,12 +30,12 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-indices-managed-bge-m3" readme = "README.md" -version = "0.2.0" +version = "0.3.0" [tool.poetry.dependencies] -python = ">=3.8.1,<4.0" -peft = "^0.12.0" -flagembedding = "^1.2.11" +python = ">=3.10,<4.0" +peft = ">=0.12.0" +flagembedding = ">=1.2.11" llama-index-core = "^0.11.0" [tool.poetry.group.dev.dependencies] diff --git a/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/tests/test_indices_bge_m3.py b/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/tests/test_indices_bge_m3.py deleted file mode 100644 index 2f3c2cc47d6ae..0000000000000 --- a/llama-index-integrations/indices/llama-index-indices-managed-bge-m3/tests/test_indices_bge_m3.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.indices.base import BaseIndex -from llama_index.indices.managed.bge_m3 import BGEM3Index - - -def test_class(): - names_of_base_classes = [b.__name__ for b in BGEM3Index.__mro__] - assert BaseIndex.__name__ in names_of_base_classes diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/base.py b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/base.py index d1d4b59e0e613..493b3c2033995 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/base.py +++ b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/base.py @@ -530,6 +530,7 @@ def _prepare_chat_with_tools( chat_history: Optional[List[ChatMessage]] = None, verbose: bool = False, allow_parallel_tool_calls: bool = False, + tool_choice: Optional[dict] = None, **kwargs: Any, ) -> Dict[str, Any]: """Prepare the arguments needed to let the LLM chat with tools.""" @@ -540,11 +541,15 @@ def _prepare_chat_with_tools( chat_history.append(user_msg) # convert Llama Index tools to AWS Bedrock Converse tools - tool_dicts = tools_to_converse_tools(tools) + tool_config = tools_to_converse_tools(tools) + if tool_choice: + # https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html + # e.g. 
{ "auto": {} } + tool_config["toolChoice"] = tool_choice return { "messages": chat_history, - "tools": tool_dicts or None, + "tools": tool_config, **kwargs, } diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/utils.py b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/utils.py index 68bf048a59b2d..cba677167d1b5 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/llama_index/llms/bedrock_converse/utils.py @@ -57,6 +57,7 @@ "cohere.command-r-v1:0", "cohere.command-r-plus-v1:0", "mistral.mistral-large-2402-v1:0", + "mistral.mistral-large-2407-v1:0", ) @@ -174,21 +175,10 @@ def tools_to_converse_tools(tools: List["BaseTool"]) -> Dict[str, Any]: """ converse_tools = [] for tool in tools: - tool_name, tool_description = getattr(tool, "name", None), getattr( - tool, "description", None - ) - if not tool_name or not tool_description: - # get the tool's name and description from the metadata if they aren't defined - tool_name = getattr(tool.metadata, "name", None) - if tool_fn := getattr(tool, "fn", None): - # get the tool's description from the function's docstring - tool_description = tool_fn.__doc__ - if not tool_name: - tool_name = tool_fn.__name__ - else: - tool_description = getattr(tool.metadata, "description", None) - if not tool_name or not tool_description: - raise ValueError(f"Tool {tool} does not have a name or description.") + tool_name, tool_description = tool.metadata.name, tool.metadata.description + if not tool_name: + raise ValueError(f"Tool {tool} does not have a name.") + tool_dict = { "name": tool_name, "description": tool_description, diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml index 29ed937084a09..0eda8d2d2be93 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-bedrock-converse" readme = "README.md" -version = "0.3.4" +version = "0.3.6" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py b/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py index 1790e37f59f82..ecb291c952ee4 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py @@ -82,6 +82,7 @@ "mistral.mistral-7b-instruct-v0:2", "mistral.mixtral-8x7b-instruct-v0:1", "mistral.mistral-large-2402-v1:0", + "mistral.mistral-large-2407-v1:0", } diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml index a239ac4764f04..e0dc539c58c6e 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-bedrock" readme = "README.md" -version = "0.2.4" +version = "0.2.5" [tool.poetry.dependencies] python = 
">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-cohere/llama_index/llms/cohere/utils.py b/llama-index-integrations/llms/llama-index-llms-cohere/llama_index/llms/cohere/utils.py index f79d7df5cc0e4..4a5c69ff2956b 100644 --- a/llama-index-integrations/llms/llama-index-llms-cohere/llama_index/llms/cohere/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-cohere/llama_index/llms/cohere/utils.py @@ -42,6 +42,8 @@ "command-nightly": 4096, "command-light": 4096, "command-light-nightly": 4096, + "c4ai-aya-expanse-32b": 128000, + "c4ai-aya-expanse-8b": 128000, } GENERATION_MODELS = {"base": 2048, "base-light": 2048} diff --git a/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml index ff35a822318d7..061985c03fe1b 100644 --- a/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-cohere" readme = "README.md" -version = "0.3.1" +version = "0.3.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-dashscope/llama_index/llms/dashscope/base.py b/llama-index-integrations/llms/llama-index-llms-dashscope/llama_index/llms/dashscope/base.py index 38e753096c422..e3d7e35770f2b 100644 --- a/llama-index-integrations/llms/llama-index-llms-dashscope/llama_index/llms/dashscope/base.py +++ b/llama-index-integrations/llms/llama-index-llms-dashscope/llama_index/llms/dashscope/base.py @@ -85,6 +85,25 @@ def call_with_messages( ) +async def acall_with_messages( + model: str, + messages: List[Dict], + parameters: Optional[Dict] = None, + api_key: Optional[str] = None, + **kwargs: Any, +) -> Dict: + try: + from dashscope import AioGeneration + except ImportError: + raise ValueError( + "DashScope is not installed. Please install it with " + "`pip install dashscope`." + ) + return await AioGeneration.call( + model=model, messages=messages, api_key=api_key, **parameters + ) + + class DashScope(CustomLLM): """DashScope LLM. 
@@ -232,7 +251,9 @@ def _get_input_parameters( return message, parameters @llm_completion_callback() - def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: + def complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: message, parameters = self._get_input_parameters(prompt=prompt, **kwargs) parameters.pop("incremental_output", None) parameters.pop("stream", None) @@ -246,7 +267,25 @@ def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: return dashscope_response_to_completion_response(response) @llm_completion_callback() - def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: + async def acomplete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + message, parameters = self._get_input_parameters(prompt=prompt, **kwargs) + parameters.pop("incremental_output", None) + parameters.pop("stream", None) + messages = chat_message_to_dashscope_messages([message]) + response = await acall_with_messages( + model=self.model_name, + messages=messages, + api_key=self.api_key, + parameters=parameters, + ) + return dashscope_response_to_completion_response(response) + + @llm_completion_callback() + def stream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseGen: message, parameters = self._get_input_parameters(prompt=prompt, kwargs=kwargs) parameters["incremental_output"] = True parameters["stream"] = True @@ -291,6 +330,23 @@ def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: ) return dashscope_response_to_chat_response(response) + @llm_chat_callback() + async def achat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponse: + parameters = self._get_default_parameters() + parameters.update({**kwargs}) + parameters.pop("stream", None) + parameters.pop("incremental_output", None) + parameters["result_format"] = "message" # only use message format. 
+ response = await acall_with_messages( + model=self.model_name, + messages=chat_message_to_dashscope_messages(messages), + api_key=self.api_key, + parameters=parameters, + ) + return dashscope_response_to_chat_response(response) + @llm_chat_callback() def stream_chat( self, messages: Sequence[ChatMessage], **kwargs: Any diff --git a/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml index 74f3e79a1bfc6..238643c8ee188 100644 --- a/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-dashscope" readme = "README.md" -version = "0.2.2" +version = "0.2.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-dashscope/tests/test_dashscope.py b/llama-index-integrations/llms/llama-index-llms-dashscope/tests/test_dashscope.py new file mode 100644 index 0000000000000..4b9974cbb3002 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-dashscope/tests/test_dashscope.py @@ -0,0 +1,59 @@ +from unittest.mock import patch + +import pytest + +from llama_index.core.base.llms.types import ( + CompletionResponse, + ChatMessage, + ChatResponse, +) +from llama_index.llms.dashscope.base import DashScope + + +@pytest.fixture() +def dashscope_llm(): + return DashScope(api_key="test") + + +@pytest.fixture() +def dashscope_api_response(): + return { + "status_code": 200, + "request_id": "4438deec-2d21-9b9c-b405-a47459fd8f75", + "code": "", + "message": "", + "output": { + "choices": [ + { + "finish_reason": "stop", + "message": {"role": "assistant", "content": "hi, there!"}, + } + ] + }, + "usage": {"total_tokens": 161, "output_tokens": 91, "input_tokens": 70}, + } + + +@pytest.fixture() +def prompt() -> str: + return "hi, there!" + + +@patch("llama_index.llms.dashscope.base.call_with_messages") +def test_dashscope_complete( + mock_call_with_messages, dashscope_llm, dashscope_api_response, prompt +): + mock_call_with_messages.return_value = dashscope_api_response + response = dashscope_llm.complete(prompt) + assert isinstance(response, CompletionResponse) + assert response.text == "hi, there!" + + +@patch("llama_index.llms.dashscope.base.call_with_messages") +def test_dashscope_chat( + mock_call_with_messages, dashscope_llm, dashscope_api_response, prompt +): + mock_call_with_messages.return_value = dashscope_api_response + response = dashscope_llm.chat(messages=[ChatMessage.from_str(prompt)]) + assert isinstance(response, ChatResponse) + assert response.message.content == "hi, there!" 
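The DashScope changes above add async support (`acall_with_messages`, `acomplete`, `achat`), but the new `test_dashscope.py` only exercises the synchronous paths. Below is a minimal sketch, not part of this patch, of how the async paths could be covered with the same mocking approach: the patch target `llama_index.llms.dashscope.base.acall_with_messages` and the canned payload mirror the synchronous tests, while the use of `AsyncMock` and `asyncio.run` is an assumption of this sketch rather than anything the PR prescribes.

```python
import asyncio
from unittest.mock import AsyncMock, patch

from llama_index.core.base.llms.types import ChatMessage, CompletionResponse
from llama_index.llms.dashscope.base import DashScope

# Same canned DashScope API payload shape as the synchronous tests above.
DASHSCOPE_API_RESPONSE = {
    "status_code": 200,
    "request_id": "4438deec-2d21-9b9c-b405-a47459fd8f75",
    "code": "",
    "message": "",
    "output": {
        "choices": [
            {
                "finish_reason": "stop",
                "message": {"role": "assistant", "content": "hi, there!"},
            }
        ]
    },
    "usage": {"total_tokens": 161, "output_tokens": 91, "input_tokens": 70},
}


@patch(
    "llama_index.llms.dashscope.base.acall_with_messages",
    new_callable=AsyncMock,
    return_value=DASHSCOPE_API_RESPONSE,
)
def test_dashscope_async_paths(mock_acall_with_messages):
    llm = DashScope(api_key="test")

    # acomplete/achat are coroutines; drive them with asyncio.run so the test
    # stays a plain synchronous pytest function.
    completion = asyncio.run(llm.acomplete("hi, there!"))
    assert isinstance(completion, CompletionResponse)
    assert completion.text == "hi, there!"

    chat = asyncio.run(llm.achat([ChatMessage.from_str("hi, there!")]))
    assert chat.message.content == "hi, there!"
```

Because `asyncio.run` drives the coroutines directly, a sketch like this would run under plain pytest without an async test plugin.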
diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/README.md b/llama-index-integrations/llms/llama-index-llms-sambanova/README.md deleted file mode 100644 index 0bc8ca9c4dc3f..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-sambanova/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# LlamaIndex LLM Integration: SambaNova LLM - -SambaNovaLLM is a custom LLM (Language Model) interface that allows you to interact with AI models hosted on SambaNova's offerings - Sambaverse and SambaStudio - -## Key Features: - -- Integration with SambaNova-hosted AI models -- Integration two SambaNova offerings - Sambaverse and SambaStudio -- Support for completion based interactions -- Streaming support for completion responses -- Seamless integration with the LlamaIndex ecosystem - -## Installation - -```bash -pip install llama-index-llms-sambanova -``` diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/__init__.py b/llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/__init__.py deleted file mode 100644 index 0adc8f6ffddf1..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.llms.sambanova.base import Sambaverse, SambaStudio - -__all__ = ["Sambaverse", "SambaStudio"] diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/base.py b/llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/base.py deleted file mode 100644 index 749c50b707e52..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-sambanova/llama_index/llms/sambanova/base.py +++ /dev/null @@ -1,575 +0,0 @@ -import os -from typing import Any, Dict, Optional, List - -import requests -from llama_index.core.base.llms.types import ( - CompletionResponse, - CompletionResponseGen, - LLMMetadata, -) - -from llama_index.core.llms.callbacks import llm_completion_callback -from llama_index.core.llms.custom import CustomLLM -from llama_index.core.bridge.pydantic import Field, PrivateAttr -import json - - -class Sambaverse(CustomLLM): - """ - Sambaverse LLM. - - Examples: - `pip install llama-index-llms-sambanova` - - ```python - from llama_index.llms.sambanova import Sambaverse - - llm = Sambaverse(...) 
- - response = llm.complete("What is the meaning of life?") - - print(response) - ``` - """ - - sambaverse_url: str = Field( - default="https://sambaverse.sambanova.ai", - description="URL of the Sambaverse server", - ) - - sambaverse_api_key: str = Field( - default="", - description="API key for the Sambaverse server", - ) - sambaverse_model_name: str = Field( - default="", description="Name of the Sambaverse model to use" - ) - model_kwargs: Dict[str, Any] = Field( - default_factory=dict, - description="Additional keyword arguments to pass to the model", - ) - streaming: bool = Field( - default=False, - description="Boolean to state whether to stream response or not", - ) - - _client: requests.Session = PrivateAttr() - - def __init__( - self, - sambaverse_url: str = "https://sambaverse.sambanova.ai", - sambaverse_api_key: str = "", - sambaverse_model_name: str = "", - model_kwargs: Dict[str, Any] = {}, - streaming: bool = False, - client: Optional[requests.Session] = None, - ) -> None: - super().__init__() - - self.sambaverse_url = sambaverse_url - self.sambaverse_api_key = sambaverse_api_key - self.sambaverse_model_name = sambaverse_model_name - self.model_kwargs = model_kwargs - self.streaming = streaming - self._client = client or requests.Session() - self._validate_env_vars() - - def _validate_env_vars(self): - if not self.sambaverse_model_name: - self.sambaverse_model_name = os.getenv("SAMBAVERSE_MODEL_NAME") - if not self.sambaverse_api_key: - self.sambaverse_api_key = os.getenv("SAMBAVERSE_API_KEY") - - if not self.sambaverse_model_name: - raise ValueError( - "Sambaverse model name must be provided either as an argument or set in the environment variable 'SAMBAVERSE_MODEL_NAME'." - ) - - if not self.sambaverse_api_key: - raise ValueError( - "Sambaverse API key must be provided either as an argument or set in the environment variable 'SAMBAVERSE_API_KEY'." 
- ) - - def _get_full_url(self, endpoint: str) -> str: - return f"{self.sambaverse_url}/{endpoint}" - - def _get_model_kwargs(self, stop: Optional[List[str]]) -> str: - try: - _model_kwargs = self.model_kwargs or {} - _kwarg_stop_sequences = set(_model_kwargs.get("stop_sequences", [])) - _stop_sequences = set(stop or _kwarg_stop_sequences) - - if not _kwarg_stop_sequences: - _model_kwargs["stop_sequences"] = ",".join( - f'"{x}"' for x in _stop_sequences - ) - - tuning_params_dict = { - k: {"type": type(v).__name__, "value": str(v)} - for k, v in _model_kwargs.items() - } - - return json.dumps(tuning_params_dict) - - except Exception as e: - raise ValueError(f"Error getting model kwargs: {e}") - - def _process_api_response(self, response: requests.Response) -> Dict: - result: Dict[str, Any] = {} - if response.status_code != 200: - raise ValueError( - f"Received unexpected status code {response.status_code}: {response.text}" - ) - - try: - lines_result = response.text.strip().split("\n") - text_result = lines_result[-1] - if response.status_code == 200 and json.loads(text_result).get("error"): - completion = "" - for line in lines_result[:-1]: - completion += json.loads(line)["result"]["responses"][0][ - "stream_token" - ] - text_result = lines_result[-2] - result = json.loads(text_result) - result["result"]["responses"][0]["completion"] = completion - else: - result = json.loads(text_result) - except Exception as e: - result["detail"] = str(e) - if "status_code" not in result: - result["status_code"] = response.status_code - return result - - def _process_api_stream_response(self, response: requests.Response) -> Any: - try: - for line in response.iter_lines(): - chunk = json.loads(line) - if "status_code" not in chunk: - chunk["status_code"] = response.status_code - if chunk["status_code"] == 200 and chunk.get("error"): - chunk["result"] = {"responses": [{"stream_token": ""}]} - return chunk - yield chunk - except Exception as e: - raise RuntimeError(f"Error processing streaming response: {e}") - - def _send_sambaverse_request( - self, endpoint: str, data: Dict[str, Any], stream: bool = False - ) -> requests.Response: - url = self._get_full_url(endpoint) - headers = { - "key": self.sambaverse_api_key, - "Content-Type": "application/json", - "modelName": self.sambaverse_model_name, - } - try: - return self._client.post(url, headers=headers, json=data, stream=stream) - - except Exception as e: - raise ValueError(f"Error sending request to Sambaverse: {e}") - - def _prepare_request_data(self, prompt: str) -> Dict[str, Any]: - try: - model_params = self._get_model_kwargs(stop=None) - return {"instance": prompt, "params": json.loads(model_params)} - - except Exception as e: - raise ValueError(f"Error preparing request data: {e}") - - def _get_completion_from_response(self, response: Dict) -> str: - try: - return ( - response.get("result", {}) - .get("responses", [{}])[0] - .get("completion", "") - ) - except Exception as e: - raise ValueError(f"Error processing response: {e}") - - @classmethod - def class_name(cls) -> str: - return "Samabaverse" - - @property - def metadata(self) -> LLMMetadata: - """LLM metadata.""" - return LLMMetadata( - model_name=self.sambaverse_model_name, - model_kwargs=self.model_kwargs, - description="Sambanova LLM", - is_streaming=self.streaming, - ) - - @llm_completion_callback() - def complete(self, prompt: str) -> CompletionResponse: - """ - Complete the given prompt using the Sambaverse model. - - Args: - prompt (str): The input prompt to complete. 
- - Returns: - CompletionResponse: The completed text generated by the model. - """ - data = self._prepare_request_data(prompt) - response = self._send_sambaverse_request("api/predict", data) - processed_response = self._process_api_response(response) - completion_text = self._get_completion_from_response(processed_response) - - return CompletionResponse(text=completion_text) - - @llm_completion_callback() - def stream_complete(self, prompt: str) -> CompletionResponseGen: - """ - Stream the completion of the given prompt using the Sambaverse model. - - Args: - prompt (str): The input prompt to complete. - - Yields: - CompletionResponseGen: Streamed completion text generated by the model. - """ - print("In stream_complete") - data = self._prepare_request_data(prompt) - response = self._send_sambaverse_request("api/predict", data, stream=True) - - for token in self._process_api_stream_response(response): - processed_token = token["result"]["responses"][0]["stream_token"] - yield CompletionResponse(text=processed_token) - - -class SambaStudio(CustomLLM): - """ - SambaStudio LLM. - - Examples: - `pip install llama-index-llms-sambanova` - - ```python - from llama_index.llms.sambanova import SambaStudio - - llm = Sambaverse(...) - - response = llm.complete("What is the meaning of life?") - - print(response) - ``` - """ - - sambastudio_base_url: str = Field( - default="", - description="URL of the SambaStudio server", - ) - sambastudio_base_uri: str = Field( - default="", - description="Base URI of the SambaStudio server", - ) - sambastudio_project_id: str = Field( - default="", - description="Project ID of the SambaStudio server", - ) - sambastudio_endpoint_id: str = Field( - default="", - description="Endpoint ID of the SambaStudio server", - ) - sambastudio_api_key: str = Field( - default="", - description="API key for the SambaStudio server", - ) - - model_kwargs: Dict[str, Any] = Field( - default_factory=dict, - description="Additional keyword arguments to pass to the model", - ) - streaming: bool = Field( - default=False, - description="Boolean to state whether to stream response or not", - ) - - _client: requests.Session = PrivateAttr() - - def __init__( - self, - sambastudio_base_url: str = "", - sambastudio_base_uri: str = "", - sambastudio_project_id: str = "", - sambastudio_endpoint_id: str = "", - model_kwargs: Dict[str, Any] = {}, - streaming: bool = False, - client: Optional[requests.Session] = None, - ) -> None: - super().__init__() - - self.sambastudio_base_url = sambastudio_base_url - self.sambastudio_base_uri = sambastudio_base_uri - self.sambastudio_project_id = sambastudio_project_id - self.sambastudio_endpoint_id = sambastudio_endpoint_id - self.model_kwargs = model_kwargs - self.streaming = streaming - self._client = client or requests.Session() - self._validate_env_vars() - - def _validate_env_vars(self): - if not self.sambaverse_api_key: - self.sambaverse_api_key = os.getenv("SAMBAVERSE_API_KEY") - if not self.sambastudio_base_url: - self.sambastudio_base_url = os.getenv("SAMBASTUDIO_BASE_URL") - if not self.sambastudio_base_uri: - self.sambastudio_base_uri = os.getenv("SAMBASTUDIO_BASE_URI") - if not self.sambastudio_project_id: - self.sambastudio_project_id = os.getenv("SAMBASTUDIO_PROJECT_ID") - if not self.sambastudio_endpoint_id: - self.sambastudio_endpoint_id = os.getenv("SAMBASTUDIO_ENDPOINT_ID") - - if not self.sambaverse_api_key: - raise ValueError( - "Sambaverse API key must be provided either as an argument or set in the environment variable 
'SAMBAVERSE_API_KEY'." - ) - - if not self.sambastudio_base_url: - raise ValueError( - "Sambastudio base URL must be provided either as an argument or set in the environment variable 'SAMBASTUDIO_BASE_URL'." - ) - - if not self.sambastudio_base_uri: - raise ValueError( - "Sambastudio base URI must be provided either as an argument or set in the environment variable 'SAMBASTUDIO_BASE_URI'." - ) - - if not self.sambastudio_project_id: - raise ValueError( - "Sambastudio project ID must be provided either as an argument or set in the environment variable 'SAMBASTUDIO_PROJECT_ID'." - ) - - if not self.sambastudio_endpoint_id: - raise ValueError( - "Sambastudio endpoint ID must be provided either as an argument or set in the environment variable 'SAMBASTUDIO_ENDPOINT_ID'." - ) - - def _get_full_url(self, path: str) -> str: - return f"{self.sambastudio_base_url}/{self.sambastudio_base_uri}/{path}" - - def _get_model_kwargs(self, stop: Optional[List[str]]) -> str: - try: - _model_kwargs = self.model_kwargs or {} - _kwarg_stop_sequences = set(_model_kwargs.get("stop_sequences", [])) - _stop_sequences = set(stop or _kwarg_stop_sequences) - - if not _kwarg_stop_sequences: - _model_kwargs["stop_sequences"] = ",".join( - f'"{x}"' for x in _stop_sequences - ) - - tuning_params_dict = { - k: {"type": type(v).__name__, "value": str(v)} - for k, v in _model_kwargs.items() - } - - return json.dumps(tuning_params_dict) - - except Exception as e: - raise ValueError(f"Error getting model kwargs: {e}") - - def _process_api_response(self, response: requests.Response) -> Dict: - result: Dict[str, Any] = {} - try: - result = response.json() - except Exception as e: - result["detail"] = str(e) - if "status_code" not in result: - result["status_code"] = response.status_code - return result - - def _process_api_stream_response(self, response: requests.Response) -> Any: - """Process the streaming response.""" - if "nlp" in self.sambastudio_base_uri: - try: - import sseclient - except ImportError: - raise ImportError( - "could not import sseclient library" - "Please install it with `pip install sseclient-py`." 
- ) - client = sseclient.SSEClient(response) - close_conn = False - for event in client.events(): - if event.event == "error_event": - close_conn = True - chunk = { - "event": event.event, - "data": event.data, - "status_code": response.status_code, - } - yield chunk - if close_conn: - client.close() - elif "generic" in self.sambastudio_base_uri: - try: - for line in response.iter_lines(): - chunk = json.loads(line) - if "status_code" not in chunk: - chunk["status_code"] = response.status_code - if chunk["status_code"] == 200 and chunk.get("error"): - chunk["result"] = {"responses": [{"stream_token": ""}]} - yield chunk - except Exception as e: - raise RuntimeError(f"Error processing streaming response: {e}") - else: - raise ValueError( - f"handling of endpoint uri: {self.api_base_uri} not implemented" - ) - - def _send_sambaverse_request( - self, data: Dict[str, Any], stream: bool = False - ) -> requests.Response: - try: - if stream: - url = self._get_full_url( - f"stream/{self.sambastudio_project_id}/{self.sambastudio_endpoint_id}" - ) - headers = { - "key": self.sambaverse_api_key, - "Content-Type": "application/json", - } - return self._client.post(url, headers=headers, json=data, stream=True) - else: - url = self._get_full_url( - f"{self.sambastudio_project_id}/{self.sambastudio_endpoint_id}" - ) - headers = { - "key": self.sambaverse_api_key, - "Content-Type": "application/json", - } - - return self._client.post(url, headers=headers, json=data, stream=stream) - except Exception as e: - raise ValueError(f"Error sending request to Sambaverse: {e}") - - def _prepare_request_data(self, prompt: str) -> Dict[str, Any]: - try: - data = {} - if isinstance(prompt, str): - input = [prompt] - if "nlp" in self.api_base_uri: - model_params = self._get_model_kwargs(stop=None) - if model_params: - data = {"inputs": input, "params": json.loads(model_params)} - else: - data = {"inputs": input} - elif "generic" in self.api_base_uri: - model_params = self._get_model_kwargs(stop=None) - if model_params: - data = {"instance": input, "params": json.loads(model_params)} - else: - data = {"instance": input} - else: - raise ValueError( - f"handling of endpoint uri: {self.api_base_uri} not implemented" - ) - return data - - except Exception as e: - raise ValueError(f"Error preparing request data: {e}") - - def _get_completion_from_response(self, response: Dict) -> str: - try: - if "nlp" in self.sambastudio_base_uri: - return response["data"][0]["completion"] - elif "generic" in self.sambastudio_base_uri: - return response["predictions"][0]["completion"] - else: - raise ValueError( - f"handling of endpoint uri: {self.sambastudio_base_uri} not implemented" - ) - except Exception as e: - raise ValueError(f"Error processing response: {e}") - - def _get_stream_token_from_response(self, response: Dict) -> str: - try: - if "nlp" in self.sambastudio_base_uri: - return response["data"]["stream_token"] - elif "generic" in self.sambastudio_base_uri: - return response["result"]["responses"][0]["stream_token"] - else: - raise ValueError( - f"handling of endpoint uri: {self.sambastudio_base_uri} not implemented" - ) - except Exception as e: - raise ValueError(f"Error processing response: {e}") - - @classmethod - def class_name(cls) -> str: - return "SambaStudio" - - @property - def metadata(self) -> LLMMetadata: - """LLM metadata.""" - return LLMMetadata( - model_kwargs=self.model_kwargs, - description="Sambanova LLM", - is_streaming=self.streaming, - ) - - @llm_completion_callback() - def complete(self, prompt: str) -> 
CompletionResponse: - """ - Complete the given prompt using the SambaStudio model. - - Args: - prompt (str): The input prompt to complete. - - Returns: - CompletionResponse: The completed text generated by the model. - """ - data = self._prepare_request_data(prompt) - response = self._send_sambaverse_request(data) - processed_response = self._process_api_response(response) - completion_text = self._get_completion_from_response(processed_response) - - return CompletionResponse(text=completion_text) - - @llm_completion_callback() - def stream_complete(self, prompt: str) -> CompletionResponseGen: - """ - Stream the completion of the given prompt using the SambaStudio model. - - Args: - prompt (str): The input prompt to complete. - - Yields: - CompletionResponseGen: Streamed completion text generated by the model. - """ - print("In stream_complete") - data = self._prepare_request_data(prompt) - response = self._send_sambaverse_request(data, stream=True) - - for token in self._process_api_stream_response(response): - processed_token = self._get_stream_token_from_response(token) - yield CompletionResponse(text=processed_token) - - @llm_completion_callback() - def nlp_prediction(self, prompt: str) -> CompletionResponse: - """ - Perform NLP prediction for the given prompt using the SambaStudio model. - - Args: - prompt (str): The input prompt to predict. - - Returns: - CompletionResponse: The prediction result generated by the model. - """ - return self.complete(prompt) - - @llm_completion_callback() - def nlp_prediction_stream(self, prompt: str) -> CompletionResponseGen: - """ - Stream NLP prediction for the given prompt using the SambaStudio model. - - Args: - prompt (str): The input prompt to predict. - - Yields: - CompletionResponseGen: Streamed prediction result generated by the model. 
- """ - return self.stream_complete(prompt) diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/tests/test_llms_sambanova.py b/llama-index-integrations/llms/llama-index-llms-sambanova/tests/test_llms_sambanova.py deleted file mode 100644 index 27b6f223f49d5..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-sambanova/tests/test_llms_sambanova.py +++ /dev/null @@ -1,12 +0,0 @@ -from llama_index.core.base.llms.base import BaseLLM -from llama_index.llms.sambanova import Sambaverse, SambaStudio - - -def test_embedding_class(): - # Check Sambaverse inherits from BaseLLM - names_of_base_classes = [b.__name__ for b in Sambaverse.__mro__] - assert BaseLLM.__name__ in names_of_base_classes - - # Check SambaStudio inherits from BaseLLM - names_of_base_classes = [b.__name__ for b in SambaStudio.__mro__] - assert BaseLLM.__name__ in names_of_base_classes diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/.gitignore b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/BUILD b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/Makefile b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/README.md b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/README.md new file mode 100644 index 0000000000000..db8cf76156fa8 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/README.md @@ -0,0 +1,39 @@ +# LlamaIndex LLM Integration: SambaNova LLM + +SambaNovaLLM is a custom LLM (Language Model) interface that allows you to interact with AI models hosted on SambaNova's offerings - SambaNova Cloud and SambaStudio + +## Key Features: + +- Integration with SambaNova-hosted AI models +- Integration two SambaNova offerings - SambaNova Cloud and SambaStudio +- Support for completion based interactions +- Streaming support for completion responses +- Seamless integration with the LlamaIndex ecosystem + +## Installation + +```bash +pip install llama-index-llms-sambanovacloud +``` + +## Usage + +```python +from llama_index.llms.sambanovacloud import SambaNovaCloud + +SambaNovaCloud( + sambanova_url="SambaNova cloud endpoint URL", + sambanova_api_key="set with your SambaNova cloud API key", + model="model name", +) +``` + +## Usage + +```python +SambaNovaCloud( + sambanova_url="SambaNova cloud endpoint URL", + sambanova_api_key="set with your SambaNova cloud API key", + model="model name", +) +``` diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/BUILD b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/__init__.py b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/__init__.py new file mode 100644 index 0000000000000..f289193d7e649 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/__init__.py @@ -0,0 +1,3 @@ +from llama_index.llms.sambanovacloud.base import SambaNovaCloud + +__all__ = ["SambaNovaCloud"] diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/base.py b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/base.py new file mode 100644 index 0000000000000..27a89277434d2 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/llama_index/llms/sambanovacloud/base.py @@ -0,0 +1,641 @@ +import aiohttp + +from typing import Any, Dict, List, Optional, Iterator, Sequence, AsyncIterator + +import requests +from llama_index.core.llms.llm import LLM +from llama_index.core.llms.callbacks import ( + llm_chat_callback, + llm_completion_callback, +) +from llama_index.core.base.llms.types import ( + ChatMessage, + ChatResponse, + ChatResponseAsyncGen, + ChatResponseGen, + CompletionResponse, + CompletionResponseAsyncGen, + CompletionResponseGen, + LLMMetadata, + MessageRole, +) +from llama_index.core.base.llms.generic_utils import ( + get_from_param_or_env, + chat_to_completion_decorator, + stream_chat_to_completion_decorator, + achat_to_completion_decorator, +) +from llama_index.core.bridge.pydantic import Field, SecretStr +import json + +from dotenv import 
load_dotenv + +load_dotenv() + + +def _convert_message_to_dict(message: ChatMessage) -> Dict[str, Any]: + """Converts a ChatMessage to a dictionary with Role / content. + + Args: + message: ChatMessage + + Returns: + messages_dict: role / content dict + """ + if isinstance(message, ChatMessage): + message_dict = {"role": message.role, "content": message.content} + else: + raise TypeError(f"Got unknown type {message}") + return message_dict + + +def _create_message_dicts(messages: Sequence[ChatMessage]) -> List[Dict[str, Any]]: + """Converts a list of ChatMessages to a list of dictionaries with Role / content. + + Args: + messages: list of ChatMessages + + Returns: + messages_dicts: list of role / content dicts + """ + return [_convert_message_to_dict(m) for m in messages] + + +class SambaNovaCloud(LLM): + """ + SambaNova Cloud model. + + Setup: + To use, you should have the environment variables: + ``SAMBANOVA_URL`` set with your SambaNova Cloud URL. + ``SAMBANOVA_API_KEY`` set with your SambaNova Cloud API Key. + http://cloud.sambanova.ai/ + + Example: + .. code-block:: python + SambaNovaCloud( + sambanova_url = SambaNova cloud endpoint URL, + sambanova_api_key = set with your SambaNova cloud API key, + model = model name, + max_tokens = max number of tokens to generate, + temperature = model temperature, + top_p = model top p, + top_k = model top k, + stream_options = include usage to get generation metrics + ) + + Key init args — completion params: + model: str + The name of the model to use, e.g., Meta-Llama-3-70B-Instruct. + streaming: bool + Whether to use streaming handler when using non streaming methods + max_tokens: int + max tokens to generate + temperature: float + model temperature + top_p: float + model top p + top_k: int + model top k + stream_options: dict + stream options, include usage to get generation metrics + + Key init args — client params: + sambanova_url: str + SambaNova Cloud Url + sambanova_api_key: str + SambaNova Cloud api key + + Instantiate: + .. code-block:: python + + from llama_index.llms.sambanovacloud import SambaNovaCloud + + llm = SambaNovaCloud( + sambanova_url = SambaNova cloud endpoint URL, + sambanova_api_key = set with your SambaNova cloud API key, + model = model name, + max_tokens = max number of tokens to generate, + temperature = model temperature, + top_p = model top p, + top_k = model top k, + stream_options = include usage to get generation metrics + ) + Complete: + .. code-block:: python + prompt = "Tell me about Naruto Uzumaki in one sentence" + response = llm.complete(prompt) + + Chat: + .. code-block:: python + messages = [ + ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")), + ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence") + ] + response = llm.chat(messages) + + Stream: + .. code-block:: python + prompt = "Tell me about Naruto Uzumaki in one sentence" + messages = [ + ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")), + ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence") + ] + for chunk in llm.stream_complete(prompt): + print(chunk.text) + for chunk in llm.stream_chat(messages): + print(chunk.message.content) + + Async: + .. 
code-block:: python + prompt = "Tell me about Naruto Uzumaki in one sentence" + asyncio.run(llm.acomplete(prompt)) + + messages = [ + ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")), + ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence") + ] + asyncio.run(llm.achat(chat_text_msgs)) + + Response metadata and usage + .. code-block:: python + + messages = [ + ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")), + ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence") + ] + metadata_and_usage = llm.chat(messages).message.additional_kwargs + print(metadata_and_usage) + """ + + sambanova_url: str = Field(default_factory=str, description="SambaNova Cloud Url") + + sambanova_api_key: SecretStr = Field( + default_factory=str, description="SambaNova Cloud api key" + ) + + model: str = Field( + default="Meta-Llama-3.1-8B-Instruct", + description="The name of the model", + ) + + streaming: bool = Field( + default=False, + description="Whether to use streaming handler when using non streaming methods", + ) + + max_tokens: int = Field(default=1024, description="max tokens to generate") + + temperature: float = Field(default=0.7, description="model temperature") + + top_p: Optional[float] = Field(default=None, description="model top p") + + top_k: Optional[int] = Field(default=None, description="model top k") + + stream_options: dict = Field( + default={"include_usage": True}, + description="stream options, include usage to get generation metrics", + ) + + @classmethod + def class_name(cls) -> str: + return "SambaNovaCloud" + + @property + def metadata(self) -> LLMMetadata: + return LLMMetadata( + context_window=None, + num_output=self.max_tokens, + is_chat_model=True, + model_name=self.model, + ) + + def __init__(self, **kwargs: Any) -> None: + """Init and validate environment variables.""" + kwargs["sambanova_url"] = get_from_param_or_env( + "url", + kwargs.get("sambanova_url"), + "SAMBANOVA_URL", + default="https://api.sambanova.ai/v1/chat/completions", + ) + kwargs["sambanova_api_key"] = get_from_param_or_env( + "api_key", kwargs.get("sambanova_api_key"), "SAMBANOVA_API_KEY" + ) + super().__init__(**kwargs) + + def _handle_request( + self, messages_dicts: List[Dict], stop: Optional[List[str]] = None + ) -> Dict[str, Any]: + """ + Performs a post request to the LLM API. + + Args: + messages_dicts: List of role / content dicts to use as input. + stop: list of stop tokens + + Returns: + A response dict. 
+ """ + data = { + "messages": messages_dicts, + "max_tokens": self.max_tokens, + "stop": stop, + "model": self.model, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + } + http_session = requests.Session() + response = http_session.post( + self.sambanova_url, + headers={ + "Authorization": f"Bearer {self.sambanova_api_key.get_secret_value()}", + "Content-Type": "application/json", + }, + json=data, + ) + if response.status_code != 200: + raise RuntimeError( + f"Sambanova /complete call failed with status code " + f"{response.status_code}.", + f"{response.text}.", + ) + response_dict = response.json() + if response_dict.get("error"): + raise RuntimeError( + f"Sambanova /complete call failed with status code " + f"{response.status_code}.", + f"{response_dict}.", + ) + return response_dict + + async def _handle_request_async( + self, messages_dicts: List[Dict], stop: Optional[List[str]] = None + ) -> Dict[str, Any]: + """ + Performs a async post request to the LLM API. + + Args: + messages_dicts: List of role / content dicts to use as input. + stop: list of stop tokens + + Returns: + A response dict. + """ + data = { + "messages": messages_dicts, + "max_tokens": self.max_tokens, + "stop": stop, + "model": self.model, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + } + + async with aiohttp.ClientSession() as session: + async with session.post( + self.sambanova_url, + headers={ + "Authorization": f"Bearer {self.sambanova_api_key.get_secret_value()}", + "Content-Type": "application/json", + }, + json=data, + ) as response: + if response.status != 200: + raise RuntimeError( + f"Sambanova /complete call failed with status code {response.status}.", + f"{await response.text()}.", + ) + response_dict = await response.json() + if response_dict.get("error"): + raise RuntimeError( + f"Sambanova /complete call failed with status code {response.status}.", + f"{response_dict}.", + ) + return response_dict + + def _handle_streaming_request( + self, messages_dicts: List[Dict], stop: Optional[List[str]] = None + ) -> Iterator[Dict]: + """ + Performs an streaming post request to the LLM API. + + Args: + messages_dicts: List of role / content dicts to use as input. + stop: list of stop tokens + + Yields: + An iterator of response dicts. + """ + try: + import sseclient + except ImportError: + raise ImportError( + "could not import sseclient library" + "Please install it with `pip install sseclient-py`." + ) + data = { + "messages": messages_dicts, + "max_tokens": self.max_tokens, + "stop": stop, + "model": self.model, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "stream": True, + "stream_options": self.stream_options, + } + http_session = requests.Session() + response = http_session.post( + self.sambanova_url, + headers={ + "Authorization": f"Bearer {self.sambanova_api_key.get_secret_value()}", + "Content-Type": "application/json", + }, + json=data, + stream=True, + ) + + client = sseclient.SSEClient(response) + + if response.status_code != 200: + raise RuntimeError( + f"Sambanova /complete call failed with status code " + f"{response.status_code}." + f"{response.text}." + ) + + for event in client.events(): + if event.event == "error_event": + raise RuntimeError( + f"Sambanova /complete call failed with status code " + f"{response.status_code}." + f"{event.data}." 
+ ) + + try: + # check if the response is a final event + # in that case event data response is '[DONE]' + if event.data != "[DONE]": + if isinstance(event.data, str): + data = json.loads(event.data) + else: + raise RuntimeError( + f"Sambanova /complete call failed with status code " + f"{response.status_code}." + f"{event.data}." + ) + if data.get("error"): + raise RuntimeError( + f"Sambanova /complete call failed with status code " + f"{response.status_code}." + f"{event.data}." + ) + yield data + except Exception as e: + raise RuntimeError( + f"Error getting content chunk raw streamed response: {e}" + f"data: {event.data}" + ) + + async def _handle_streaming_request_async( + self, messages_dicts: List[Dict], stop: Optional[List[str]] = None + ) -> AsyncIterator[Dict]: + """ + Performs an async streaming post request to the LLM API. + + Args: + messages_dicts: List of role / content dicts to use as input. + stop: list of stop tokens + + Yields: + An iterator of response dicts. + """ + data = { + "messages": messages_dicts, + "max_tokens": self.max_tokens, + "stop": stop, + "model": self.model, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "stream": True, + "stream_options": self.stream_options, + } + + async with aiohttp.ClientSession() as session: + async with session.post( + self.sambanova_url, + headers={ + "Authorization": f"Bearer {self.sambanova_api_key.get_secret_value()}", + "Content-Type": "application/json", + }, + json=data, + ) as response: + if response.status != 200: + raise RuntimeError( + f"Sambanova /complete call failed with status code " + f"{response.status}. {await response.text()}" + ) + + async for line in response.content: + if line: + event = line.decode("utf-8").strip() + + if event.startswith("data:"): + event = event[len("data:") :].strip() + if event == "[DONE]": + break + elif len(event) == 0: + continue + + try: + data = json.loads(event) + if data.get("error"): + raise RuntimeError( + f'Sambanova /complete call failed: {data["error"]}' + ) + yield data + except json.JSONDecodeError: + raise RuntimeError( + f"Sambanova /complete call failed to decode response: {event}" + ) + except Exception as e: + raise RuntimeError( + f"Error processing response: {e} data: {event}" + ) + + @llm_chat_callback() + def chat( + self, + messages: Sequence[ChatMessage], + stop: Optional[List[str]] = None, + **kwargs: Any, + ) -> ChatResponse: + """ + Calls the chat implementation of the SambaNovaCloud model. + + Args: + messages: the prompt composed of a list of messages. + stop: a list of strings on which the model should stop generating. + If generation stops due to a stop token, the stop token itself + SHOULD BE INCLUDED as part of the output. This is not enforced + across models right now, but it's a good practice to follow since + it makes it much easier to parse the output of the model + downstream and understand why generation stopped. 
+ + Returns: + ChatResponse with model generation + """ + messages_dicts = _create_message_dicts(messages) + + response = self._handle_request(messages_dicts, stop) + message = ChatMessage( + role=MessageRole.ASSISTANT, + content=response["choices"][0]["message"]["content"], + additional_kwargs={ + "id": response["id"], + "finish_reason": response["choices"][0]["finish_reason"], + "usage": response.get("usage"), + "model_name": response["model"], + "system_fingerprint": response["system_fingerprint"], + "created": response["created"], + }, + ) + return ChatResponse(message=message) + + @llm_chat_callback() + def stream_chat( + self, + messages: Sequence[ChatMessage], + stop: Optional[List[str]] = None, + **kwargs: Any, + ) -> ChatResponseGen: + """ + Streams the chat output of the SambaNovaCloud model. + + Args: + messages: the prompt composed of a list of messages. + stop: a list of strings on which the model should stop generating. + If generation stops due to a stop token, the stop token itself + SHOULD BE INCLUDED as part of the output. This is not enforced + across models right now, but it's a good practice to follow since + it makes it much easier to parse the output of the model + downstream and understand why generation stopped. + + Yields: + ChatResponseGen with model partial generation + """ + messages_dicts = _create_message_dicts(messages) + + finish_reason = None + content = "" + role = MessageRole.ASSISTANT + + for partial_response in self._handle_streaming_request(messages_dicts, stop): + if len(partial_response["choices"]) > 0: + content_delta = partial_response["choices"][0]["delta"]["content"] + content += content_delta + additional_kwargs = { + "id": partial_response["id"], + "finish_reason": partial_response["choices"][0].get( + "finish_reason" + ), + } + else: + additional_kwargs = { + "id": partial_response["id"], + "finish_reason": finish_reason, + "usage": partial_response.get("usage"), + "model_name": partial_response["model"], + "system_fingerprint": partial_response["system_fingerprint"], + "created": partial_response["created"], + } + + # yield chunk + yield ChatResponse( + message=ChatMessage( + role=role, content=content, additional_kwargs=additional_kwargs + ), + delta=content_delta, + raw=partial_response, + ) + + @llm_completion_callback() + def complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + complete_fn = chat_to_completion_decorator(self.chat) + return complete_fn(prompt, **kwargs) + + @llm_completion_callback() + def stream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseGen: + stream_complete_fn = stream_chat_to_completion_decorator(self.stream_chat) + return stream_complete_fn(prompt, **kwargs) + + ### Async ### + @llm_chat_callback() + async def achat( + self, + messages: Sequence[ChatMessage], + stop: Optional[List[str]] = None, + **kwargs: Any, + ) -> ChatResponse: + """ + Calls the async chat implementation of the SambaNovaCloud model. + + Args: + messages: the prompt composed of a list of messages. + stop: a list of strings on which the model should stop generating. + If generation stops due to a stop token, the stop token itself + SHOULD BE INCLUDED as part of the output. This is not enforced + across models right now, but it's a good practice to follow since + it makes it much easier to parse the output of the model + downstream and understand why generation stopped. 
+ + Returns: + ChatResponse with async model generation + """ + messages_dicts = _create_message_dicts(messages) + response = await self._handle_request_async(messages_dicts, stop) + message = ChatMessage( + role=MessageRole.ASSISTANT, + content=response["choices"][0]["message"]["content"], + additional_kwargs={ + "id": response["id"], + "finish_reason": response["choices"][0]["finish_reason"], + "usage": response.get("usage"), + "model_name": response["model"], + "system_fingerprint": response["system_fingerprint"], + "created": response["created"], + }, + ) + return ChatResponse(message=message) + + @llm_chat_callback() + async def astream_chat( + self, + messages: Sequence[ChatMessage], + stop: Optional[List[str]] = None, + **kwargs: Any, + ) -> ChatResponseAsyncGen: + raise NotImplementedError( + "SambaNovaCloud does not currently support async streaming." + ) + + @llm_completion_callback() + async def acomplete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + acomplete_fn = achat_to_completion_decorator(self.achat) + return await acomplete_fn(prompt, **kwargs) + + @llm_completion_callback() + def astream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseAsyncGen: + raise NotImplementedError( + "SambaNovaCloud does not currently support async streaming." + ) diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/pyproject.toml similarity index 83% rename from llama-index-integrations/llms/llama-index-llms-sambanova/pyproject.toml rename to llama-index-integrations/llms/llama-index-llms-sambanovacloud/pyproject.toml index 3af3a61e4ed02..0b6052ce6e3e4 100644 --- a/llama-index-integrations/llms/llama-index-llms-sambanova/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/pyproject.toml @@ -9,11 +9,10 @@ skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" [tool.llamahub] contains_example = false -import_path = "llama_index.llms.sambanova" +import_path = "llama_index.llms.sambanovacloud" [tool.llamahub.class_authors] -SambaStudio = "Pradeep" -Sambaverse = "Pradeep" +SambaNovaCloud = "Pradeep" [tool.mypy] disallow_untyped_defs = true @@ -23,14 +22,15 @@ python_version = "3.8" [tool.poetry] authors = ["Your Name "] -description = "llama-index llms sambanova integration" -name = "sambanova" +description = "llama-index llms sambanova cloud integration" +name = "llama-index-llms-sambanovacloud" readme = "README.md" -version = "0.2.0" +version = "0.3.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.11.0" +python-dotenv = "^1.0.1" [tool.poetry.group.dev.dependencies] ipython = "8.10.0" diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/tests/BUILD similarity index 100% rename from llama-index-integrations/llms/llama-index-llms-sambanova/tests/BUILD rename to llama-index-integrations/llms/llama-index-llms-sambanovacloud/tests/BUILD diff --git a/llama-index-integrations/llms/llama-index-llms-sambanova/tests/__init__.py b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/tests/__init__.py similarity index 100% rename from llama-index-integrations/llms/llama-index-llms-sambanova/tests/__init__.py rename to llama-index-integrations/llms/llama-index-llms-sambanovacloud/tests/__init__.py diff --git 
a/llama-index-integrations/llms/llama-index-llms-sambanovacloud/tests/test_llms_sambanova.py b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/tests/test_llms_sambanova.py new file mode 100644 index 0000000000000..c8df197f80659 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-sambanovacloud/tests/test_llms_sambanova.py @@ -0,0 +1,208 @@ +import time +import asyncio +from llama_index.core.base.llms.types import ( + ChatMessage, + MessageRole, +) + +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from llama_index.llms.sambanovacloud import SambaNovaCloud +import pytest + + +sambanova_api_key = os.environ.get("SAMBANOVA_API_KEY", None) + + +@pytest.mark.asyncio() +async def run_async_test(fn, chat_msgs, number, verbose=False): + tasks = [fn(chat_msgs) for _ in range(number)] + if verbose: + for task in asyncio.as_completed(tasks): + result = await task # Wait for the next completed task + print(result) + else: + await asyncio.gather(*tasks) + + +def run_sync_test(fn, chat_msgs, number): + for _ in range(number): + fn(chat_msgs) + + +def get_execution_time(fn, chat_msgs, is_async=False, number=10): + start_time = time.perf_counter() + if is_async: + asyncio.run(run_async_test(fn, chat_msgs, number)) + else: + run_sync_test(fn, chat_msgs, number) + end_time = time.perf_counter() + execution_time = end_time - start_time + print( + f"Execution time {'for async test' if is_async else ''}: {execution_time:.6f} seconds" + ) + + +@pytest.mark.skipif(not sambanova_api_key, reason="No openai api key set") +def test_sambanovacloud(): + # chat interaction example + user_message = ChatMessage( + role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence" + ) + chat_text_msgs = [ + ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")), + user_message, + ] + + sambanovacloud_client = SambaNovaCloud() + + # sync + print(f"chat response: {sambanovacloud_client.chat(chat_text_msgs)}\n") + print( + f"stream chat response: {[x.message.content for x in sambanovacloud_client.stream_chat(chat_text_msgs)]}\n" + ) + + print( + f"complete response: {sambanovacloud_client.complete(user_message.content)}\n" + ) + print( + f"stream complete response: {[x.text for x in sambanovacloud_client.stream_complete(user_message.content)]}\n" + ) + + # async + print( + f"async chat response: {asyncio.run(sambanovacloud_client.achat(chat_text_msgs))}\n" + ) + print( + f"async complete response: {asyncio.run(sambanovacloud_client.acomplete(user_message.content))}\n" + ) + + +@pytest.mark.skipif(not sambanova_api_key, reason="No openai api key set") +def test_sambanovacloud_performance(): + # chat interaction example + user_message = ChatMessage( + role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence" + ) + chat_text_msgs = [ + ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")), + user_message, + ] + + sambanovacloud_client = SambaNovaCloud() + + # chat + get_execution_time(sambanovacloud_client.chat, chat_text_msgs, number=5) + get_execution_time( + sambanovacloud_client.achat, chat_text_msgs, is_async=True, number=5 + ) + + # complete + get_execution_time(sambanovacloud_client.complete, user_message.content, number=5) + get_execution_time( + sambanovacloud_client.acomplete, user_message.content, is_async=True, number=5 + ) + + +@pytest.mark.skipif(not sambanova_api_key, reason="No openai api key set") +def test_hiperparameters(): + user_message = 
ChatMessage( + role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence" + ) + chat_text_msgs = [ + ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")), + user_message, + ] + + model_list = ["llama3-8b", "llama3-70b"] + max_tokens_list = [10, 100] + temperature_list = [0, 1] + top_p_list = [0, 1] + top_k_list = [1, 50] + stream_options_list = [{"include_usage": False}, {"include_usage": True}] + + for model in model_list: + sambanovacloud_client = SambaNovaCloud( + model=model, + max_tokens=max_tokens_list[0], + temperature=temperature_list[0], + top_p=top_p_list[0], + top_k=top_k_list[0], + stream_options=stream_options_list[0], + ) + print( + f"model: {model}, generation: {sambanovacloud_client.chat(chat_text_msgs)}" + ) + + for max_tokens in max_tokens_list: + sambanovacloud_client = SambaNovaCloud( + model=model_list[0], + max_tokens=max_tokens, + temperature=temperature_list[0], + top_p=top_p_list[0], + top_k=top_k_list[0], + stream_options=stream_options_list[0], + ) + print( + f"max_tokens: {max_tokens}, generation: {sambanovacloud_client.chat(chat_text_msgs)}" + ) + + for temperature in temperature_list: + sambanovacloud_client = SambaNovaCloud( + model=model_list[0], + max_tokens=max_tokens_list[0], + temperature=temperature, + top_p=top_p_list[0], + top_k=top_k_list[0], + stream_options=stream_options_list[0], + ) + print( + f"temperature: {temperature}, generation: {sambanovacloud_client.chat(chat_text_msgs)}" + ) + + for top_p in top_p_list: + sambanovacloud_client = SambaNovaCloud( + model=model_list[0], + max_tokens=max_tokens_list[0], + temperature=temperature_list[0], + top_p=top_p, + top_k=top_k_list[0], + stream_options=stream_options_list[0], + ) + print( + f"top_p: {top_p}, generation: {sambanovacloud_client.chat(chat_text_msgs)}" + ) + + for top_k in top_k_list: + sambanovacloud_client = SambaNovaCloud( + model=model_list[0], + max_tokens=max_tokens_list[0], + temperature=temperature_list[0], + top_p=top_p_list[0], + top_k=top_k, + stream_options=stream_options_list[0], + ) + print( + f"top_k: {top_k}, generation: {sambanovacloud_client.chat(chat_text_msgs)}" + ) + + for stream_options in stream_options_list: + sambanovacloud_client = SambaNovaCloud( + model=model_list[0], + max_tokens=max_tokens_list[0], + temperature=temperature_list[0], + top_p=top_p_list[0], + top_k=top_k_list[0], + stream_options=stream_options, + ) + print( + f"stream_options: {stream_options}, generation: {sambanovacloud_client.chat(chat_text_msgs)}" + ) + + +if __name__ == "__main__": + test_sambanovacloud() + test_sambanovacloud_performance() + test_hiperparameters() diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/.gitignore b/llama-index-integrations/memory/llama-index-memory-mem0/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/BUILD b/llama-index-integrations/memory/llama-index-memory-mem0/BUILD new file mode 100644 index 0000000000000..0469dba89e7ba --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/BUILD @@ -0,0 +1,4 @@ +poetry_requirements( + name="poetry", + module_mapping={"mem0ai": ["mem0"]}, +) diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/Makefile b/llama-index-integrations/memory/llama-index-memory-mem0/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/README.md b/llama-index-integrations/memory/llama-index-memory-mem0/README.md new file mode 100644 index 0000000000000..dcfd356c682b4 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/README.md @@ -0,0 +1,183 @@ +# LlamaIndex Memory Integration: Mem0 + +## Installation + +To install the required package, run: + +```bash +%pip install llama-index-memory-mem0 +``` + +## Setup with Mem0 Platform + +1. Set your Mem0 Platform API key as an environment variable. 
You can replace `` with your actual API key: + +> Note: You can obtain your Mem0 Platform API key from the [Mem0 Platform](https://app.mem0.ai/login). + +```python +os.environ["MEM0_API_KEY"] = "" +``` + +2. Import the necessary modules and create a Mem0Memory instance: + +```python +from llama_index.memory.mem0 import Mem0Memory + +context = {"user_id": "user_1"} +memory = Mem0Memory.from_client( + context=context, + api_key="", + search_msg_limit=4, # optional, default is 5 +) +``` + +The Mem0 context is used to identify the user, agent, or conversation in Mem0. At least one of its fields must be passed to the `Mem0Memory` constructor. It can be any of the following: + +```python +context = { + "user_id": "user_1", + "agent_id": "agent_1", + "run_id": "run_1", +} +``` + +`search_msg_limit` is optional and defaults to 5. It is the number of messages from the chat history used for memory retrieval from Mem0. Using more messages provides more context for retrieval, but it also increases retrieval time and may surface unwanted results. + +## Setup with Mem0 OSS + +1. Set up Mem0 OSS by providing configuration details: + +> Note: To learn more about Mem0 OSS, read [Mem0 OSS Quickstart](https://docs.mem0.ai/open-source/quickstart). + +```python +config = { + "vector_store": { + "provider": "qdrant", + "config": { + "collection_name": "test_9", + "host": "localhost", + "port": 6333, + "embedding_model_dims": 1536, # Change this according to your local model's dimensions + }, + }, + "llm": { + "provider": "openai", + "config": { + "model": "gpt-4o", + "temperature": 0.2, + "max_tokens": 1500, + }, + }, + "embedder": { + "provider": "openai", + "config": {"model": "text-embedding-3-small"}, + }, + "version": "v1.1", +} +``` + +2. Create a Mem0Memory instance: + +```python +memory = Mem0Memory.from_config( + context=context, + config=config, + search_msg_limit=4, # optional, default is 5 +) +``` + +## Basic Usage + +Currently, Mem0 memory is supported in `SimpleChatEngine`, `FunctionCallingAgent`, and `ReActAgent`. + +Initialize the LLM: + +```python +import os +from llama_index.llms.openai import OpenAI + +os.environ["OPENAI_API_KEY"] = "" +llm = OpenAI(model="gpt-4o") +``` + +### SimpleChatEngine + +```python +from llama_index.core import SimpleChatEngine + +agent = SimpleChatEngine.from_defaults( + llm=llm, memory=memory # set your memory here +) + +# Start the chat +response = agent.chat("Hi, My name is Mayank") +print(response) +``` + +Initialize the tools: + +```python +from llama_index.core.tools import FunctionTool + + +def call_fn(name: str): + """Call the provided name. + Args: + name: str (Name of the person) + """ + print(f"Calling... {name}") + + +def email_fn(name: str): + """Email the provided name. + Args: + name: str (Name of the person) + """ + print(f"Emailing... 
{name}") + + +call_tool = FunctionTool.from_defaults(fn=call_fn) +email_tool = FunctionTool.from_defaults(fn=email_fn) +``` + +### FunctionCallingAgent + +```python +from llama_index.core.agent import FunctionCallingAgent + +agent = FunctionCallingAgent.from_tools( + [call_tool, email_tool], + llm=llm, + memory=memory, + verbose=True, +) + +# Start the chat +response = agent.chat("Hi, My name is Mayank") +print(response) +``` + +### ReActAgent + +```python +from llama_index.core.agent import ReActAgent + +agent = ReActAgent.from_tools( + [call_tool, email_tool], + llm=llm, + memory=memory, + verbose=True, +) + +# Start the chat +response = agent.chat("Hi, My name is Mayank") +print(response) +``` + +> Note: For more examples refer to: [Notebooks](https://github.com/run-llama/llama_index/tree/main/docs/docs/examples/memory) + +## References + +- [Mem0 Platform](https://app.mem0.ai/login) +- [Mem0 OSS](https://docs.mem0.ai/open-source/quickstart) +- [Mem0 Github](https://github.com/mem0ai/mem0) diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/BUILD b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/__init__.py b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/__init__.py new file mode 100644 index 0000000000000..0db71375430df --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/__init__.py @@ -0,0 +1,3 @@ +from llama_index.memory.mem0.base import Mem0Memory + +__all__ = ["Mem0Memory"] diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/base.py b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/base.py new file mode 100644 index 0000000000000..1aa456d977df4 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/base.py @@ -0,0 +1,186 @@ +from typing import Dict, List, Optional, Union, Any +from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer +from llama_index.core.memory.types import BaseMemory +from llama_index.memory.mem0.utils import ( + convert_memory_to_system_message, + convert_chat_history_to_dict, + convert_messages_to_string, +) +from mem0 import MemoryClient, Memory +from llama_index.core.bridge.pydantic import ( + BaseModel, + Field, + ValidationError, + model_validator, + SerializeAsAny, + PrivateAttr, +) +from llama_index.core.base.llms.types import ChatMessage, MessageRole + + +class BaseMem0(BaseMemory): + """Base class for Mem0.""" + + _client: Optional[Union[MemoryClient, Memory]] = PrivateAttr() + + def __init__( + self, client: Optional[Union[MemoryClient, Memory]] = None, **kwargs + ) -> None: + super().__init__(**kwargs) + if client is not None: + self._client = client + + def add( + self, messages: Union[str, List[Dict[str, str]]], **kwargs + ) -> Optional[Dict[str, Any]]: + if self._client is None: + raise ValueError("Client is not initialized") + return self._client.add(messages=messages, **kwargs) + + def search(self, query: str, **kwargs) -> Optional[Dict[str, Any]]: + if self._client is None: + raise ValueError("Client is not initialized") + return self._client.search(query=query, **kwargs) + + 
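+# Mem0Context (below) scopes memories in Mem0: it carries the optional user_id,
+# agent_id, and run_id identifiers, its validator requires at least one of them
+# to be set, and get_context() forwards the non-empty fields as keyword
+# arguments to the client's add() and search() calls.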
+class Mem0Context(BaseModel): + user_id: Optional[str] = None + agent_id: Optional[str] = None + run_id: Optional[str] = None + + @model_validator(mode="after") + def check_at_least_one_assigned(cls, values): + if not any( + getattr(values, field) for field in ["user_id", "agent_id", "run_id"] + ): + raise ValueError( + "At least one of 'user_id', 'agent_id', or 'run_id' must be assigned." + ) + return values + + def get_context(self) -> Dict[str, Optional[str]]: + return {key: value for key, value in self.__dict__.items() if value is not None} + + +class Mem0Memory(BaseMem0): + primary_memory: SerializeAsAny[BaseMemory] = Field( + description="Primary memory source for chat agent." + ) + context: Optional[Mem0Context] = None + search_msg_limit: int = Field( + default=5, + description="Limit of chat history messages to use for context in search API", + ) + + def __init__(self, context: Optional[Mem0Context] = None, **kwargs) -> None: + super().__init__(**kwargs) + if context is not None: + self.context = context + + @classmethod + def class_name(cls) -> str: + """Class name.""" + return "Mem0Memory" + + @classmethod + def from_defaults(cls, **kwargs: Any) -> "Mem0Memory": + raise NotImplementedError("Use either from_client or from_config") + + @classmethod + def from_client( + cls, + context: Dict[str, Any], + api_key: Optional[str] = None, + host: Optional[str] = None, + organization: Optional[str] = None, + project: Optional[str] = None, + search_msg_limit: int = 5, + **kwargs: Any, + ): + primary_memory = ChatMemoryBuffer.from_defaults() + + try: + context = Mem0Context(**context) + except ValidationError as e: + raise ValidationError(f"Context validation error: {e}") + + client = MemoryClient( + api_key=api_key, host=host, organization=organization, project=project + ) + return cls( + primary_memory=primary_memory, + context=context, + client=client, + search_msg_limit=search_msg_limit, + ) + + @classmethod + def from_config( + cls, + context: Dict[str, Any], + config: Dict[str, Any], + search_msg_limit: int = 5, + **kwargs: Any, + ): + primary_memory = ChatMemoryBuffer.from_defaults() + + try: + context = Mem0Context(**context) + except Exception as e: + raise ValidationError(f"Context validation error: {e}") + + client = Memory.from_config(config_dict=config) + return cls( + primary_memory=primary_memory, + context=context, + client=client, + search_msg_limit=search_msg_limit, + ) + + def get(self, input: Optional[str] = None, **kwargs: Any) -> List[ChatMessage]: + """Get chat history. 
With memory system message.""" + messages = self.primary_memory.get(input=input, **kwargs) + input = convert_messages_to_string(messages, input, limit=self.search_msg_limit) + + search_results = self.search(query=input, **self.context.get_context()) + + if isinstance(self._client, Memory) and self._client.version == "v1.1": + search_results = search_results["results"] + + system_message = convert_memory_to_system_message(search_results) + + # If system message is present + if len(messages) > 0 and messages[0].role == MessageRole.SYSTEM: + assert messages[0].content is not None + system_message = convert_memory_to_system_message( + response=search_results, existing_system_message=messages[0] + ) + messages.insert(0, system_message) + return messages + + def get_all(self) -> List[ChatMessage]: + """Returns all chat history.""" + return self.primary_memory.get_all() + + def _add_msgs_to_client_memory(self, messages: List[ChatMessage]) -> None: + """Add new user and assistant messages to client memory.""" + self.add( + messages=convert_chat_history_to_dict(messages), + **self.context.get_context(), + ) + + def put(self, message: ChatMessage) -> None: + """Add message to chat history and client memory.""" + self._add_msgs_to_client_memory([message]) + self.primary_memory.put(message) + + def set(self, messages: List[ChatMessage]) -> None: + """Set chat history and add new messages to client memory.""" + initial_chat_len = len(self.primary_memory.get_all()) + # Insert only new chat messages + self._add_msgs_to_client_memory(messages[initial_chat_len:]) + self.primary_memory.set(messages) + + def reset(self) -> None: + """Only reset chat history.""" + self.primary_memory.reset() diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/utils.py b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/utils.py new file mode 100644 index 0000000000000..2e0f4c35d7738 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/llama_index/memory/mem0/utils.py @@ -0,0 +1,58 @@ +from typing import Any, Dict, List, Optional +from llama_index.core.base.llms.types import ChatMessage, MessageRole + +DEFAULT_INTRO_PREFERENCES = "Below are a set of relevant preferences retrieved from potentially several memory sources:" +DEFAULT_OUTRO_PREFERENCES = "This is the end of the retrieved preferences." 
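+# The helpers below merge Mem0 search results into a single SYSTEM message of
+# roughly this shape:
+#
+#   <existing system prompt, if any>
+#   Below are a set of relevant preferences retrieved from potentially several memory sources:
+#   [category, ...] : memory text
+#   ...
+#   This is the end of the retrieved preferences.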
+ + +def convert_memory_to_system_message( + response: List[Dict[str, Any]], existing_system_message: ChatMessage = None +) -> ChatMessage: + memories = [format_memory_json(memory_json) for memory_json in response] + formatted_messages = "\n\n" + DEFAULT_INTRO_PREFERENCES + "\n" + for memory in memories: + formatted_messages += f"\n {memory} \n\n" + formatted_messages += DEFAULT_OUTRO_PREFERENCES + system_message = formatted_messages + # If existing system message is available + if existing_system_message is not None: + system_message = existing_system_message.content.split( + DEFAULT_INTRO_PREFERENCES + )[0] + system_message = system_message + formatted_messages + return ChatMessage(content=system_message, role=MessageRole.SYSTEM) + + +def format_memory_json(memory_json: Dict[str, Any]) -> List[str]: + categories = memory_json.get("categories") + memory = memory_json.get("memory", "") + if categories is not None: + categories_str = ", ".join(categories) + return f"[{categories_str}] : {memory}" + return f"{memory}" + + +def convert_chat_history_to_dict(messages: List[ChatMessage]) -> List[Dict[str, str]]: + chat_history_dict = [] + for message in messages: + if ( + message.role in [MessageRole.USER, MessageRole.ASSISTANT] + and message.content + ): + chat_history_dict.append( + {"role": message.role.value, "content": message.content} + ) + return chat_history_dict + + +def convert_messages_to_string( + messages: List[ChatMessage], input: Optional[str] = None, limit: int = 5 +) -> str: + recent_messages = messages[-limit:] + formatted_messages = [f"{msg.role.value}: {msg.content}" for msg in recent_messages] + result = "\n".join(formatted_messages) + + if input: + result += f"\nuser: {input}" + + return result diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/pyproject.toml b/llama-index-integrations/memory/llama-index-memory-mem0/pyproject.toml new file mode 100644 index 0000000000000..d78986661ee6b --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/pyproject.toml @@ -0,0 +1,57 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +# Feel free to un-skip examples, and experimental, you will just need to +# work through many typos (--write-changes and --interactive will help) +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.memory.mem0" + +[tool.llamahub.class_authors] +Mem0Memory = "llama-index" + +[tool.mypy] +disallow_untyped_defs = true +# Remove venv skip when integrated with pre-commit +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Your Name "] +description = "llama-index memory mem0 integration" +license = "MIT" +name = "llama-index-memory-mem0" +packages = [{include = "llama_index/"}] +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = "<4.0,>=3.9" +llama-index-core = "^0.11.0" +mem0ai = "^0.1.19" + +[tool.poetry.group.dev.dependencies] +black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} +codespell = {extras = ["toml"], version = ">=v2.2.6"} +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" 
+types-redis = "4.5.5.0" +types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 +types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/tests/BUILD b/llama-index-integrations/memory/llama-index-memory-mem0/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/tests/__init__.py b/llama-index-integrations/memory/llama-index-memory-mem0/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/memory/llama-index-memory-mem0/tests/test_mem0.py b/llama-index-integrations/memory/llama-index-memory-mem0/tests/test_mem0.py new file mode 100644 index 0000000000000..483d5ea547178 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-mem0/tests/test_mem0.py @@ -0,0 +1,273 @@ +from unittest.mock import patch, MagicMock +from llama_index.memory.mem0.base import Mem0Memory, Mem0Context +from llama_index.core.memory.chat_memory_buffer import ChatMessage, MessageRole +from llama_index.memory.mem0.utils import ( + convert_chat_history_to_dict, + convert_messages_to_string, +) + + +def test_mem0_memory_from_client(): + # Mock context + context = {"user_id": "test_user"} + + # Mock arguments for MemoryClient + api_key = "test_api_key" + host = "test_host" + organization = "test_org" + project = "test_project" + search_msg_limit = 10 # Add this line + + # Patch MemoryClient + with patch("llama_index.memory.mem0.base.MemoryClient") as MockMemoryClient: + mock_client = MagicMock() + MockMemoryClient.return_value = mock_client + + # Call from_client method + mem0_memory = Mem0Memory.from_client( + context=context, + api_key=api_key, + host=host, + organization=organization, + project=project, + search_msg_limit=search_msg_limit, # Add this line + ) + + # Assert that MemoryClient was called with the correct arguments + MockMemoryClient.assert_called_once_with( + api_key=api_key, host=host, organization=organization, project=project + ) + + # Assert that the returned object is an instance of Mem0Memory + assert isinstance(mem0_memory, Mem0Memory) + + # Assert that the context was set correctly + assert isinstance(mem0_memory.context, Mem0Context) + assert mem0_memory.context.user_id == "test_user" + + # Assert that the client was set correctly + assert mem0_memory._client == mock_client + + # Assert that the search_msg_limit was set correctly + assert mem0_memory.search_msg_limit == search_msg_limit # Add this line + + +def test_mem0_memory_from_config(): + # Mock context + context = {"user_id": "test_user"} + + # Mock config + config = {"test": "test"} + + # Set search_msg_limit + search_msg_limit = 15 # Add this line + + # Patch Memory + with patch("llama_index.memory.mem0.base.Memory") as MockMemory: + mock_client = MagicMock() + MockMemory.from_config.return_value = mock_client + + # Call from_config method + mem0_memory = Mem0Memory.from_config( + context=context, + config=config, + search_msg_limit=search_msg_limit, # Add this line + ) + + # Assert that the client was set correctly + assert mem0_memory._client == mock_client + + # Assert that the search_msg_limit was set correctly + assert mem0_memory.search_msg_limit == search_msg_limit # Add this line + + +def test_mem0_memory_set(): + # Mock context + context = {"user_id": "test_user"} + + # Mock arguments for MemoryClient + api_key = 
"test_api_key" + host = "test_host" + organization = "test_org" + project = "test_project" + + # Patch MemoryClient + with patch("llama_index.memory.mem0.base.MemoryClient") as MockMemoryClient: + mock_client = MagicMock() + MockMemoryClient.return_value = mock_client + + # Create Mem0Memory instance + mem0_memory = Mem0Memory.from_client( + context=context, + api_key=api_key, + host=host, + organization=organization, + project=project, + ) + + # Create a list of alternating user and assistant messages + messages = [ + ChatMessage(role=MessageRole.USER, content="User message 1"), + ChatMessage(role=MessageRole.ASSISTANT, content="Assistant message 1"), + ChatMessage(role=MessageRole.USER, content="User message 2"), + ChatMessage(role=MessageRole.ASSISTANT, content="Assistant message 2"), + ] + + # Call the set method + mem0_memory.set(messages) + + # Assert that add was called only for user messages + assert mock_client.add.call_count == 1 + expected_messages = convert_chat_history_to_dict(messages) + mock_client.add.assert_called_once_with( + messages=expected_messages, user_id="test_user" + ) + + # Assert that the primary_memory was set with all messages + assert mem0_memory.primary_memory.get_all() == messages + + # Test setting messages when chat history is not empty + new_messages = [ + ChatMessage(role=MessageRole.USER, content="User message 3"), + ChatMessage(role=MessageRole.ASSISTANT, content="Assistant message 3"), + ] + + # Reset the mock to clear previous calls + mock_client.add.reset_mock() + + # Call the set method again + mem0_memory.set(messages + new_messages) + + # Assert that add was called only for the new messages + expected_new_messages = convert_chat_history_to_dict(new_messages) + mock_client.add.assert_called_once_with( + messages=expected_new_messages, user_id="test_user" + ) + + # Assert that the primary_memory was updated with all messages + assert mem0_memory.primary_memory.get_all() == messages + new_messages + + +def test_mem0_memory_get(): + # Mock context + context = {"user_id": "test_user"} + + # Mock arguments for MemoryClient + api_key = "test_api_key" + host = "test_host" + organization = "test_org" + project = "test_project" + + # Patch MemoryClient + with patch("llama_index.memory.mem0.base.MemoryClient") as MockMemoryClient: + mock_client = MagicMock() + MockMemoryClient.return_value = mock_client + + # Create Mem0Memory instance + mem0_memory = Mem0Memory.from_client( + context=context, + api_key=api_key, + host=host, + organization=organization, + project=project, + ) + + # Set dummy chat history + dummy_messages = [ + ChatMessage(role=MessageRole.USER, content="Hello"), + ChatMessage(role=MessageRole.ASSISTANT, content="Hi there!"), + ChatMessage(role=MessageRole.USER, content="How are you?"), + ChatMessage( + role=MessageRole.ASSISTANT, content="I'm doing well, thank you!" 
+ ), + ] + mem0_memory.primary_memory.set(dummy_messages) + + # Set dummy response for search + dummy_search_results = [ + { + "categories": ["greeting"], + "memory": "The user usually starts with a greeting.", + }, + {"categories": ["mood"], "memory": "The user often asks about well-being."}, + ] + mock_client.search.return_value = dummy_search_results + + # Call get method + result = mem0_memory.get(input="How are you?") + + # Assert that search was called with correct arguments + expected_query = convert_messages_to_string( + dummy_messages, "How are you?", limit=mem0_memory.search_msg_limit + ) + mock_client.search.assert_called_once_with( + query=expected_query, user_id="test_user" + ) + + # Assert that the result contains the correct number of messages + assert len(result) == len(dummy_messages) + 1 # +1 for the system message + + # Assert that the first message is a system message + assert result[0].role == MessageRole.SYSTEM + + # Assert that the system message contains the search results + assert "The user usually starts with a greeting." in result[0].content + assert "The user often asks about well-being." in result[0].content + + # Assert that the rest of the messages match the dummy messages + assert result[1:] == dummy_messages + + # Test get method without input (should use last user message) + mock_client.search.reset_mock() + result_no_input = mem0_memory.get() + + # Assert that search was called with the last user message + expected_query_no_input = convert_messages_to_string( + dummy_messages, limit=mem0_memory.search_msg_limit + ) + mock_client.search.assert_called_once_with( + query=expected_query_no_input, user_id="test_user" + ) + + # Assert that the results are the same as before + assert result_no_input == result + + +def test_mem0_memory_put(): + # Mock context + context = {"user_id": "test_user"} + + # Mock arguments for MemoryClient + api_key = "test_api_key" + host = "test_host" + organization = "test_org" + project = "test_project" + + # Patch MemoryClient + with patch("llama_index.memory.mem0.base.MemoryClient") as MockMemoryClient: + mock_client = MagicMock() + MockMemoryClient.return_value = mock_client + + # Create Mem0Memory instance + mem0_memory = Mem0Memory.from_client( + context=context, + api_key=api_key, + host=host, + organization=organization, + project=project, + ) + + # Create a test message + test_message = ChatMessage(role=MessageRole.USER, content="Hello, world!") + + # Call the put method + mem0_memory.put(test_message) + + # Assert that the message was added to primary_memory + assert mem0_memory.primary_memory.get_all() == [test_message] + + # Assert that add was called with the correct arguments + expected_messages = convert_chat_history_to_dict([test_message]) + mock_client.add.assert_called_once_with( + messages=expected_messages, user_id="test_user" + ) diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/.gitignore b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ 
+wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/BUILD b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/Makefile b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/README.md b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/README.md new file mode 100644 index 0000000000000..4d408ed99bbea --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/README.md @@ -0,0 +1,36 @@ +# LlamaIndex Postprocessor Integration: SiliconFlow Rerank + +## 1. Product Introduction + +SiliconCloud provides cost-effective GenAI services built on excellent open-source foundation models. +Introduction: https://docs.siliconflow.cn/introduction + +## 2. Product features + +- As a one-stop cloud service platform that integrates top large models, SiliconCloud is committed to providing developers with faster, cheaper, more comprehensive, and smoother model APIs. + + - SiliconCloud hosts a variety of open-source large language models, image generation models, code generation models, embedding and reranking models, and multimodal models, including Qwen2.5-72B, DeepSeek-V2.5, Qwen2, InternLM2.5-20B-Chat, BCE, BGE, SenseVoice-Small, Llama-3.1, FLUX.1, DeepSeek-Coder-V2, SD3 Medium, GLM-4-9B-Chat, and InstantID. + + - Among them, Qwen 2.5 (7B), Llama 3.1 (8B), and other large model APIs are free to use, so developers and product managers do not need to worry about the compute costs of development or large-scale promotion, achieving "token freedom". + +- Provides out-of-the-box inference acceleration services for large models, bringing a more efficient user experience to your GenAI applications. + +## 3. Usage + +```python +import os +from llama_index.postprocessor.siliconflow_rerank import SiliconFlowRerank + +reranker = SiliconFlowRerank( + model="BAAI/bge-reranker-v2-m3", + api_key=os.getenv("SILICONFLOW_API_KEY"), + top_n=4, +) + +nodes = [ + ... 
+] + +response = reranker.postprocess_nodes(nodes=nodes, query_str="...") +print(response) +``` diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/BUILD b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/__init__.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/__init__.py new file mode 100644 index 0000000000000..383ada9118f9e --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/__init__.py @@ -0,0 +1,3 @@ +from llama_index.postprocessor.siliconflow_rerank.base import SiliconFlowRerank + +__all__ = ["SiliconFlowRerank"] diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/base.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/base.py new file mode 100644 index 0000000000000..c5f2c6c857058 --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/llama_index/postprocessor/siliconflow_rerank/base.py @@ -0,0 +1,149 @@ +import requests +from typing import Any, Dict, List, Optional +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CBEventType, EventPayload +from llama_index.core.instrumentation import get_dispatcher +from llama_index.core.instrumentation.events.rerank import ( + ReRankEndEvent, + ReRankStartEvent, +) +from llama_index.core.postprocessor.types import BaseNodePostprocessor +from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle + +DEFAULT_SILICONFLOW_API_URL = "https://api.siliconflow.cn/v1/rerank" + +dispatcher = get_dispatcher(__name__) + +AVAILABLE_OPTIONS = [ + "BAAI/bge-reranker-v2-m3", + "Bnetease-youdao/bce-reranker-base_v1", +] + + +class SiliconFlowRerank(BaseNodePostprocessor): + model: str = Field( + default="BAAI/bge-reranker-v2-m3", + description="Specifies the model to be used.", + ) + base_url: str = Field( + default=DEFAULT_SILICONFLOW_API_URL, + description="The URL of the SiliconFlow Rerank API.", + ) + api_key: str = Field(default=None, description="The SiliconFlow API key.") + + top_n: int = Field( + description="Number of most relevant documents or indices to return." + ) + return_documents: bool = Field( + default=True, + description="Specify whether the response should include the document text.", + ) + max_chunks_per_doc: int = Field( + default=1024, + description="""\ + Maximum number of chunks generated from within a document. + Long documents are divided into multiple chunks for calculation, + and the highest score among the chunks is taken as the document's score. 
+ """, + ) + overlap_tokens: int = Field( + default=80, + description="Number of token overlaps between adjacent chunks when documents are chunked.", + ) + + _session: Any = PrivateAttr() + + def __init__( + self, + model: str = "BAAI/bge-reranker-v2-m3", + base_url: str = DEFAULT_SILICONFLOW_API_URL, + api_key: Optional[str] = None, + top_n: int = 4, + return_documents: bool = True, + max_chunks_per_doc: int = 1024, + overlap_tokens: int = 80, + ): + super().__init__( + model=model, + base_url=base_url, + api_key=api_key, + top_n=top_n, + return_documents=return_documents, + max_chunks_per_doc=max_chunks_per_doc, + overlap_tokens=overlap_tokens, + ) + self._session: requests.Session = requests.Session() + self._session.headers.update( + { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + ) + + @classmethod + def class_name(cls) -> str: + return "SiliconFlowRerank" + + @property + def _model_kwargs(self) -> Dict[str, Any]: + return { + "return_documents": self.return_documents, + "max_chunks_per_doc": self.max_chunks_per_doc, + "overlap_tokens": self.overlap_tokens, + } + + def _postprocess_nodes( + self, + nodes: List[NodeWithScore], + query_bundle: Optional[QueryBundle] = None, + ) -> List[NodeWithScore]: + dispatcher.event( + ReRankStartEvent( + query=query_bundle, + nodes=nodes, + top_n=self.top_n, + model_name=self.model, + ) + ) + + if query_bundle is None: + raise ValueError("Missing query bundle in extra info.") + if len(nodes) == 0: + return [] + + with self.callback_manager.event( + CBEventType.RERANKING, + payload={ + EventPayload.NODES: nodes, + EventPayload.MODEL_NAME: self.model, + EventPayload.QUERY_STR: query_bundle.query_str, + EventPayload.TOP_K: self.top_n, + }, + ) as event: + texts = [ + node.node.get_content(metadata_mode=MetadataMode.EMBED) + for node in nodes + ] + response = self._session.post( + self.base_url, + json={ + "model": self.model, + "query": query_bundle.query_str, + "documents": texts, + "top_n": self.top_n, + **self._model_kwargs, + }, + ).json() + if "results" not in response: + raise RuntimeError(response) + + new_nodes = [] + for result in response["results"]: + new_node_with_score = NodeWithScore( + node=nodes[result["index"]].node, score=result["relevance_score"] + ) + new_nodes.append(new_node_with_score) + event.on_end(payload={EventPayload.NODES: new_nodes}) + + dispatcher.event(ReRankEndEvent(nodes=new_nodes)) + return new_nodes diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/pyproject.toml b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/pyproject.toml new file mode 100644 index 0000000000000..c747f23ecaaf6 --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/pyproject.toml @@ -0,0 +1,62 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.postprocessor.siliconflow_rerank" + +[tool.llamahub.class_authors] +SiliconFlowRerank = "nightosong" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["nightosong"] +description = "llama-index postprocessor siliconflow rerank integration" +exclude = ["**/BUILD"] 
+license = "MIT" +name = "llama-index-postprocessor-siliconflow-rerank" +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.11.0" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/BUILD b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/__init__.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/test_postprocessor_siliconflow_rerank.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/test_postprocessor_siliconflow_rerank.py new file mode 100644 index 0000000000000..3076a286fb739 --- /dev/null +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-siliconflow-rerank/tests/test_postprocessor_siliconflow_rerank.py @@ -0,0 +1,54 @@ +import json +from requests import Response +from unittest import mock +from llama_index.core.postprocessor.types import BaseNodePostprocessor +from llama_index.core.schema import NodeWithScore, Document +from llama_index.postprocessor.siliconflow_rerank import SiliconFlowRerank + +_FAKE_API_KEY = "" +_FAKE_RERANK_RESPONSE = Response() +_FAKE_RERANK_RESPONSE._content = json.dumps( + { + "id": "", + "results": [ + { + "document": {"text": "last 1"}, + "index": 2, + "relevance_score": 0.9, + }, + { + "document": {"text": "last 2"}, + "index": 3, + "relevance_score": 0.8, + }, + ], + "tokens": {"input_tokens": 123, "output_tokens": 123}, + } +).encode("utf-8") + + +def test_class(): + names_of_base_classes = [b.__name__ for b in SiliconFlowRerank.__mro__] + assert BaseNodePostprocessor.__name__ in names_of_base_classes + + +def test_fake_rerank(): + input_nodes = [ + NodeWithScore(node=Document(doc_id="1", text="first 1")), + NodeWithScore(node=Document(doc_id="2", text="first 2")), + NodeWithScore(node=Document(doc_id="3", text="last 1")), + NodeWithScore(node=Document(doc_id="4", text="last 2")), + ] + expected_nodes = [ + NodeWithScore(node=Document(doc_id="3", text="last 1"), score=0.9), + NodeWithScore(node=Document(doc_id="4", text="last 2"), score=0.8), + ] + reranker = SiliconFlowRerank(api_key=_FAKE_API_KEY) + + with mock.patch.object( + reranker._session, + "post", + return_value=_FAKE_RERANK_RESPONSE, + ): + actual_nodes = reranker.postprocess_nodes(input_nodes, query_str="last") + 
assert actual_nodes == expected_nodes diff --git a/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/llama_index/storage/chat_store/azure/base.py b/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/llama_index/storage/chat_store/azure/base.py index cd45d5911050c..372856ae0fbb4 100644 --- a/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/llama_index/storage/chat_store/azure/base.py +++ b/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/llama_index/storage/chat_store/azure/base.py @@ -116,6 +116,31 @@ def from_account_and_key( service_mode, ) + @classmethod + def from_account_and_id( + cls, + account_name: str, + endpoint: Optional[str] = None, + chat_table_name: str = DEFAULT_CHAT_TABLE, + metadata_table_name: str = DEFAULT_METADATA_TABLE, + metadata_partition_key: str = None, + service_mode: ServiceMode = ServiceMode.STORAGE, + ) -> "AzureChatStore": + """Initializes AzureChatStore from an account name and managed ID.""" + from azure.identity import DefaultAzureCredential + + if endpoint is None: + endpoint = f"https://{account_name}.table.core.windows.net" + credential = DefaultAzureCredential() + return cls._from_clients( + endpoint, + credential, + chat_table_name, + metadata_table_name, + metadata_partition_key, + service_mode, + ) + @classmethod def from_sas_token( cls, diff --git a/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/pyproject.toml b/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/pyproject.toml index de5b5ee7ed815..c99e568696e5a 100644 --- a/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/pyproject.toml +++ b/llama-index-integrations/storage/chat_store/llama-index-storage-chat-store-azure/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-storage-chat-store-azure" readme = "README.md" -version = "0.2.3" +version = "0.2.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/llama_index/storage/docstore/azure/base.py b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/llama_index/storage/docstore/azure/base.py index b93e3bc996a1a..915a5b2fd22e2 100644 --- a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/llama_index/storage/docstore/azure/base.py +++ b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/llama_index/storage/docstore/azure/base.py @@ -107,6 +107,33 @@ def from_account_and_key( **kwargs, ) + @classmethod + def from_account_and_id( + cls, + account_name: str, + namespace: Optional[str] = None, + node_collection_suffix: Optional[str] = None, + ref_doc_collection_suffix: Optional[str] = None, + metadata_collection_suffix: Optional[str] = None, + service_mode: ServiceMode = ServiceMode.STORAGE, + partition_key: Optional[str] = None, + **kwargs, + ) -> "AzureDocumentStore": + """Initialize an AzureDocumentStore from an account name and managed ID.""" + azure_kvstore = AzureKVStore.from_account_and_id( + account_name, + service_mode=service_mode, + partition_key=partition_key, + ) + return cls( + azure_kvstore, + namespace, + node_collection_suffix, + ref_doc_collection_suffix, + metadata_collection_suffix, + **kwargs, + ) + @classmethod def from_sas_token( cls, diff --git 
a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/pyproject.toml b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/pyproject.toml index afd60d4b73e1f..6396d637e4e29 100644 --- a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/pyproject.toml +++ b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-azure/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-storage-docstore-azure" readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/llama_index/storage/index_store/azure/base.py b/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/llama_index/storage/index_store/azure/base.py index fc66d3079c861..020ab4cb9c55b 100644 --- a/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/llama_index/storage/index_store/azure/base.py +++ b/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/llama_index/storage/index_store/azure/base.py @@ -67,6 +67,28 @@ def from_account_and_key( ) return cls(azure_kvstore, namespace, collection_suffix) + @classmethod + def from_account_and_id( + cls, + account_name: str, + namespace: Optional[str] = None, + endpoint: Optional[str] = None, + service_mode: ServiceMode = ServiceMode.STORAGE, + partition_key: Optional[str] = None, + collection_suffix: Optional[str] = None, + ) -> "AzureIndexStore": + """Load an AzureIndexStore from an account name and managed ID. + + Args: + account_name (str): Azure Storage Account Name + namespace (Optional[str]): namespace for the AzureIndexStore + service_mode (ServiceMode): CosmosDB or Azure Table service mode + """ + azure_kvstore = AzureKVStore.from_account_and_id( + account_name, endpoint, service_mode, partition_key + ) + return cls(azure_kvstore, namespace, collection_suffix) + @classmethod def from_sas_token( cls, diff --git a/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/pyproject.toml b/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/pyproject.toml index 819e47cd9d4c7..2e4c1e85e0198 100644 --- a/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/pyproject.toml +++ b/llama-index-integrations/storage/index_store/llama-index-storage-index-store-azure/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-storage-index-store-azure" readme = "README.md" -version = "0.3.0" +version = "0.3.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/llama_index/storage/kvstore/azure/base.py b/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/llama_index/storage/kvstore/azure/base.py index 2d7ffbe4aed06..dee2d3cf256b0 100644 --- a/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/llama_index/storage/kvstore/azure/base.py +++ b/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/llama_index/storage/kvstore/azure/base.py @@ -126,6 +126,29 @@ def from_account_and_key( endpoint, credential, service_mode, partition_key, *args, **kwargs ) + @classmethod + def from_account_and_id( + cls, + account_name: str, + endpoint: Optional[str] = None, + service_mode: ServiceMode = 
ServiceMode.STORAGE, + partition_key: Optional[str] = None, + *args: Any, + **kwargs: Any, + ) -> "AzureKVStore": + """Creates an instance of AzureKVStore from an account name and managed ID.""" + try: + from azure.identity import DefaultAzureCredential + except ImportError: + raise ImportError(IMPORT_ERROR_MSG) + + if endpoint is None: + endpoint = f"https://{account_name}.table.core.windows.net" + credential = DefaultAzureCredential() + return cls._from_clients( + endpoint, credential, service_mode, partition_key, *args, **kwargs + ) + @classmethod def from_sas_token( cls, diff --git a/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/pyproject.toml b/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/pyproject.toml index 2e0ae7a51b35c..780ef6e24715f 100644 --- a/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/pyproject.toml +++ b/llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-storage-kvstore-azure" readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/tools/llama-index-tools-openai/llama_index/tools/openai/image_generation/base.py b/llama-index-integrations/tools/llama-index-tools-openai/llama_index/tools/openai/image_generation/base.py index 14f22e4484489..06deb6c37231a 100644 --- a/llama-index-integrations/tools/llama-index-tools-openai/llama_index/tools/openai/image_generation/base.py +++ b/llama-index-integrations/tools/llama-index-tools-openai/llama_index/tools/openai/image_generation/base.py @@ -1,4 +1,4 @@ -"""OpenAI Image Generation tool sppec..""" +"""OpenAI Image Generation tool spec.""" import base64 import os @@ -8,7 +8,22 @@ from llama_index.core.tools.tool_spec.base import BaseToolSpec DEFAULT_CACHE_DIR = "../../../img_cache" -DEFAULT_SIZE = "1024x1024" # Dall-e-3 only supports 1024x1024 +DEFAULT_SIZE = "1024x1024" + +valid_sizes = { + "dall-e-2": ["256x256", "512x512", "1024x1024"], + "dall-e-3": ["1024x1024", "1792x1024", "1024x1792"], +} + + +def get_extension(content: str): + map = { + "/": "jpg", + "i": "png", + "R": "gif", + "U": "webp", + } + return map.get(content[0], "jpg") class OpenAIImageGenerationToolSpec(BaseToolSpec): @@ -16,7 +31,9 @@ class OpenAIImageGenerationToolSpec(BaseToolSpec): spec_functions = ["image_generation"] - def __init__(self, api_key: str, cache_dir: Optional[str] = None) -> None: + def __init__( + self, api_key: Optional[str] = None, cache_dir: Optional[str] = None + ) -> None: try: from openai import OpenAI except ImportError: @@ -62,28 +79,60 @@ def image_generation( model: Optional[str] = "dall-e-3", quality: Optional[str] = "standard", num_images: Optional[int] = 1, + size: Optional[str] = DEFAULT_SIZE, + style: Optional[str] = "vivid", + timeout: Optional[int] = None, + download: Optional[bool] = False, ) -> str: """ This tool accepts a natural language string and will use OpenAI's DALL-E model to generate an image. Args: - text (str): The text to generate an image from. - size (str): The size of the image to generate (1024x1024, 256x256, 512x512). - model (str): The model to use to generate the image (dall-e-3, dall-e-2). - quality (str): The quality of the image to generate (standard, hd). - num_images (int): The number of images to generate. + text: The text to generate an image from. + + model: The model to use for image generation. 
Defaults to `dall-e-3`. + Must be one of `dall-e-2` or `dall-e-3`. + + num_images: The number of images to generate. Defaults to 1. + Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. + + quality: The quality of the image that will be generated. Defaults to `standard`. + Must be one of `standard` or `hd`. `hd` creates images with finer + details and greater consistency across the image. This param is only supported + for `dall-e-3`. + + size: The size of the generated images. Defaults to `1024x1024`. + Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. + Must be one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models. + + style: The style of the generated images. Defaults to `vivid`. + Must be one of `vivid` or `natural`. + Vivid causes the model to lean towards generating hyper-real and dramatic images. + Natural causes the model to produce more natural, less hyper-real looking images. + This param is only supported for `dall-e-3`. + + timeout: Override the client-level default timeout for this request, in seconds. Defaults to `None`. + + download: If `True`, the image will be downloaded to the cache directory. Defaults to `True`. """ + if size not in valid_sizes[model]: + raise Exception(f"Invalid size for {model}: {size}") + response = self.client.images.generate( - model=model, prompt=text, - size=DEFAULT_SIZE, - quality=quality, n=num_images, - response_format="b64_json", + model=model, + quality=quality, + size=size, + response_format="b64_json" if download else "url", + style=style, + timeout=timeout, ) + if download: + image_bytes = response.data[0].b64_json + ext = get_extension(image_bytes) + filename = f"{time.time()}.{ext}" - image_bytes = response.data[0].b64_json - - filename = f"{time.time()}.jpg" + return (self.save_base64_image(image_bytes, filename),) - return self.save_base64_image(image_bytes, filename) + return response.data[0].url diff --git a/llama-index-integrations/tools/llama-index-tools-openai/pyproject.toml b/llama-index-integrations/tools/llama-index-tools-openai/pyproject.toml index a4e09ace5b20f..90e8f0507e047 100644 --- a/llama-index-integrations/tools/llama-index-tools-openai/pyproject.toml +++ b/llama-index-integrations/tools/llama-index-tools-openai/pyproject.toml @@ -29,7 +29,7 @@ license = "MIT" maintainers = ["manelferreira_"] name = "llama-index-tools-openai-image-generation" readme = "README.md" -version = "0.2.0" +version = "0.3.0" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/.gitignore b/llama-index-integrations/tools/llama-index-tools-vectara-query/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/BUILD b/llama-index-integrations/tools/llama-index-tools-vectara-query/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/Makefile b/llama-index-integrations/tools/llama-index-tools-vectara-query/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/README.md b/llama-index-integrations/tools/llama-index-tools-vectara-query/README.md new file mode 100644 index 0000000000000..3b48504bd8a2d --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/README.md @@ -0,0 +1,33 @@ +## Vectara Query Tool + +This tool connects to a Vectara corpus and allows agents to make semantic search or retrieval augmented generation (RAG) queries. 
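+ +You can install the tool with `pip install -U llama-index-tools-vectara-query` (the same package used in the example notebook below).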
+ +## Usage + +This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb). + +To use this tool, you'll need the following information in your environment: + +- `VECTARA_CUSTOMER_ID`: The customer id for your Vectara account. If you don't have an account, you can create one [here](https://vectara.com/integrations/llamaindex). +- `VECTARA_CORPUS_ID`: The corpus id for the Vectara corpus that you want your tool to search. If you need help creating a corpus with your data, follow this [Quick Start](https://docs.vectara.com/docs/quickstart) guide. +- `VECTARA_API_KEY`: An API key that can perform queries on this corpus. + +Here's an example usage of the VectaraQueryToolSpec. + +```python +from llama_index.tools.vectara_query import VectaraQueryToolSpec +from llama_index.agent.openai import OpenAIAgent + +# Connecting to a Vectara corpus about Electric Vehicles +tool_spec = VectaraQueryToolSpec() + +agent = OpenAIAgent.from_tools(tool_spec.to_tool_list()) + +agent.chat("What are the different types of electric vehicles?") +``` + +The available tools are: + +`semantic_search`: A tool that accepts a query and uses semantic search to obtain the top search results. + +`rag_query`: A tool that accepts a query and uses RAG to obtain a generative response grounded in the search results. diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb b/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb new file mode 100644 index 0000000000000..028979f10f8a7 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b48281b0-80c1-4ed8-8e09-8c046cbecd18", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "id": "a2762271-0db4-4d50-aa50-036d01926a7d", + "metadata": {}, + "source": [ + "## Vectara Query Tool" + ] + }, + { + "cell_type": "markdown", + "id": "4452df2a-976b-4e1c-9f9b-2fa983569948", + "metadata": {}, + "source": [ + "To get started with Vectara, [sign up](https://vectara.com/integrations/llamaindex) (if you haven't already) and follow our [quickstart](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key.\n", + "\n", + "Once you have done this, add the following variables to your environment:\n", + "\n", + "`VECTARA_CUSTOMER_ID`: The customer id for your Vectara account.\n", + "\n", + "`VECTARA_CORPUS_ID`: The corpus id for the Vectara corpus that you want your tool to search.\n", + "\n", + "`VECTARA_API_KEY`: An API key that can perform queries on this corpus.\n", + "\n", + "You are now ready to use the Vectara query tool.\n", + "\n", + "To initialize the tool, provide your Vectara information and any query parameters that you want to adjust, such as the reranker, summarizer prompt, etc. To see the entire list of parameters, see the [VectaraQueryToolSpec class definition](https://github.com/david-oplatka/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py#L11)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adbd9f2c-1cc9-4220-a117-7099a248d855", + "metadata": {}, + "outputs": [], + "source": [ + "# Import and initialize our tool spec\n", + "# pip install -U llama-index-tools-vectara-query\n", + "\n", + "from llama_index.tools.vectara_query.base import VectaraQueryToolSpec\n", + "\n", + "tool_spec = VectaraQueryToolSpec()" + ] + }, + { + "cell_type": "markdown", + "id": "ac3f03a4-2448-4ef2-8aa8-e6c536875a0e", + "metadata": {}, + "source": [ + "After initializing the tool spec, we can provide it to our agent. For this notebook, we will use the OpenAI Agent, but our tool can be used with any type of agent. You will need your own OpenAI API key to run this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f9fe90d-4a54-416f-a0ed-81c3de614487", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup OpenAI Agent\n", + "import openai\n", + "\n", + "openai.api_key = \"sk-your-key\"\n", + "from llama_index.agent.openai import OpenAIAgent\n", + "\n", + "agent = OpenAIAgent.from_tools(\n", + " tool_spec.to_tool_list(),\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb2fea58-8084-48b8-84f1-713e2d476b47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Added user message to memory: What are the different types of electric vehicles? How do they work?\n", + "=== Calling Function ===\n", + "Calling function: semantic_search with args: {\"query\":\"types of electric vehicles and how they work\"}\n", + "Got output: [{'text': 'Electric Car Overview | DriveClean Skip to main content Electric Car Overview Electric cars come in three main types with differing power systems designed to meet varying driving needs.They are: fuel cell, battery-electric, plug-in hybrid electric cars. Battery-Electric Cars Battery-electric cars are all electric and don’t use gasoline, and instead have a large battery that powers one or more electric motors. Currently, battery-electrics have a driving range of 80 to more than 300 miles, with ranges increasing as new models are introduced.', 'citation_metadata': {'lang': 'eng', 'offset': '0', 'len': '218', 'source': 'website', 'url': 'https://driveclean.ca.gov/electric-car-overview#main-content', 'title': 'Electric Car Overview | DriveClean'}}, {'text': 'Carpool Lane Decals Which cars qualify for the carpool lane sticker? How long are carpool lane decals valid? Electric Cars What are the different types of electric vehicles? Why should I drive electric? Will switching to an electric car save me money?', 'citation_metadata': {'lang': 'eng', 'offset': '269', 'len': '64', 'source': 'website', 'url': 'https://driveclean.ca.gov/faqs', 'title': 'Frequently Asked Questions | DriveClean'}}, {'text': 'NOx is a lung irritant. When combined with hydrocarbons and sunlight, NOx compounds form smog Plug-in electric vehicle or PEV – These cars can be recharged from any external source of electricity and the electricity is stored in a rechargeable battery pack to drive or contribute to driving the wheels. These types of cars include battery-electric vehicles and plug-in hybrid-electric vehicles PHEV – Plug-in hybrid-electric cars are similar to traditional hybrids but are also equipped with a larger, more advanced battery that allows the vehicle to be plugged in and recharged in addition to refueling with gasoline. 
This larger battery allows the car to drive on battery alone, gasoline alone or a combination of electric and gasoline fuels PM – Particulate matter. Tiny particles of solid matter that lodge in the lungs and form deposits on buildings.', 'citation_metadata': {'lang': 'eng', 'offset': '8376', 'len': '315', 'source': 'website', 'url': 'https://driveclean.ca.gov/glossary', 'title': 'Glossary of Terms | DriveClean'}}, {'text': \"Rated for you by America's best test team. Performance 8.5/10 How does the iX drive? Electric SUVs are redefining how large vehicles are supposed to handle, and the iX is a prime example. The weight of batteries mounted low under the floor really helps provide a hunkered-down feeling. The iX isn't really exciting to drive, but it corners with ease, and the light-effort steering creates the illusion of the iX being lighter than it actually is.\", 'citation_metadata': {'lang': 'eng', 'offset': '8729', 'len': '102', 'source': 'website', 'url': 'https://www.edmunds.com/bmw/ix/2025/', 'title': '2025 BMW iX Prices, Reviews, and Pictures | Edmunds'}}, {'text': \"Ronald Montoya · 05/19/2023 BASICS Do Electric Cars Have Transmissions? Ronald Montoya · 05/05/2023 BASICS Keep Your EV Running Smoothly With This Electric Car Maintenance Guide Jake Sundstrom · 04/28/2023 BASICS Why Electric Cars Don't Use Motor Oil Will Kaufman · 04/19/2023 BASICS Top Electric Car Companies of 2023 Ronald Montoya · 04/14/2023 BASICS Is an Electric Car Worth It? Will Kaufman · 03/24/2023 BASICS Pros and Cons of Electric Vehicles Jake Sundstrom · 03/07/2023 BASICS How Do Electric Cars Work? An In-Depth Guide Will Kaufman · 03/07/2023 EV tax credits, rebates & incentives INCENTIVES & REBATES Electric Vehicle Tax Credits 2024: What You Need to Know Ronald Montoya · 01/02/2024 INCENTIVES & REBATES Find EV tax credits, rebates, and incentives Edmunds · 01/28/2023 Am I Ready for an EV? EV ownership works best if you can charge at home (240V outlet) This typically means a 240V home installation, or other places your car is parked for several hours each day.\", 'citation_metadata': {'lang': 'eng', 'offset': '6253', 'len': '129', 'source': 'website', 'url': 'https://www.edmunds.com/electric-car/articles/', 'title': 'Edmunds EV Hub | Edmunds'}}]\n", + "========================\n", + "\n" + ] + }, + { + "data": { + "text/markdown": [ + "There are three main types of electric vehicles:\n", + "\n", + "1. Battery-Electric Cars: These cars are all-electric and do not use gasoline. They are powered by a large battery that drives one or more electric motors. Battery-electric cars have a driving range of 80 to more than 300 miles, with ranges increasing as new models are introduced.\n", + "\n", + "2. Plug-in Electric Vehicles (PEV): These cars can be recharged from any external source of electricity, and the electricity is stored in a rechargeable battery pack to drive or contribute to driving the wheels. PEVs include battery-electric vehicles and plug-in hybrid-electric vehicles (PHEV).\n", + "\n", + "3. Plug-in Hybrid-Electric Vehicles (PHEV): PHEVs are similar to traditional hybrids but are equipped with a larger, more advanced battery that allows the vehicle to be plugged in and recharged in addition to refueling with gasoline. 
This larger battery enables the car to drive on battery alone, gasoline alone, or a combination of electric and gasoline fuels.\n", + "\n", + "These electric vehicles work by utilizing electric motors powered by batteries to drive the wheels, providing a cleaner and more sustainable mode of transportation." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import Markdown, display\n", + "\n", + "response = agent.chat(\n", + " \"What are the different types of electric vehicles? How do they work?\"\n", + ").response\n", + "display(Markdown(response))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a736f3b-5ea6-4191-9781-1c4985086c5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Added user message to memory: What are the pros and cons of Fuel Cell EVs? What are some popular models?\n", + "=== Calling Function ===\n", + "Calling function: semantic_search with args: {\"query\":\"pros and cons of Fuel Cell EVs, popular models\"}\n", + "Got output: [{'text': \"Jonathan got his start testing cars for Road & Track magazine as a newly minted mechanical engineer grad from University of California, Irvine, and has also contributed to Motor Trend and the Associated Press. He likes to say he learned to drive a manual transmission in a rear-wheel-drive mid-engine vehicle but often omits it was his family's 1991 Toyota Previa minivan. Pros Impressive real-world EV range Loads of standard features and in-car tech Comfortable, well-appointed interior Cons Disappointing rear passenger space given the car's size More expensive than some rivals and not as quick or sporty What's new Larger battery and revised regenerative brakes improve range Standard Luxury Line styling with traditional Mercedes hood ornament Part of the first EQS generation introduced for 2022 Overview Perched at the top of the Mercedes-Benz's EV sedan lineup is the 2025 EQS. A sleek aerodynamic design distinguishes the EQS from its stately gas-fueled sibling, the S-Class, but otherwise the idea is similar: Offer the utmost in comfort and luxury from a large sedan. For 2025, Mercedes has added a new Luxury Line appearance package with traditional styling cues such as chrome grille panel slats and a stand-up Mercedes-star hood ornament.\", 'citation_metadata': {'lang': 'eng', 'offset': '2065', 'len': '513', 'source': 'website', 'url': 'https://www.edmunds.com/mercedes-benz/eqs/2025/', 'title': '2025 Mercedes-Benz EQS Prices, Reviews, and Pictures | Edmunds'}}, {'text': 'U.S. Department of Energy – Fuel Cell Vehicles Federal government website providing overview of fuel cell technology and a comparison between available models. California Hydrogen Business Council (CHBC) Advocacy group comprised of over 100 companies and agencies working to advance the commercialization of hydrogen and fuel cell industry. Electric For All Fuel Cell Cars A comprehensive listing of fuel cell electric car models currently available in California, including incentives.', 'citation_metadata': {'lang': 'eng', 'offset': '4261', 'len': '145', 'source': 'website', 'url': 'https://driveclean.ca.gov/hydrogen-fuel-cell', 'title': 'Hydrogen Fuel Cell Electric Cars | DriveClean'}}, {'text': \"Mark has also contributed to Motor Trend, Auto Aficionado, Chevy High Performance and several motorcycle magazines in various roles. 
Mark is also a juror on the North American Car, Truck and Utility Vehicle of the Year Awards and can be seen regularly on the Edmunds YouTube channel and sometimes representing the company in media interviews. Pros Less costly to run than gas-powered alternatives Smart features for commercial users Onboard power for mobile services Cons 126-mile range limit No one-pedal driving Loud, rumbly interior What's new New features, including a virtual rearview mirror and interior shelving Minor shuffling of features and options Part of the first E-Transit generation introduced for 2022 Overview Vans are the go-to choice for commercial vehicles that provide an enclosed space for parcels, tools and whatever your business needs to move. With the shift away from the old Econolines and Express vans, the newer models from Europe have fulfilled the need for more efficient and maneuverable alternatives. If you've wanted an electric van, however, you'd likely need a specialized contract with one of the few niche manufacturers.\", 'citation_metadata': {'lang': 'eng', 'offset': '2166', 'len': '525', 'source': 'website', 'url': 'https://www.edmunds.com/ford/e-transit-cargo-van/2023/', 'title': '2023 Ford E-Transit Cargo Van Prices, Reviews, and Pictures | Edmunds'}}, {'text': \"Mark has also contributed to Motor Trend, Auto Aficionado, Chevy High Performance and several motorcycle magazines in various roles. Mark is also a juror on the North American Car, Truck and Utility Vehicle of the Year Awards and can be seen regularly on the Edmunds YouTube channel and sometimes representing the company in media interviews. Pros Nimble size and handling make it fun to drive One of the most affordable EVs you can buy Upscale cabin design Cons Not as much range as other EVs Two-door layout and small back seat limit utility Choppy ride on bumpy roads What's new The base Classic trim returns to the lineup Minor feature availability changes Part of the third Hardtop generation introduced for 2014 Overview The 2024 Mini Cooper SE Hardtop 2 Door is the smallest vehicle in Mini's lineup, and it's also one of the most affordable EVs you can get today. It's based on the gas-powered Mini Cooper, which was last redesigned a decade ago, and this year the big news is the return of the entry-level Classic trim. Other changes are very minor since the Mini is due for a full makeover for the 2025 model year.\", 'citation_metadata': {'lang': 'eng', 'offset': '1799', 'len': '528', 'source': 'website', 'url': 'https://www.edmunds.com/mini/hardtop-2-door/2024/electric/', 'title': '2024 MINI Hardtop 2 Door Electric Prices, Reviews, and Pictures | Edmunds'}}, {'text': \"He's driven and photographed hundreds, if not thousands, of vehicles all over the world, so Kurt's library of automotive experiences would certainly make for a good book. When not dreaming about getting his racing license or trying to buy out-of-date film for his cameras, Kurt can usually be found cursing at his 1966 Mustang. 
Pros Scintillating performance on both road and track Genuinely fun to drive at all speeds Long list of standard features Comfortable ride despite the performance potential Cons Short range for a modern EV An overabundance of drive modes and settings Very heavy for a performance vehicle What's new New higher-performance version of the Ioniq 5 Generates up to 641 horsepower Many other performance upgrades, including brakes, suspension and more Part of the first Ioniq 5 generation introduced for 2022 Overview While a handful of automakers have dabbled in ultra high-performance EVs, most are expensive luxury models that are primarily best at straight-line performance. That's not the case with the all-new 2025 Ioniq 5 N. Hyundai's N performance division has turned its racing and tuning experience on the highly regarded Ioniq 5 electric crossover to create a vehicle that's currently unique in the EV space: It's relatively affordable, practical and genuinely fun to drive. Continue reading Edmunds Expert Rating below Hyundai IONIQ 5 N EV InsightsTrim: IONIQ 5 N Base Range 221 miles EPA Estimated Range EV batteries lose 1-2% of range per year.\", 'citation_metadata': {'lang': 'eng', 'offset': '1705', 'len': '673', 'source': 'website', 'url': 'https://www.edmunds.com/hyundai/ioniq-5-n/2025/', 'title': '2025 Hyundai IONIQ 5 N Prices, Reviews, and Pictures | Edmunds'}}]\n", + "========================\n", + "\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Pros and Cons of Fuel Cell Electric Vehicles (FCEVs):\n", + "\n", + "#### Pros:\n", + "- Impressive real-world EV range\n", + "- Loads of standard features and in-car tech\n", + "- Comfortable and well-appointed interior\n", + "- Less costly to run than gas-powered alternatives\n", + "- Smart features for commercial users\n", + "- Onboard power for mobile services\n", + "\n", + "#### Cons:\n", + "- Disappointing rear passenger space given the car's size\n", + "- More expensive than some rivals and not as quick or sporty\n", + "- 126-mile range limit\n", + "- No one-pedal driving\n", + "- Loud, rumbly interior\n", + "\n", + "### Popular Models of Fuel Cell Electric Vehicles:\n", + "\n", + "1. **2025 Mercedes-Benz EQS**\n", + " - Overview: Perched at the top of Mercedes-Benz's EV sedan lineup, the 2025 EQS offers comfort and luxury in a sleek aerodynamic design.\n", + " - [More Info](https://www.edmunds.com/mercedes-benz/eqs/2025/)\n", + "\n", + "2. **2023 Ford E-Transit Cargo Van**\n", + " - Overview: A commercial vehicle with smart features and onboard power for mobile services.\n", + " - [More Info](https://www.edmunds.com/ford/e-transit-cargo-van/2023/)\n", + "\n", + "3. **2024 Mini Cooper SE Hardtop 2 Door**\n", + " - Overview: An affordable and nimble EV with an upscale cabin design.\n", + " - [More Info](https://www.edmunds.com/mini/hardtop-2-door/2024/electric/)\n", + "\n", + "4. **2025 Hyundai IONIQ 5 N**\n", + " - Overview: The IONIQ 5 N offers scintillating performance on both road and track, making it genuinely fun to drive at all speeds.\n", + " - [More Info](https://www.edmunds.com/hyundai/ioniq-5-n/2025/)\n", + "\n", + "These models showcase the advancements and variety in the fuel cell electric vehicle market, catering to different needs and preferences of consumers." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "response = agent.chat(\n", + " \"What are the pros and cons of Fuel Cell EVs? 
What are some popular models?\"\n", + ").response\n", + "display(Markdown(response))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/BUILD b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/__init__.py b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/__init__.py new file mode 100644 index 0000000000000..bc25cbd15d789 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/__init__.py @@ -0,0 +1,6 @@ +from llama_index.tools.vectara_query.base import ( + VectaraQueryToolSpec, +) + + +__all__ = ["VectaraQueryToolSpec"] diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py new file mode 100644 index 0000000000000..dff6bf75ff4d9 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py @@ -0,0 +1,175 @@ +from typing import Any, List, Dict, Optional +from llama_index.core.tools.tool_spec.base import BaseToolSpec +from llama_index.core.schema import QueryBundle +from llama_index.core.callbacks.base import CallbackManager + +from llama_index.indices.managed.vectara import VectaraIndex +from llama_index.indices.managed.vectara.retriever import VectaraRetriever +from llama_index.indices.managed.vectara.query import VectaraQueryEngine + + +class VectaraQueryToolSpec(BaseToolSpec): + """Vectara Query tool spec.""" + + spec_functions = ["semantic_search", "rag_query"] + + def __init__( + self, + vectara_customer_id: Optional[str] = None, + vectara_corpus_id: Optional[str] = None, + vectara_api_key: Optional[str] = None, + num_results: int = 5, + lambda_val: float = 0.005, + n_sentences_before: int = 2, + n_sentences_after: int = 2, + metadata_filter: str = "", + reranker: str = "mmr", + rerank_k: int = 50, + mmr_diversity_bias: float = 0.2, + udf_expression: str = None, + rerank_chain: List[Dict] = None, + summarizer_prompt_name: str = "vectara-summary-ext-24-05-sml", + summary_num_results: int = 5, + summary_response_lang: str = "eng", + citations_style: Optional[str] = None, + citations_url_pattern: Optional[str] = None, + citations_text_pattern: Optional[str] = None, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + """Initializes the Vectara API and query parameters. + + Parameters: + - vectara_customer_id (str): Your Vectara customer ID. + If not specified, reads for environment variable "VECTARA_CUSTOMER_ID". 
+ - vectara_corpus_id (str): The corpus ID for the corpus you want your tool to search. + If not specified, reads from the environment variable "VECTARA_CORPUS_ID". + - vectara_api_key (str): An API key that has query permissions for the given corpus. + If not specified, reads from the environment variable "VECTARA_API_KEY". + - num_results (int): Number of search results to return with response. + - lambda_val (float): Lambda value for the Vectara query. + - n_sentences_before (int): Number of sentences before the summary. + - n_sentences_after (int): Number of sentences after the summary. + - metadata_filter (str): A string with expressions to filter the search documents. + - reranker (str): The reranker mode, either "mmr", "slingshot", "multilingual_reranker_v1", "udf", or "none". + - rerank_k (int): Number of top-k documents for reranking. + - mmr_diversity_bias (float): MMR diversity bias. + - udf_expression (str): The user-defined expression for reranking results. + See (https://docs.vectara.com/docs/learn/user-defined-function-reranker) + for more details about syntax for udf reranker expressions. + - rerank_chain: A list of rerankers to be applied in a sequence and their associated parameters + for the chain reranker. Each element should specify the "type" of reranker (mmr, slingshot, udf) + and any other parameters (e.g. "limit" or "cutoff" for any type, "diversity_bias" for mmr, and "user_function" for udf). + If using slingshot/multilingual_reranker_v1, it must be first in the list. + - summarizer_prompt_name (str): If enable_summarizer is True, the Vectara summarizer to use. + - summary_num_results (int): If enable_summarizer is True, the number of summary results. + - summary_response_lang (str): If enable_summarizer is True, the response language for the summary. + - citations_style (str): The style of the citations in the summary generation, + either "numeric", "html", "markdown", or "none". + This is a Vectara Scale only feature. Defaults to None. + - citations_url_pattern (str): URL pattern for html and markdown citations. + If non-empty, specifies the URL pattern to use for citations; e.g. "{doc.url}". + See (https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary) for more details. + This is a Vectara Scale only feature. Defaults to None. + - citations_text_pattern (str): The displayed text for citations. + If not specified, numeric citations are displayed.
+ """ + self.index = VectaraIndex( + vectara_customer_id=vectara_customer_id, + vectara_corpus_id=vectara_corpus_id, + vectara_api_key=vectara_api_key, + ) + + self.retriever = VectaraRetriever( + index=self.index, + similarity_top_k=num_results, + lambda_val=lambda_val, + n_sentences_before=n_sentences_before, + n_sentences_after=n_sentences_after, + filter=metadata_filter, + reranker=reranker, + rerank_k=rerank_k, + mmr_diversity_bias=mmr_diversity_bias, + udf_expression=udf_expression, + rerank_chain=rerank_chain, + summary_enabled=False, + callback_manager=callback_manager, + **kwargs, + ) + + query_engine_retriever = VectaraRetriever( + index=self.index, + similarity_top_k=num_results, + lambda_val=lambda_val, + n_sentences_before=n_sentences_before, + n_sentences_after=n_sentences_after, + filter=metadata_filter, + reranker=reranker, + rerank_k=rerank_k, + mmr_diversity_bias=mmr_diversity_bias, + udf_expression=udf_expression, + rerank_chain=rerank_chain, + summary_enabled=True, + summary_response_lang=summary_response_lang, + summary_num_results=summary_num_results, + summary_prompt_name=summarizer_prompt_name, + citations_style=citations_style, + citations_url_pattern=citations_url_pattern, + citations_text_pattern=citations_text_pattern, + callback_manager=callback_manager, + **kwargs, + ) + + self.query_engine = VectaraQueryEngine(retriever=query_engine_retriever) + + def semantic_search( + self, + query: str, + ) -> List[Dict]: + """ + Makes a query to a Vectara corpus and returns the top search results from the retrieved documents. + + Parameters: + query (str): The input query from the user. + + Returns: + List[Dict]: A list of retrieved documents with their associated metadata + """ + response = self.retriever._retrieve(query_bundle=QueryBundle(query_str=query)) + + if len(response) == 0: + return [] + + return [ + { + "text": doc.node.text, + "citation_metadata": doc.node.metadata, + } + for doc in response + ] + + def rag_query( + self, + query: str, + ) -> Dict: + """ + Makes a query to a Vectara corpus and returns the generated summary, the citation metadata, and the factual consistency score. + + Parameters: + query (str): The input query from the user. + + Returns: + Dict: A dictionary containing the generated summary, citation metadata, and the factual consistency score. 
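+ + Example (illustrative; assumes the Vectara environment variables are set): + >>> tool_spec = VectaraQueryToolSpec() + >>> tool_spec.rag_query("What are the different types of electric vehicles?")["summary"]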
+ """ + response = self.query_engine._query(query_bundle=QueryBundle(query_str=query)) + + if str(response) == "None": + return {} + + return { + "summary": response.response, + "citation_metadata": response.source_nodes, + "factual_consistency_score": response.metadata["fcs"] + if "fcs" in response.metadata + else 0.0, + } diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/pyproject.toml b/llama-index-integrations/tools/llama-index-tools-vectara-query/pyproject.toml new file mode 100644 index 0000000000000..ec79986d10871 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.tools.vectara_query" + +[tool.llamahub.class_authors] +VectaraQueryToolSpec = "david-oplatka" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["David Oplatka "] +description = "llama-index tools vectara query integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-tools-vectara-query" +packages = [{include = "llama_index/"}] +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.11.0" +llama-index-indices-managed-vectara = "^0.2.3" + +[tool.poetry.group.dev.dependencies] +black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} +codespell = {extras = ["toml"], version = ">=v2.2.6"} +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 +types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/BUILD b/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/__init__.py b/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/test_tools_vectara_query.py b/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/test_tools_vectara_query.py new file mode 100644 index 0000000000000..46310e6f011eb --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/test_tools_vectara_query.py @@ -0,0 +1,318 @@ +from typing import List +from llama_index.core.schema import Document +from llama_index.indices.managed.vectara import VectaraIndex +from llama_index.core.tools.tool_spec.base import BaseToolSpec +from llama_index.tools.vectara_query import VectaraQueryToolSpec +from llama_index.agent.openai import OpenAIAgent + +import pytest +import re + +# +# For this test to run properly, please setup as follows: 
+# 1. Create a Vectara account: sign up at https://console.vectara.com/signup +# 2. Create a corpus in your Vectara account, with the following filter attributes: +# a. doc.test_num (text) +# b. doc.test_score (integer) +# c. doc.date (text) +# d. doc.url (text) +# 3. Create an API_KEY for this corpus with permissions for query and indexing +# 4. Setup environment variables: +# VECTARA_API_KEY, VECTARA_CORPUS_ID, VECTARA_CUSTOMER_ID, and OPENAI_API_KEY +# +# Note: In order to run test_citations, you will need a Scale account. +# + + +def test_class(): + names_of_base_classes = [b.__name__ for b in VectaraQueryToolSpec.__mro__] + assert BaseToolSpec.__name__ in names_of_base_classes + + +def get_docs() -> List[Document]: + inputs = [ + { + "text": "This is test text for Vectara integration with LlamaIndex", + "metadata": {"test_num": "1", "test_score": 10, "date": "2020-02-25"}, + }, + { + "text": "And now for something completely different", + "metadata": {"test_num": "2", "test_score": 2, "date": "2015-10-13"}, + }, + { + "text": "when 900 years you will be, look as good you will not", + "metadata": {"test_num": "3", "test_score": 20, "date": "2023-09-12"}, + }, + { + "text": "when 850 years you will be, look as good you will not", + "metadata": {"test_num": "4", "test_score": 50, "date": "2022-01-01"}, + }, + ] + docs: List[Document] = [] + for inp in inputs: + doc = Document( + text=str(inp["text"]), + metadata=inp["metadata"], + ) + docs.append(doc) + return docs + + +@pytest.fixture() +def vectara1(): + docs = get_docs() + try: + vectara1 = VectaraIndex.from_documents(docs) + except ValueError: + pytest.skip("Missing Vectara credentials, skipping test") + + yield vectara1 + + # Tear down code + for id in vectara1.doc_ids: + vectara1._delete_doc(id) + + +def test_simple_retrieval(vectara1) -> None: + docs = get_docs() + tool_spec = VectaraQueryToolSpec(num_results=1) + res = tool_spec.semantic_search("Find me something different.") + assert len(res) == 1 + assert res[0]["text"] == docs[1].text + + +def test_mmr_retrieval(vectara1) -> None: + docs = get_docs() + + # test with diversity bias = 0 + tool_spec = VectaraQueryToolSpec( + num_results=2, + n_sentences_before=0, + n_sentences_after=0, + reranker="mmr", + rerank_k=10, + mmr_diversity_bias=0.0, + ) + res = tool_spec.semantic_search("How will I look?") + assert len(res) == 2 + assert res[0]["text"] == docs[2].text + assert res[1]["text"] == docs[3].text + + # test with diversity bias = 1 + tool_spec = VectaraQueryToolSpec( + num_results=2, + n_sentences_before=0, + n_sentences_after=0, + reranker="mmr", + rerank_k=10, + mmr_diversity_bias=1.0, + ) + res = tool_spec.semantic_search("How will I look?") + assert len(res) == 2 + assert res[0]["text"] == docs[2].text + + +def test_retrieval_with_filter(vectara1) -> None: + docs = get_docs() + + tool_spec = VectaraQueryToolSpec( + num_results=1, metadata_filter="doc.test_num = '1'" + ) + res = tool_spec.semantic_search("What does this test?") + assert len(res) == 1 + assert res[0]["text"] == docs[0].text + + +def test_udf_retrieval(vectara1) -> None: + docs = get_docs() + + # test with basic math expression + tool_spec = VectaraQueryToolSpec( + num_results=2, + n_sentences_before=0, + n_sentences_after=0, + reranker="udf", + udf_expression="get('$.score') + get('$.document_metadata.test_score')", + ) + + res = tool_spec.semantic_search("What will the future look like?") + assert len(res) == 2 + assert res[0]["text"] == docs[3].text + assert res[1]["text"] == docs[2].text + + # test with 
dates: Weight of score subtracted by number of years from current date + tool_spec = VectaraQueryToolSpec( + num_results=2, + n_sentences_before=0, + n_sentences_after=0, + reranker="udf", + udf_expression="max(0, 5 * get('$.score') - (to_unix_timestamp(now()) - to_unix_timestamp(datetime_parse(get('$.document_metadata.date'), 'yyyy-MM-dd'))) / 31536000)", + ) + + res = tool_spec.semantic_search("What will the future look like?") + assert len(res) == 2 + assert res[0]["text"] == docs[2].text + assert res[1]["text"] == docs[3].text + + +def test_chain_rerank_retrieval(vectara1) -> None: + docs = get_docs() + + # Test basic chain + tool_spec = VectaraQueryToolSpec( + num_results=2, + n_sentences_before=0, + n_sentences_after=0, + reranker="chain", + rerank_chain=[{"type": "slingshot"}, {"type": "mmr", "diversity_bias": 0.4}], + ) + + res = tool_spec.semantic_search("What's this all about?") + assert len(res) == 2 + assert res[0]["text"] == docs[0].text + + # Test chain with UDF and limit + tool_spec = VectaraQueryToolSpec( + num_results=4, + n_sentences_before=0, + n_sentences_after=0, + reranker="chain", + rerank_chain=[ + {"type": "slingshot"}, + {"type": "mmr"}, + { + "type": "udf", + "user_function": "5 * get('$.score') + get('$.document_metadata.test_score') / 2", + "limit": 2, + }, + ], + ) + + res = tool_spec.semantic_search("What's this all about?") + assert len(res) == 2 + assert res[0]["text"] == docs[3].text + assert res[1]["text"] == docs[2].text + + # Test chain with cutoff + tool_spec = VectaraQueryToolSpec( + num_results=4, + n_sentences_before=0, + n_sentences_after=0, + reranker="chain", + rerank_chain=[ + {"type": "slingshot"}, + {"type": "mmr", "diversity_bias": 0.4, "cutoff": 0.75}, + ], + ) + + res = tool_spec.semantic_search("What's this all about?") + assert len(res) == 1 + assert res[0]["text"] == docs[0].text + + +@pytest.fixture() +def vectara2(): + try: + vectara2 = VectaraIndex() + except ValueError: + pytest.skip("Missing Vectara credentials, skipping test") + + file_path = "docs/docs/examples/data/paul_graham/paul_graham_essay.txt" + id = vectara2.insert_file( + file_path, metadata={"url": "https://www.paulgraham.com/worked.html"} + ) + + yield vectara2 + + # Tear down code + vectara2._delete_doc(id) + + +def test_basic_rag_query(vectara2) -> None: + # test query with Vectara summarization (default) + tool_spec = VectaraQueryToolSpec(num_results=3) + res = tool_spec.rag_query("What software did Paul Graham write?") + assert ( + "paul graham" in res["summary"].lower() and "software" in res["summary"].lower() + ) + assert "factual_consistency_score" in res + assert res["factual_consistency_score"] >= 0 + + res = tool_spec.rag_query("How is Paul related to Reddit?") + summary = res["summary"] + assert "paul graham" in summary.lower() and "reddit" in summary.lower() + assert "https://www.paulgraham.com/worked.html" in str(res["citation_metadata"]) + + +def test_citations(vectara2) -> None: + # test markdown citations + tool_spec = VectaraQueryToolSpec( + num_results=10, + summary_num_results=7, + summarizer_prompt_name="vectara-summary-ext-24-05-med-omni", + citations_style="markdown", + citations_url_pattern="{doc.url}", + citations_text_pattern="(source)", + ) + res = tool_spec.rag_query("What colleges has Paul attended?") + summary = res["summary"] + assert "(source)" in summary + assert "https://www.paulgraham.com/worked.html" in summary + + # test numeric citations + tool_spec = VectaraQueryToolSpec( + num_results=10, + summary_num_results=7, + 
summarizer_prompt_name="mockingbird-1.0-2024-07-16", + citations_style="numeric", + ) + res = tool_spec.rag_query("What colleges has Paul attended?") + summary = res["summary"] + assert re.search(r"\[\d+\]", summary) + + +def test_agent_basic(vectara2) -> None: + tool_spec = VectaraQueryToolSpec(num_results=10, reranker="slingshot") + agent = OpenAIAgent.from_tools(tool_spec.to_tool_list()) + res = agent.chat("What software did Paul Graham write?").response + agent_tasks = agent.get_completed_tasks() + tool_called = ( + agent_tasks[0] + .memory.chat_store.store["chat_history"][1] + .additional_kwargs["tool_calls"][0] + .function.name + ) + assert tool_called in ["semantic_search", "rag_query"] + assert "paul graham" in res.lower() and "software" in res.lower() + + tool_spec = VectaraQueryToolSpec(num_results=10, reranker="mmr") + agent = OpenAIAgent.from_tools(tool_spec.to_tool_list()) + res = agent.chat("Please summarize Paul Graham's work").response + agent_tasks = agent.get_completed_tasks() + tool_called = ( + agent_tasks[0] + .memory.chat_store.store["chat_history"][1] + .additional_kwargs["tool_calls"][0] + .function.name + ) + assert tool_called == "rag_query" + assert "bel" in res.lower() and "lisp" in res.lower() + + +def test_agent_filter(vectara1) -> None: + tool_spec = VectaraQueryToolSpec( + num_results=1, metadata_filter="doc.date > '2022-02-01'" + ) + + agent = OpenAIAgent.from_tools(tool_spec.to_tool_list()) + + res = agent.chat("How will I look when I am much older compared to now?").response + agent_tasks = agent.get_completed_tasks() + tool_called = ( + agent_tasks[0] + .memory.chat_store.store["chat_history"][1] + .additional_kwargs["tool_calls"][0] + .function.name + ) + assert tool_called in ["semantic_search", "rag_query"] + assert "you" in res.lower() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/azureaisearch_utils.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/azureaisearch_utils.py new file mode 100644 index 0000000000000..2639acd465950 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/azureaisearch_utils.py @@ -0,0 +1,118 @@ +from typing import Any, Dict, List, Optional, Tuple +from llama_index.core.schema import BaseNode, TextNode +from llama_index.core.vector_stores.utils import ( + metadata_dict_to_node, + legacy_metadata_dict_to_node, +) +import json +import logging + +logger = logging.getLogger(__name__) + + +def create_node_from_result( + result: Dict[str, Any], field_mapping: Dict[str, str] +) -> BaseNode: + """Create a node from a search result. 
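+ + If the stored metadata cannot be converted into a node directly, the legacy metadata format is used as a fallback (see the exception handling below).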
+ + Args: + result (Dict[str, Any]): Search result dictionary + field_mapping (Dict[str, str]): Field mapping dictionary + + Returns: + BaseNode: Created node + """ + metadata_str = result[field_mapping["metadata"]] + metadata = json.loads(metadata_str) if metadata_str else {} + + try: + node = metadata_dict_to_node(metadata) + node.set_content(result[field_mapping["chunk"]]) + node.embedding = result.get(field_mapping["embedding"]) + except Exception: + # NOTE: deprecated legacy logic for backward compatibility + metadata, node_info, relationships = legacy_metadata_dict_to_node(metadata) + + node = TextNode( + text=result[field_mapping["chunk"]], + id_=result[field_mapping["id"]], + metadata=metadata, + start_char_idx=node_info.get("start", None), + end_char_idx=node_info.get("end", None), + relationships=relationships, + ) + if field_mapping.get("embedding"): + node.embedding = result.get(field_mapping["embedding"]) + + return node + + +def process_batch_results( + batch_nodes: List[BaseNode], + nodes: List[BaseNode], + batch_size: int, + limit: Optional[int] = None, +) -> Tuple[List[BaseNode], bool]: + """Process batch results and determine if we should continue fetching. + + Args: + batch_nodes (List[BaseNode]): Current batch of nodes + nodes (List[BaseNode]): Accumulated nodes + batch_size (int): Size of each batch + limit (Optional[int]): Maximum number of nodes to retrieve + + Returns: + Tuple[List[BaseNode], bool]: Updated nodes list and whether to continue fetching + """ + if not batch_nodes: + return nodes, False + + nodes.extend(batch_nodes) + + # If we've hit the requested limit, stop + if limit and len(nodes) >= limit: + return nodes[:limit], False + + # If we got less than batch_size results, we've hit the end + if len(batch_nodes) < batch_size: + return nodes, False + + return nodes, True + + +def create_search_request( + field_mapping: Dict[str, str], + filter_str: Optional[str], + batch_size: int, + offset: int, +) -> Dict[str, Any]: + """Create a search request dictionary. + + Args: + field_mapping (Dict[str, str]): Field mapping dictionary + filter_str (Optional[str]): OData filter string + batch_size (int): Size of batch to retrieve + offset (int): Number of results to skip + + Returns: + Dict[str, Any]: Search request parameters + """ + return { + "search_text": "*", + "filter": filter_str, + "top": batch_size, + "skip": offset, + "select": list(field_mapping.values()), + } + + +def handle_search_error(e: Exception) -> None: + """Handle search errors by logging them appropriately. 
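+ + ValueError instances are logged as invalid search parameters; any other exception is logged as a general search error.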
+ + Args: + e (Exception): The exception that occurred + """ + if isinstance(e, ValueError): + logger.error(f"Invalid search parameters: {e}") + else: + logger.error(f"Error during search operation: {e}") diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py index 1af492d2c4973..c0da5d1812a22 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py @@ -28,6 +28,12 @@ metadata_dict_to_node, node_to_metadata_dict, ) +from llama_index.vector_stores.azureaisearch.azureaisearch_utils import ( + create_node_from_result, + process_batch_results, + create_search_request, + handle_search_error, +) logger = logging.getLogger(__name__) @@ -98,6 +104,7 @@ class AzureAISearchVectorStore(BasePydanticVectorStore): vector_store = AzureAISearchVectorStore( search_or_index_client=index_client, filterable_metadata_field_keys=metadata_fields, + hidden_field_keys=["embedding"], index_name=index_name, index_management=IndexManagement.CREATE_IF_NOT_EXISTS, id_field_key="id", @@ -122,6 +129,7 @@ class AzureAISearchVectorStore(BasePydanticVectorStore): _async_search_client: AsyncSearchClient = PrivateAttr() _embedding_dimensionality: int = PrivateAttr() _language_analyzer: str = PrivateAttr() + _hidden_field_keys: List[str] = PrivateAttr() _field_mapping: Dict[str, str] = PrivateAttr() _index_management: IndexManagement = PrivateAttr() _index_mapping: Callable[ @@ -223,7 +231,12 @@ def _create_metadata_index_fields(self) -> List[Any]: elif field_type == MetadataIndexFieldType.COLLECTION: index_field_type = "Collection(Edm.String)" - field = SimpleField(name=field_name, type=index_field_type, filterable=True) + field = SimpleField( + name=field_name, + type=index_field_type, + filterable=True, + hidden=field_name in self._hidden_field_keys, + ) index_fields.append(field) return index_fields @@ -273,11 +286,18 @@ def _create_index(self, index_name: Optional[str]) -> None: logger.info(f"Configuring {index_name} fields for Azure AI Search") fields = [ - SimpleField(name=self._field_mapping["id"], type="Edm.String", key=True), + SimpleField( + name=self._field_mapping["id"], + type="Edm.String", + key=True, + filterable=True, + hidden=self._field_mapping["id"] in self._hidden_field_keys, + ), SearchableField( name=self._field_mapping["chunk"], type="Edm.String", analyzer_name=self._language_analyzer, + hidden=self._field_mapping["chunk"] in self._hidden_field_keys, ), SearchField( name=self._field_mapping["embedding"], @@ -285,10 +305,18 @@ def _create_index(self, index_name: Optional[str]) -> None: searchable=True, vector_search_dimensions=self._embedding_dimensionality, vector_search_profile_name=self._vector_profile_name, + hidden=self._field_mapping["embedding"] in self._hidden_field_keys, ), - SimpleField(name=self._field_mapping["metadata"], type="Edm.String"), SimpleField( - name=self._field_mapping["doc_id"], type="Edm.String", filterable=True + name=self._field_mapping["metadata"], + type="Edm.String", + hidden=self._field_mapping["metadata"] in self._hidden_field_keys, + ), + SimpleField( + name=self._field_mapping["doc_id"], + type="Edm.String", + filterable=True, + 
hidden=self._field_mapping["doc_id"] in self._hidden_field_keys, ), ] logger.info(f"Configuring {index_name} metadata fields") @@ -391,6 +419,7 @@ async def _acreate_index(self, index_name: Optional[str]) -> None: name=self._field_mapping["chunk"], type="Edm.String", analyzer_name=self._language_analyzer, + hidden=self._field_mapping["chunk"] in self._hidden_field_keys, ), SearchField( name=self._field_mapping["embedding"], @@ -398,10 +427,18 @@ async def _acreate_index(self, index_name: Optional[str]) -> None: searchable=True, vector_search_dimensions=self._embedding_dimensionality, vector_search_profile_name=self._vector_profile_name, + hidden=self._field_mapping["embedding"] in self._hidden_field_keys, ), - SimpleField(name=self._field_mapping["metadata"], type="Edm.String"), SimpleField( - name=self._field_mapping["doc_id"], type="Edm.String", filterable=True + name=self._field_mapping["metadata"], + type="Edm.String", + hidden=self._field_mapping["metadata"] in self._hidden_field_keys, + ), + SimpleField( + name=self._field_mapping["doc_id"], + type="Edm.String", + filterable=True, + hidden=self._field_mapping["doc_id"] in self._hidden_field_keys, ), ] logger.info(f"Configuring {index_name} metadata fields") @@ -468,6 +505,7 @@ async def _acreate_index(self, index_name: Optional[str]) -> None: semantic_search=semantic_search, ) logger.debug(f"Creating {index_name} search index") + await self._async_index_client.create_index(index) def _validate_index(self, index_name: Optional[str]) -> None: @@ -497,6 +535,7 @@ def __init__( Dict[str, Tuple[str, MetadataIndexFieldType]], ] ] = None, + hidden_field_keys: Optional[List[str]] = None, index_name: Optional[str] = None, index_mapping: Optional[ Callable[[Dict[str, str], Dict[str, Any]], Dict[str, str]] @@ -530,6 +569,10 @@ def __init__( as separate fields in the index, use filterable_metadata_field_keys to specify the metadata values that should be stored in these filterable fields doc_id_field_key (str): Index field storing doc_id + hidden_field_keys (List[str]): + List of index fields that should be hidden from the client. + This is useful for fields that are not needed for retrieving, + but are used for similarity search, like the embedding field. index_mapping: Optional function with definition (enriched_doc: Dict[str, str], metadata: Dict[str, Any]): Dict[str,str] @@ -683,6 +726,7 @@ def __init__( } self._field_mapping = field_mapping + self._hidden_field_keys = hidden_field_keys or [] self._index_mapping = ( self._default_index_mapping if index_mapping is None else index_mapping @@ -1144,53 +1188,127 @@ async def aquery( ) return await azure_query_result_search.asearch() + def _build_filter_str( + self, + field_mapping: Dict[str, str], + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + ) -> Optional[str]: + """Build OData filter string from node IDs and metadata filters. 
+ + Args: + field_mapping (Dict[str, str]): Field mapping dictionary + node_ids (Optional[List[str]]): List of node IDs to filter by + filters (Optional[MetadataFilters]): Metadata filters to apply + + Returns: + Optional[str]: OData filter string or None if no filters + """ + filter_str = None + if node_ids is not None: + filter_str = " or ".join( + [f"{field_mapping['id']} eq '{node_id}'" for node_id in node_ids] + ) + + if filters is not None: + metadata_filter = self._create_odata_filter(filters) + if filter_str is not None: + filter_str = f"({filter_str}) or ({metadata_filter})" + else: + filter_str = metadata_filter + + return filter_str + def get_nodes( self, node_ids: Optional[List[str]] = None, filters: Optional[MetadataFilters] = None, + limit: Optional[int] = None, ) -> List[BaseNode]: - """ - Get nodes from the index. + """Get nodes from the Azure AI Search index. Args: node_ids (Optional[List[str]]): List of node IDs to retrieve. filters (Optional[MetadataFilters]): Metadata filters to apply. + limit (Optional[int]): Maximum number of nodes to retrieve. Returns: List[BaseNode]: List of nodes retrieved from the index. """ - odata_filter = ( - self._create_odata_filter(filters) if filters is not None else None - ) - results = self._search_client.search(filter=odata_filter) + if not self._search_client: + raise ValueError("Search client not initialized") - # Converting results to List of BaseNodes - node_results = [] - for result in results: - node_id = result[self._field_mapping["id"]] - metadata_str = result[self._field_mapping["metadata"]] - metadata = json.loads(metadata_str) if metadata_str else {} - chunk = result[self._field_mapping["chunk"]] + filter_str = self._build_filter_str(self._field_mapping, node_ids, filters) + nodes = [] + batch_size = 1000 # Azure Search batch size limit + + while True: try: - node = metadata_dict_to_node(metadata) - node.set_content(chunk) - except Exception: - # NOTE: deprecated legacy logic for backward compatibility - metadata, node_info, relationships = legacy_metadata_dict_to_node( - metadata + search_request = create_search_request( + self._field_mapping, filter_str, batch_size, len(nodes) ) - node = TextNode( - text=chunk, - id_=node_id, - metadata=metadata, - start_char_idx=node_info.get("start", None), - end_char_idx=node_info.get("end", None), - relationships=relationships, + results = self._search_client.search(**search_request) + except Exception as e: + handle_search_error(e) + break + + batch_nodes = [ + create_node_from_result(result, self._field_mapping) + for result in results + ] + + nodes, continue_fetching = process_batch_results( + batch_nodes, nodes, batch_size, limit + ) + if not continue_fetching: + break + + return nodes + + async def aget_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + limit: Optional[int] = None, + ) -> List[BaseNode]: + """Get nodes asynchronously from the Azure AI Search index. + + Args: + node_ids (Optional[List[str]]): List of node IDs to retrieve. + filters (Optional[MetadataFilters]): Metadata filters to apply. + limit (Optional[int]): Maximum number of nodes to retrieve. + + Returns: + List[BaseNode]: List of nodes retrieved from the index. 
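For illustration, here is a minimal sketch of how the new `hidden_field_keys` option and the paginated `get_nodes` / `aget_nodes` helpers documented above might be used together. This is not part of the diff: the endpoint, key, index name, dimensionality, and filter values are placeholders, and keyword names such as `chunk_field_key` / `embedding_field_key` follow the store's existing docstring example rather than anything shown in this hunk, so treat them as assumptions.

```python
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from llama_index.core.vector_stores import MetadataFilter, MetadataFilters
from llama_index.vector_stores.azureaisearch import (
    AzureAISearchVectorStore,
    IndexManagement,
)

# Placeholder service endpoint, key, and index name.
index_client = SearchIndexClient(
    endpoint="https://<service>.search.windows.net",
    credential=AzureKeyCredential("<admin-key>"),
)

vector_store = AzureAISearchVectorStore(
    search_or_index_client=index_client,
    index_name="llamaindex-demo",
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    id_field_key="id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
    filterable_metadata_field_keys=["author"],
    # Keep the raw vector out of documents returned by the search service.
    hidden_field_keys=["embedding"],
    embedding_dimensionality=1536,
)

# Fetch up to 50 matching nodes; results are paged internally in batches
# of 1000 until the limit (or the end of the result set) is reached.
nodes = vector_store.get_nodes(
    filters=MetadataFilters(filters=[MetadataFilter(key="author", value="paul")]),
    limit=50,
)
```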
+ """ + if not self._async_search_client: + raise ValueError("Async Search client not initialized") + + filter_str = self._build_filter_str(self._field_mapping, node_ids, filters) + nodes = [] + batch_size = 1000 # Azure Search batch size limit + + while True: + try: + search_request = create_search_request( + self._field_mapping, filter_str, batch_size, len(nodes) ) + results = await self._async_search_client.search(**search_request) + except Exception as e: + handle_search_error(e) + break + + batch_nodes = [] + async for result in results: + batch_nodes.append(create_node_from_result(result, self._field_mapping)) - node_results.append(node) + nodes, continue_fetching = process_batch_results( + batch_nodes, nodes, batch_size, limit + ) + if not continue_fetching: + break - return node_results + return nodes class AzureQueryResultSearchBase: diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml index 8bd52c4f83f4e..f547193e82460 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml @@ -28,7 +28,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-azureaisearch" readme = "README.md" -version = "0.2.3" +version = "0.2.6" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_azureaisearch.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_azureaisearch.py index a7c607bd37bb5..8ec813c227362 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_azureaisearch.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_azureaisearch.py @@ -34,6 +34,7 @@ def create_mock_vector_store( metadata_string_field_key="metadata", doc_id_field_key="doc_id", filterable_metadata_field_keys=[], # Added to match the updated constructor + hidden_field_keys=["embedding"], index_name=index_name, index_management=index_management, embedding_dimensionality=2, # Assuming a dimensionality of 2 for simplicity diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py index d871c1652edb9..8a3d953da9773 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py @@ -255,7 +255,7 @@ def get_nodes( if filters: where = _to_chroma_filter(filters) else: - where = {} + where = None result = self._get(None, where=where, ids=node_ids) @@ -332,7 +332,7 @@ def delete_nodes( if filters: where = _to_chroma_filter(filters) else: - where = {} + where = None self._collection.delete(ids=node_ids, where=where) @@ -363,7 +363,7 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul ) where = _to_chroma_filter(query.filters) else: - where = kwargs.pop("where", {}) + where = kwargs.pop("where", None) if not query.query_embedding: return self._get(limit=query.similarity_top_k, where=where, **kwargs) diff 
--git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/pyproject.toml index e260fdb93501d..15276dadbcddd 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/pyproject.toml @@ -27,11 +27,11 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-chroma" readme = "README.md" -version = "0.2.1" +version = "0.3.0" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -chromadb = ">=0.4.0,<0.6.0,!=0.5.4,!=0.5.7,!=0.5.9,!=0.5.10,!=0.5.11,!=0.5.12" +chromadb = ">=0.5.17" llama-index-core = "^0.11.0" [tool.poetry.group.dev.dependencies] diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/CHANGELOG.md b/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/CHANGELOG.md index 8061e7bad3b9b..45aa83b36cf51 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/CHANGELOG.md +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG — llama-index-vector-stores-opensearch +## [0.4.1] + +- Added ability to create OpensearchVectorClient with custom os_async_client (like os_client) + ## [0.2.2] - Fixed issue where Opensearch Serverless does not support painless scripting so handling the case where is_aoss is set and using knn_score script instead. diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/llama_index/vector_stores/opensearch/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/llama_index/vector_stores/opensearch/base.py index c9c051213ffe6..034197cb484c1 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/llama_index/vector_stores/opensearch/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/llama_index/vector_stores/opensearch/base.py @@ -56,6 +56,8 @@ class OpensearchVectorClient: settings: Optional[dict]: Settings for the Opensearch index creation. Defaults to: {"index": {"knn": True, "knn.algo_param.ef_search": 100}} space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2 + os_client (Optional[OSClient]): Custom synchronous client (see OpenSearch from opensearch-py) + os_async_client (Optional[OSClient]): Custom asynchronous client (see AsyncOpenSearch from opensearch-py) **kwargs: Optional arguments passed to the OpenSearch client from opensearch-py. 
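To make the new `os_async_client` parameter concrete, here is a minimal sketch of passing both a custom synchronous and asynchronous client, mirroring the docstring above. The endpoint, index name, dimensionality, and auth settings are placeholders, and the positional argument order follows the client's existing documented usage rather than this hunk.

```python
from opensearchpy import AsyncOpenSearch, OpenSearch
from llama_index.vector_stores.opensearch import (
    OpensearchVectorClient,
    OpensearchVectorStore,
)

endpoint = "https://localhost:9200"  # placeholder
idx = "llamaindex-demo"              # placeholder

# Custom clients, e.g. with auth or connection tuning that the default
# constructor arguments do not cover.
sync_client = OpenSearch(hosts=[endpoint], http_auth=("admin", "admin"), verify_certs=False)
async_client = AsyncOpenSearch(hosts=[endpoint], http_auth=("admin", "admin"), verify_certs=False)

client = OpensearchVectorClient(
    endpoint,
    idx,
    1536,  # embedding dimensionality (placeholder)
    embedding_field="embedding",
    text_field="content",
    os_client=sync_client,         # used for synchronous operations
    os_async_client=async_client,  # as of 0.4.1, used instead of a default async client
)
vector_store = OpensearchVectorStore(client)
```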
""" @@ -74,6 +76,7 @@ def __init__( max_chunk_bytes: int = 1 * 1024 * 1024, search_pipeline: Optional[str] = None, os_client: Optional[OSClient] = None, + os_async_client: Optional[OSClient] = None, **kwargs: Any, ): """Init params.""" @@ -117,7 +120,7 @@ def __init__( self._os_client = os_client or self._get_opensearch_client( self._endpoint, **kwargs ) - self._os_async_client = self._get_async_opensearch_client( + self._os_async_client = os_async_client or self._get_async_opensearch_client( self._endpoint, **kwargs ) self._os_version = self._get_opensearch_version() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/pyproject.toml index f6e13baf97a63..2bd43658d6d61 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-opensearch" readme = "README.md" -version = "0.4.0" +version = "0.4.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py index 9a16dc052093b..4b18dc03ed39d 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py @@ -351,6 +351,7 @@ def get_nodes( collection_name=self.collection_name, limit=limit or 9999, scroll_filter=filter, + with_vectors=True, ) return self.parse_to_query_result(response[0]).nodes @@ -396,6 +397,7 @@ async def aget_nodes( collection_name=self.collection_name, limit=limit or 9999, scroll_filter=filter, + with_vectors=True, ) return self.parse_to_query_result(response[0]).nodes @@ -991,8 +993,19 @@ def parse_to_query_result(self, response: List[Any]) -> VectorStoreQueryResult: for point in response: payload = cast(Payload, point.payload) + vector = point.vector + embedding = None + + if isinstance(vector, dict): + embedding = vector.get(DENSE_VECTOR_NAME, vector.get("", None)) + elif isinstance(vector, list): + embedding = vector + try: node = metadata_dict_to_node(payload) + + if embedding and node.embedding is None: + node.embedding = embedding except Exception: metadata, node_info, relationships = legacy_metadata_dict_to_node( payload @@ -1005,6 +1018,7 @@ def parse_to_query_result(self, response: List[Any]) -> VectorStoreQueryResult: start_char_idx=node_info.get("start", None), end_char_idx=node_info.get("end", None), relationships=relationships, + embedding=embedding, ) nodes.append(node) ids.append(str(point.id)) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml index 9aaa7783df6a6..0ee29d31327f0 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-qdrant" readme = "README.md" -version = "0.3.2" 
+version = "0.3.3" [tool.poetry.dependencies] python = ">=3.9,<3.13" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py index 2cca8ac3c90c2..7526cee699007 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py @@ -2,6 +2,8 @@ from llama_index.vector_stores.qdrant import QdrantVectorStore import pytest +from qdrant_client.http.models import PointsList, PointStruct + def test_class(): names_of_base_classes = [b.__name__ for b in QdrantVectorStore.__mro__] @@ -58,3 +60,46 @@ async def test_aclear(vector_store: QdrantVectorStore) -> None: "33333333-3333-3333-3333-333333333333", ] ) + + +def test_parse_query_result(vector_store: QdrantVectorStore) -> None: + payload = { + "text": "Hello, world!", + } + + vector_dict = { + "": [1, 2, 3], + } + + # test vector name is empty (default) + points = PointsList(points=[PointStruct(id=1, vector=vector_dict, payload=payload)]) + + results = vector_store.parse_to_query_result(list(points.points)) + + assert len(results.nodes) == 1 + assert results.nodes[0].embedding == [1, 2, 3] + + # test vector name is not empty + vector_dict = { + "text-dense": [1, 2, 3], + } + + points = PointsList(points=[PointStruct(id=1, vector=vector_dict, payload=payload)]) + + results = vector_store.parse_to_query_result(list(points.points)) + + assert len(results.nodes) == 1 + assert results.nodes[0].embedding == [1, 2, 3] + + +@pytest.mark.asyncio() +async def test_get_with_embedding(vector_store: QdrantVectorStore) -> None: + existing_nodes = await vector_store.aget_nodes( + node_ids=[ + "11111111-1111-1111-1111-111111111111", + "22222222-2222-2222-2222-222222222222", + "33333333-3333-3333-3333-333333333333", + ] + ) + + assert all(node.embedding is not None for node in existing_nodes) diff --git a/llama-index-packs/llama-index-packs-zenguard/README.md b/llama-index-packs/llama-index-packs-zenguard/README.md index e0174c3536b84..f91763b8a0b4f 100644 --- a/llama-index-packs/llama-index-packs-zenguard/README.md +++ b/llama-index-packs/llama-index-packs-zenguard/README.md @@ -7,7 +7,6 @@ This LlamaPack lets you quickly set up [ZenGuard AI](https://www.zenguard.ai/) i - Prompts Attacks - Veering of the pre-defined topics - PII, sensitive info, and keywords leakage. -- Toxicity - Etc. Please, also check out our [open-source Python Client](https://github.com/ZenGuard-AI/fast-llm-security-guardrails?tab=readme-ov-file) for more inspiration. 
@@ -120,4 +119,3 @@ zenguard = pack.get_modules()["zenguard"] - [Detect Banned Topics](https://docs.zenguard.ai/detectors/banned-topics/) - [Detect Keywords](https://docs.zenguard.ai/detectors/keywords/) - [Detect Secrets](https://docs.zenguard.ai/detectors/secrets/) -- [Detect Toxicity](https://docs.zenguard.ai/detectors/toxicity/) diff --git a/llama-index-packs/llama-index-packs-zenguard/examples/zenguard.ipynb b/llama-index-packs/llama-index-packs-zenguard/examples/zenguard.ipynb index 0ebde59e463cf..442423add13f2 100644 --- a/llama-index-packs/llama-index-packs-zenguard/examples/zenguard.ipynb +++ b/llama-index-packs/llama-index-packs-zenguard/examples/zenguard.ipynb @@ -13,7 +13,6 @@ " * Prompts Attacks\n", " * Veering of the pre-defined topics\n", " * PII, sensitive info, and keywords leakage.\n", - " * Toxicity\n", " * Etc.\n", "\n", " Please, also check out our [open-source Python Client](https://github.com/ZenGuard-AI/fast-llm-security-guardrails?tab=readme-ov-file) for more inspiration.\n", @@ -167,8 +166,7 @@ " * [Detect Allowed Topics](https://docs.zenguard.ai/detectors/allowed-topics/)\n", " * [Detect Banned Topics](https://docs.zenguard.ai/detectors/banned-topics/)\n", " * [Detect Keywords](https://docs.zenguard.ai/detectors/keywords/)\n", - " * [Detect Secrets](https://docs.zenguard.ai/detectors/secrets/)\n", - " * [Detect Toxicity](https://docs.zenguard.ai/detectors/toxicity/)" + " * [Detect Secrets](https://docs.zenguard.ai/detectors/secrets/)" ] } ], diff --git a/llama-index-packs/llama-index-packs-zenguard/tests/test_packs_zenguard.py b/llama-index-packs/llama-index-packs-zenguard/tests/test_packs_zenguard.py index 93607622a5cad..7da9084b41d75 100644 --- a/llama-index-packs/llama-index-packs-zenguard/tests/test_packs_zenguard.py +++ b/llama-index-packs/llama-index-packs-zenguard/tests/test_packs_zenguard.py @@ -65,14 +65,6 @@ def test_secrets(zenguard_pack): assert response["is_detected"] is False -# TODO: toxcity endpoint is down -# def test_toxicity(zenguard_pack): -# prompt = "Simple toxicity test" -# detectors = [Detector.TOXICITY] -# response = zenguard_pack.run(detectors=detectors, prompt=prompt) -# assert response["is_detected"] is False - - def test_get_modules(zenguard_pack): modules = zenguard_pack.get_modules() assert isinstance(modules, Dict) diff --git a/poetry.lock b/poetry.lock index f4a2a2271cd88..f5923d61de7df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1425,84 +1425,84 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jiter" -version = "0.6.1" +version = "0.7.0" description = "Fast iterable JSON parser." 
optional = false python-versions = ">=3.8" files = [ - {file = "jiter-0.6.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:d08510593cb57296851080018006dfc394070178d238b767b1879dc1013b106c"}, - {file = "jiter-0.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adef59d5e2394ebbad13b7ed5e0306cceb1df92e2de688824232a91588e77aa7"}, - {file = "jiter-0.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3e02f7a27f2bcc15b7d455c9df05df8ffffcc596a2a541eeda9a3110326e7a3"}, - {file = "jiter-0.6.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed69a7971d67b08f152c17c638f0e8c2aa207e9dd3a5fcd3cba294d39b5a8d2d"}, - {file = "jiter-0.6.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2019d966e98f7c6df24b3b8363998575f47d26471bfb14aade37630fae836a1"}, - {file = "jiter-0.6.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36c0b51a285b68311e207a76c385650322734c8717d16c2eb8af75c9d69506e7"}, - {file = "jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:220e0963b4fb507c525c8f58cde3da6b1be0bfddb7ffd6798fb8f2531226cdb1"}, - {file = "jiter-0.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa25c7a9bf7875a141182b9c95aed487add635da01942ef7ca726e42a0c09058"}, - {file = "jiter-0.6.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e90552109ca8ccd07f47ca99c8a1509ced93920d271bb81780a973279974c5ab"}, - {file = "jiter-0.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:67723a011964971864e0b484b0ecfee6a14de1533cff7ffd71189e92103b38a8"}, - {file = "jiter-0.6.1-cp310-none-win32.whl", hash = "sha256:33af2b7d2bf310fdfec2da0177eab2fedab8679d1538d5b86a633ebfbbac4edd"}, - {file = "jiter-0.6.1-cp310-none-win_amd64.whl", hash = "sha256:7cea41c4c673353799906d940eee8f2d8fd1d9561d734aa921ae0f75cb9732f4"}, - {file = "jiter-0.6.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b03c24e7da7e75b170c7b2b172d9c5e463aa4b5c95696a368d52c295b3f6847f"}, - {file = "jiter-0.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:47fee1be677b25d0ef79d687e238dc6ac91a8e553e1a68d0839f38c69e0ee491"}, - {file = "jiter-0.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25f0d2f6e01a8a0fb0eab6d0e469058dab2be46ff3139ed2d1543475b5a1d8e7"}, - {file = "jiter-0.6.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0b809e39e342c346df454b29bfcc7bca3d957f5d7b60e33dae42b0e5ec13e027"}, - {file = "jiter-0.6.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e9ac7c2f092f231f5620bef23ce2e530bd218fc046098747cc390b21b8738a7a"}, - {file = "jiter-0.6.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e51a2d80d5fe0ffb10ed2c82b6004458be4a3f2b9c7d09ed85baa2fbf033f54b"}, - {file = "jiter-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3343d4706a2b7140e8bd49b6c8b0a82abf9194b3f0f5925a78fc69359f8fc33c"}, - {file = "jiter-0.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82521000d18c71e41c96960cb36e915a357bc83d63a8bed63154b89d95d05ad1"}, - {file = "jiter-0.6.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3c843e7c1633470708a3987e8ce617ee2979ee18542d6eb25ae92861af3f1d62"}, - {file = "jiter-0.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a2e861658c3fe849efc39b06ebb98d042e4a4c51a8d7d1c3ddc3b1ea091d0784"}, - {file = "jiter-0.6.1-cp311-none-win32.whl", hash = 
"sha256:7d72fc86474862c9c6d1f87b921b70c362f2b7e8b2e3c798bb7d58e419a6bc0f"}, - {file = "jiter-0.6.1-cp311-none-win_amd64.whl", hash = "sha256:3e36a320634f33a07794bb15b8da995dccb94f944d298c8cfe2bd99b1b8a574a"}, - {file = "jiter-0.6.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1fad93654d5a7dcce0809aff66e883c98e2618b86656aeb2129db2cd6f26f867"}, - {file = "jiter-0.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4e6e340e8cd92edab7f6a3a904dbbc8137e7f4b347c49a27da9814015cc0420c"}, - {file = "jiter-0.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:691352e5653af84ed71763c3c427cff05e4d658c508172e01e9c956dfe004aba"}, - {file = "jiter-0.6.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:defee3949313c1f5b55e18be45089970cdb936eb2a0063f5020c4185db1b63c9"}, - {file = "jiter-0.6.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26d2bdd5da097e624081c6b5d416d3ee73e5b13f1703bcdadbb1881f0caa1933"}, - {file = "jiter-0.6.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18aa9d1626b61c0734b973ed7088f8a3d690d0b7f5384a5270cd04f4d9f26c86"}, - {file = "jiter-0.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a3567c8228afa5ddcce950631c6b17397ed178003dc9ee7e567c4c4dcae9fa0"}, - {file = "jiter-0.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5c0507131c922defe3f04c527d6838932fcdfd69facebafd7d3574fa3395314"}, - {file = "jiter-0.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:540fcb224d7dc1bcf82f90f2ffb652df96f2851c031adca3c8741cb91877143b"}, - {file = "jiter-0.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e7b75436d4fa2032b2530ad989e4cb0ca74c655975e3ff49f91a1a3d7f4e1df2"}, - {file = "jiter-0.6.1-cp312-none-win32.whl", hash = "sha256:883d2ced7c21bf06874fdeecab15014c1c6d82216765ca6deef08e335fa719e0"}, - {file = "jiter-0.6.1-cp312-none-win_amd64.whl", hash = "sha256:91e63273563401aadc6c52cca64a7921c50b29372441adc104127b910e98a5b6"}, - {file = "jiter-0.6.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:852508a54fe3228432e56019da8b69208ea622a3069458252f725d634e955b31"}, - {file = "jiter-0.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f491cc69ff44e5a1e8bc6bf2b94c1f98d179e1aaf4a554493c171a5b2316b701"}, - {file = "jiter-0.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc56c8f0b2a28ad4d8047f3ae62d25d0e9ae01b99940ec0283263a04724de1f3"}, - {file = "jiter-0.6.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:51b58f7a0d9e084a43b28b23da2b09fc5e8df6aa2b6a27de43f991293cab85fd"}, - {file = "jiter-0.6.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f79ce15099154c90ef900d69c6b4c686b64dfe23b0114e0971f2fecd306ec6c"}, - {file = "jiter-0.6.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:03a025b52009f47e53ea619175d17e4ded7c035c6fbd44935cb3ada11e1fd592"}, - {file = "jiter-0.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74a8d93718137c021d9295248a87c2f9fdc0dcafead12d2930bc459ad40f885"}, - {file = "jiter-0.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40b03b75f903975f68199fc4ec73d546150919cb7e534f3b51e727c4d6ccca5a"}, - {file = "jiter-0.6.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:825651a3f04cf92a661d22cad61fc913400e33aa89b3e3ad9a6aa9dc8a1f5a71"}, - {file = "jiter-0.6.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:928bf25eb69ddb292ab8177fe69d3fbf76c7feab5fce1c09265a7dccf25d3991"}, - {file = "jiter-0.6.1-cp313-none-win32.whl", hash = "sha256:352cd24121e80d3d053fab1cc9806258cad27c53cad99b7a3cac57cf934b12e4"}, - {file = "jiter-0.6.1-cp313-none-win_amd64.whl", hash = "sha256:be7503dd6f4bf02c2a9bacb5cc9335bc59132e7eee9d3e931b13d76fd80d7fda"}, - {file = "jiter-0.6.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:31d8e00e1fb4c277df8ab6f31a671f509ebc791a80e5c61fdc6bc8696aaa297c"}, - {file = "jiter-0.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77c296d65003cd7ee5d7b0965f6acbe6cffaf9d1fa420ea751f60ef24e85fed5"}, - {file = "jiter-0.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aeeb0c0325ef96c12a48ea7e23e2e86fe4838e6e0a995f464cf4c79fa791ceeb"}, - {file = "jiter-0.6.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a31c6fcbe7d6c25d6f1cc6bb1cba576251d32795d09c09961174fe461a1fb5bd"}, - {file = "jiter-0.6.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59e2b37f3b9401fc9e619f4d4badcab2e8643a721838bcf695c2318a0475ae42"}, - {file = "jiter-0.6.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bae5ae4853cb9644144e9d0755854ce5108d470d31541d83f70ca7ecdc2d1637"}, - {file = "jiter-0.6.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9df588e9c830b72d8db1dd7d0175af6706b0904f682ea9b1ca8b46028e54d6e9"}, - {file = "jiter-0.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15f8395e835cf561c85c1adee72d899abf2733d9df72e9798e6d667c9b5c1f30"}, - {file = "jiter-0.6.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a99d4e0b5fc3b05ea732d67eb2092fe894e95a90e6e413f2ea91387e228a307"}, - {file = "jiter-0.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a311df1fa6be0ccd64c12abcd85458383d96e542531bafbfc0a16ff6feda588f"}, - {file = "jiter-0.6.1-cp38-none-win32.whl", hash = "sha256:81116a6c272a11347b199f0e16b6bd63f4c9d9b52bc108991397dd80d3c78aba"}, - {file = "jiter-0.6.1-cp38-none-win_amd64.whl", hash = "sha256:13f9084e3e871a7c0b6e710db54444088b1dd9fbefa54d449b630d5e73bb95d0"}, - {file = "jiter-0.6.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:f1c53615fcfec3b11527c08d19cff6bc870da567ce4e57676c059a3102d3a082"}, - {file = "jiter-0.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f791b6a4da23238c17a81f44f5b55d08a420c5692c1fda84e301a4b036744eb1"}, - {file = "jiter-0.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c97e90fec2da1d5f68ef121444c2c4fa72eabf3240829ad95cf6bbeca42a301"}, - {file = "jiter-0.6.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cbc1a66b4e41511209e97a2866898733c0110b7245791ac604117b7fb3fedb7"}, - {file = "jiter-0.6.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4e85f9e12cd8418ab10e1fcf0e335ae5bb3da26c4d13a0fd9e6a17a674783b6"}, - {file = "jiter-0.6.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08be33db6dcc374c9cc19d3633af5e47961a7b10d4c61710bd39e48d52a35824"}, - {file = "jiter-0.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:677be9550004f5e010d673d3b2a2b815a8ea07a71484a57d3f85dde7f14cf132"}, - {file = "jiter-0.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e8bd065be46c2eecc328e419d6557bbc37844c88bb07b7a8d2d6c91c7c4dedc9"}, - {file = "jiter-0.6.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:bd95375ce3609ec079a97c5d165afdd25693302c071ca60c7ae1cf826eb32022"}, - {file = "jiter-0.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db459ed22d0208940d87f614e1f0ea5a946d29a3cfef71f7e1aab59b6c6b2afb"}, - {file = "jiter-0.6.1-cp39-none-win32.whl", hash = "sha256:d71c962f0971347bd552940ab96aa42ceefcd51b88c4ced8a27398182efa8d80"}, - {file = "jiter-0.6.1-cp39-none-win_amd64.whl", hash = "sha256:d465db62d2d10b489b7e7a33027c4ae3a64374425d757e963f86df5b5f2e7fc5"}, - {file = "jiter-0.6.1.tar.gz", hash = "sha256:e19cd21221fc139fb032e4112986656cb2739e9fe6d84c13956ab30ccc7d4449"}, + {file = "jiter-0.7.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e14027f61101b3f5e173095d9ecf95c1cac03ffe45a849279bde1d97e559e314"}, + {file = "jiter-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:979ec4711c2e37ac949561858bd42028884c9799516a923e1ff0b501ef341a4a"}, + {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:662d5d3cca58ad6af7a3c6226b641c8655de5beebcb686bfde0df0f21421aafa"}, + {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d89008fb47043a469f97ad90840b97ba54e7c3d62dc7cbb6cbf938bd0caf71d"}, + {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8b16c35c846a323ce9067170d5ab8c31ea3dbcab59c4f7608bbbf20c2c3b43f"}, + {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9e82daaa1b0a68704f9029b81e664a5a9de3e466c2cbaabcda5875f961702e7"}, + {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43a87a9f586636e1f0dd3651a91f79b491ea0d9fd7cbbf4f5c463eebdc48bda7"}, + {file = "jiter-0.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ec05b1615f96cc3e4901678bc863958611584072967d9962f9e571d60711d52"}, + {file = "jiter-0.7.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a5cb97e35370bde7aa0d232a7f910f5a0fbbc96bc0a7dbaa044fd5cd6bcd7ec3"}, + {file = "jiter-0.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cb316dacaf48c8c187cea75d0d7f835f299137e6fdd13f691dff8f92914015c7"}, + {file = "jiter-0.7.0-cp310-none-win32.whl", hash = "sha256:243f38eb4072763c54de95b14ad283610e0cd3bf26393870db04e520f60eebb3"}, + {file = "jiter-0.7.0-cp310-none-win_amd64.whl", hash = "sha256:2221d5603c139f6764c54e37e7c6960c469cbcd76928fb10d15023ba5903f94b"}, + {file = "jiter-0.7.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:91cec0ad755bd786c9f769ce8d843af955df6a8e56b17658771b2d5cb34a3ff8"}, + {file = "jiter-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:feba70a28a27d962e353e978dbb6afd798e711c04cb0b4c5e77e9d3779033a1a"}, + {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d866ec066c3616cacb8535dbda38bb1d470b17b25f0317c4540182bc886ce2"}, + {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8e7a7a00b6f9f18289dd563596f97ecaba6c777501a8ba04bf98e03087bcbc60"}, + {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9aaf564094c7db8687f2660605e099f3d3e6ea5e7135498486674fcb78e29165"}, + {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4d27e09825c1b3c7a667adb500ce8b840e8fc9f630da8454b44cdd4fb0081bb"}, + {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca7c287da9c1d56dda88da1d08855a787dbb09a7e2bd13c66a2e288700bd7c7"}, + 
{file = "jiter-0.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db19a6d160f093cbc8cd5ea2abad420b686f6c0e5fb4f7b41941ebc6a4f83cda"}, + {file = "jiter-0.7.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e46a63c7f877cf7441ffc821c28287cfb9f533ae6ed707bde15e7d4dfafa7ae"}, + {file = "jiter-0.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ba426fa7ff21cb119fa544b75dd3fbee6a70e55a5829709c0338d07ccd30e6d"}, + {file = "jiter-0.7.0-cp311-none-win32.whl", hash = "sha256:c07f55a64912b0c7982377831210836d2ea92b7bd343fca67a32212dd72e38e0"}, + {file = "jiter-0.7.0-cp311-none-win_amd64.whl", hash = "sha256:ed27b2c43e1b5f6c7fedc5c11d4d8bfa627de42d1143d87e39e2e83ddefd861a"}, + {file = "jiter-0.7.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac7930bcaaeb1e229e35c91c04ed2e9f39025b86ee9fc3141706bbf6fff4aeeb"}, + {file = "jiter-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:571feae3e7c901a8eedde9fd2865b0dfc1432fb15cab8c675a8444f7d11b7c5d"}, + {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8af4df8a262fa2778b68c2a03b6e9d1cb4d43d02bea6976d46be77a3a331af1"}, + {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd028d4165097a611eb0c7494d8c1f2aebd46f73ca3200f02a175a9c9a6f22f5"}, + {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6b487247c7836810091e9455efe56a52ec51bfa3a222237e1587d04d3e04527"}, + {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6d28a92f28814e1a9f2824dc11f4e17e1df1f44dc4fdeb94c5450d34bcb2602"}, + {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90443994bbafe134f0b34201dad3ebe1c769f0599004084e046fb249ad912425"}, + {file = "jiter-0.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f9abf464f9faac652542ce8360cea8e68fba2b78350e8a170248f9bcc228702a"}, + {file = "jiter-0.7.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db7a8d99fc5f842f7d2852f06ccaed066532292c41723e5dff670c339b649f88"}, + {file = "jiter-0.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:15cf691ebd8693b70c94627d6b748f01e6d697d9a6e9f2bc310934fcfb7cf25e"}, + {file = "jiter-0.7.0-cp312-none-win32.whl", hash = "sha256:9dcd54fa422fb66ca398bec296fed5f58e756aa0589496011cfea2abb5be38a5"}, + {file = "jiter-0.7.0-cp312-none-win_amd64.whl", hash = "sha256:cc989951f73f9375b8eacd571baaa057f3d7d11b7ce6f67b9d54642e7475bfad"}, + {file = "jiter-0.7.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:24cecd18df540963cd27c08ca5ce1d0179f229ff78066d9eecbe5add29361340"}, + {file = "jiter-0.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d41b46236b90b043cca73785674c23d2a67d16f226394079d0953f94e765ed76"}, + {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b160db0987171365c153e406a45dcab0ee613ae3508a77bfff42515cb4ce4d6e"}, + {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d1c8d91e0f0bd78602eaa081332e8ee4f512c000716f5bc54e9a037306d693a7"}, + {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:997706c683195eeff192d2e5285ce64d2a610414f37da3a3f2625dcf8517cf90"}, + {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7ea52a8a0ff0229ab2920284079becd2bae0688d432fca94857ece83bb49c541"}, + {file = 
"jiter-0.7.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d77449d2738cf74752bb35d75ee431af457e741124d1db5e112890023572c7c"}, + {file = "jiter-0.7.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8203519907a1d81d6cb00902c98e27c2d0bf25ce0323c50ca594d30f5f1fbcf"}, + {file = "jiter-0.7.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41d15ccc53931c822dd7f1aebf09faa3cda2d7b48a76ef304c7dbc19d1302e51"}, + {file = "jiter-0.7.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:febf3179b2fabf71fbd2fd52acb8594163bb173348b388649567a548f356dbf6"}, + {file = "jiter-0.7.0-cp313-none-win32.whl", hash = "sha256:4a8e2d866e7eda19f012444e01b55079d8e1c4c30346aaac4b97e80c54e2d6d3"}, + {file = "jiter-0.7.0-cp313-none-win_amd64.whl", hash = "sha256:7417c2b928062c496f381fb0cb50412eee5ad1d8b53dbc0e011ce45bb2de522c"}, + {file = "jiter-0.7.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9c62c737b5368e51e74960a08fe1adc807bd270227291daede78db24d5fbf556"}, + {file = "jiter-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e4640722b1bef0f6e342fe4606aafaae0eb4f4be5c84355bb6867f34400f6688"}, + {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f367488c3b9453eab285424c61098faa1cab37bb49425e69c8dca34f2dfe7d69"}, + {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0cf5d42beb3514236459454e3287db53d9c4d56c4ebaa3e9d0efe81b19495129"}, + {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc5190ea1113ee6f7252fa8a5fe5a6515422e378356c950a03bbde5cafbdbaab"}, + {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63ee47a149d698796a87abe445fc8dee21ed880f09469700c76c8d84e0d11efd"}, + {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48592c26ea72d3e71aa4bea0a93454df907d80638c3046bb0705507b6704c0d7"}, + {file = "jiter-0.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:79fef541199bd91cfe8a74529ecccb8eaf1aca38ad899ea582ebbd4854af1e51"}, + {file = "jiter-0.7.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d1ef6bb66041f2514739240568136c81b9dcc64fd14a43691c17ea793b6535c0"}, + {file = "jiter-0.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aca4d950863b1c238e315bf159466e064c98743eef3bd0ff9617e48ff63a4715"}, + {file = "jiter-0.7.0-cp38-none-win32.whl", hash = "sha256:897745f230350dcedb8d1ebe53e33568d48ea122c25e6784402b6e4e88169be7"}, + {file = "jiter-0.7.0-cp38-none-win_amd64.whl", hash = "sha256:b928c76a422ef3d0c85c5e98c498ce3421b313c5246199541e125b52953e1bc0"}, + {file = "jiter-0.7.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c9b669ff6f8ba08270dee9ccf858d3b0203b42314a428a1676762f2d390fbb64"}, + {file = "jiter-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b5be919bacd73ca93801c3042bce6e95cb9c555a45ca83617b9b6c89df03b9c2"}, + {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a282e1e8a396dabcea82d64f9d05acf7efcf81ecdd925b967020dcb0e671c103"}, + {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:17ecb1a578a56e97a043c72b463776b5ea30343125308f667fb8fce4b3796735"}, + {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b6045fa0527129218cdcd8a8b839f678219686055f31ebab35f87d354d9c36e"}, + {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:189cc4262a92e33c19d4fd24018f5890e4e6da5b2581f0059938877943f8298c"}, + {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c138414839effbf30d185e30475c6dc8a16411a1e3681e5fd4605ab1233ac67a"}, + {file = "jiter-0.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2791604acef33da6b72d5ecf885a32384bcaf9aa1e4be32737f3b8b9588eef6a"}, + {file = "jiter-0.7.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae60ec89037a78d60bbf3d8b127f1567769c8fa24886e0abed3f622791dea478"}, + {file = "jiter-0.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:836f03dea312967635233d826f783309b98cfd9ccc76ac776e224cfcef577862"}, + {file = "jiter-0.7.0-cp39-none-win32.whl", hash = "sha256:ebc30ae2ce4bc4986e1764c404b4ea1924f926abf02ce92516485098f8545374"}, + {file = "jiter-0.7.0-cp39-none-win_amd64.whl", hash = "sha256:abf596f951370c648f37aa9899deab296c42a3829736e598b0dd10b08f77a44d"}, + {file = "jiter-0.7.0.tar.gz", hash = "sha256:c061d9738535497b5509f8970584f20de1e900806b239a39a9994fc191dad630"}, ] [[package]] @@ -1732,13 +1732,13 @@ llama-index-llms-openai = ">=0.2.0,<0.3.0" [[package]] name = "llama-index-core" -version = "0.11.20" +version = "0.11.21" description = "Interface between LLMs and your data" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_core-0.11.20-py3-none-any.whl", hash = "sha256:e84daf45e90e4b5d9e135baf40ab9853a1c3169a1076af6d58739d098e70adb1"}, - {file = "llama_index_core-0.11.20.tar.gz", hash = "sha256:6b5eaaf4be5030808b9ba953e8f7aead7ba495b8e72ba0a81dfc7dda96be416f"}, + {file = "llama_index_core-0.11.21-py3-none-any.whl", hash = "sha256:08d0a605d022127f2eee45d2000b19d1b95fc6f1f3387c8424d924dfa795882d"}, + {file = "llama_index_core-0.11.21.tar.gz", hash = "sha256:720b6e57e5350a72a22657caa69a8a871fa3da3b37edc2adf4a0bde8e5790ad3"}, ] [package.dependencies] @@ -2252,13 +2252,13 @@ pygments = ">2.12.0" [[package]] name = "mkdocs-material" -version = "9.5.42" +version = "9.5.43" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.42-py3-none-any.whl", hash = "sha256:452a7c5d21284b373f36b981a2cbebfff59263feebeede1bc28652e9c5bbe316"}, - {file = "mkdocs_material-9.5.42.tar.gz", hash = "sha256:92779b5e9b5934540c574c11647131d217dc540dce72b05feeda088c8eb1b8f2"}, + {file = "mkdocs_material-9.5.43-py3-none-any.whl", hash = "sha256:4aae0664c456fd12837a3192e0225c17960ba8bf55d7f0a7daef7e4b0b914a34"}, + {file = "mkdocs_material-9.5.43.tar.gz", hash = "sha256:83be7ff30b65a1e4930dfa4ab911e75780a3afc9583d162692e434581cb46979"}, ] [package.dependencies] @@ -2703,13 +2703,13 @@ files = [ [[package]] name = "openai" -version = "1.52.2" +version = "1.53.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.52.2-py3-none-any.whl", hash = "sha256:57e9e37bc407f39bb6ec3a27d7e8fb9728b2779936daa1fcf95df17d3edfaccc"}, - {file = "openai-1.52.2.tar.gz", hash = "sha256:87b7d0f69d85f5641678d414b7ee3082363647a5c66a462ed7f3ccb59582da0d"}, + {file = "openai-1.53.0-py3-none-any.whl", hash = "sha256:20f408c32fc5cb66e60c6882c994cdca580a5648e10045cd840734194f033418"}, + {file = "openai-1.53.0.tar.gz", hash = "sha256:be2c4e77721b166cce8130e544178b7d579f751b4b074ffbaade3854b6f85ec5"}, ] [package.dependencies] @@ -3393,13 +3393,13 @@ testutils = ["gitpython (>3)"] [[package]] name = "pymdown-extensions" -version = "10.11.2" +version = "10.12" description = 
"Extension pack for Python Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.11.2-py3-none-any.whl", hash = "sha256:41cdde0a77290e480cf53892f5c5e50921a7ee3e5cd60ba91bf19837b33badcf"}, - {file = "pymdown_extensions-10.11.2.tar.gz", hash = "sha256:bc8847ecc9e784a098efd35e20cba772bc5a1b529dfcef9dc1972db9021a1049"}, + {file = "pymdown_extensions-10.12-py3-none-any.whl", hash = "sha256:49f81412242d3527b8b4967b990df395c89563043bc51a3d2d7d500e52123b77"}, + {file = "pymdown_extensions-10.12.tar.gz", hash = "sha256:b0ee1e0b2bef1071a47891ab17003bfe5bf824a398e13f49f8ed653b699369a7"}, ] [package.dependencies] @@ -3877,114 +3877,114 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rpds-py" -version = "0.20.0" +version = "0.20.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.8" files = [ - {file = "rpds_py-0.20.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3ad0fda1635f8439cde85c700f964b23ed5fc2d28016b32b9ee5fe30da5c84e2"}, - {file = "rpds_py-0.20.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9bb4a0d90fdb03437c109a17eade42dfbf6190408f29b2744114d11586611d6f"}, - {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6377e647bbfd0a0b159fe557f2c6c602c159fc752fa316572f012fc0bf67150"}, - {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb851b7df9dda52dc1415ebee12362047ce771fc36914586b2e9fcbd7d293b3e"}, - {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e0f80b739e5a8f54837be5d5c924483996b603d5502bfff79bf33da06164ee2"}, - {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a8c94dad2e45324fc74dce25e1645d4d14df9a4e54a30fa0ae8bad9a63928e3"}, - {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e604fe73ba048c06085beaf51147eaec7df856824bfe7b98657cf436623daf"}, - {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:df3de6b7726b52966edf29663e57306b23ef775faf0ac01a3e9f4012a24a4140"}, - {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf258ede5bc22a45c8e726b29835b9303c285ab46fc7c3a4cc770736b5304c9f"}, - {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:55fea87029cded5df854ca7e192ec7bdb7ecd1d9a3f63d5c4eb09148acf4a7ce"}, - {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ae94bd0b2f02c28e199e9bc51485d0c5601f58780636185660f86bf80c89af94"}, - {file = "rpds_py-0.20.0-cp310-none-win32.whl", hash = "sha256:28527c685f237c05445efec62426d285e47a58fb05ba0090a4340b73ecda6dee"}, - {file = "rpds_py-0.20.0-cp310-none-win_amd64.whl", hash = "sha256:238a2d5b1cad28cdc6ed15faf93a998336eb041c4e440dd7f902528b8891b399"}, - {file = "rpds_py-0.20.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ac2f4f7a98934c2ed6505aead07b979e6f999389f16b714448fb39bbaa86a489"}, - {file = "rpds_py-0.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:220002c1b846db9afd83371d08d239fdc865e8f8c5795bbaec20916a76db3318"}, - {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d7919548df3f25374a1f5d01fbcd38dacab338ef5f33e044744b5c36729c8db"}, - {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:758406267907b3781beee0f0edfe4a179fbd97c0be2e9b1154d7f0a1279cf8e5"}, - {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3d61339e9f84a3f0767b1995adfb171a0d00a1185192718a17af6e124728e0f5"}, - {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1259c7b3705ac0a0bd38197565a5d603218591d3f6cee6e614e380b6ba61c6f6"}, - {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c1dc0f53856b9cc9a0ccca0a7cc61d3d20a7088201c0937f3f4048c1718a209"}, - {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7e60cb630f674a31f0368ed32b2a6b4331b8350d67de53c0359992444b116dd3"}, - {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dbe982f38565bb50cb7fb061ebf762c2f254ca3d8c20d4006878766e84266272"}, - {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:514b3293b64187172bc77c8fb0cdae26981618021053b30d8371c3a902d4d5ad"}, - {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d0a26ffe9d4dd35e4dfdd1e71f46401cff0181c75ac174711ccff0459135fa58"}, - {file = "rpds_py-0.20.0-cp311-none-win32.whl", hash = "sha256:89c19a494bf3ad08c1da49445cc5d13d8fefc265f48ee7e7556839acdacf69d0"}, - {file = "rpds_py-0.20.0-cp311-none-win_amd64.whl", hash = "sha256:c638144ce971df84650d3ed0096e2ae7af8e62ecbbb7b201c8935c370df00a2c"}, - {file = "rpds_py-0.20.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a84ab91cbe7aab97f7446652d0ed37d35b68a465aeef8fc41932a9d7eee2c1a6"}, - {file = "rpds_py-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:56e27147a5a4c2c21633ff8475d185734c0e4befd1c989b5b95a5d0db699b21b"}, - {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2580b0c34583b85efec8c5c5ec9edf2dfe817330cc882ee972ae650e7b5ef739"}, - {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b80d4a7900cf6b66bb9cee5c352b2d708e29e5a37fe9bf784fa97fc11504bf6c"}, - {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50eccbf054e62a7b2209b28dc7a22d6254860209d6753e6b78cfaeb0075d7bee"}, - {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:49a8063ea4296b3a7e81a5dfb8f7b2d73f0b1c20c2af401fb0cdf22e14711a96"}, - {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea438162a9fcbee3ecf36c23e6c68237479f89f962f82dae83dc15feeceb37e4"}, - {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:18d7585c463087bddcfa74c2ba267339f14f2515158ac4db30b1f9cbdb62c8ef"}, - {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d4c7d1a051eeb39f5c9547e82ea27cbcc28338482242e3e0b7768033cb083821"}, - {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e4df1e3b3bec320790f699890d41c59d250f6beda159ea3c44c3f5bac1976940"}, - {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2cf126d33a91ee6eedc7f3197b53e87a2acdac63602c0f03a02dd69e4b138174"}, - {file = "rpds_py-0.20.0-cp312-none-win32.whl", hash = "sha256:8bc7690f7caee50b04a79bf017a8d020c1f48c2a1077ffe172abec59870f1139"}, - {file = "rpds_py-0.20.0-cp312-none-win_amd64.whl", hash = "sha256:0e13e6952ef264c40587d510ad676a988df19adea20444c2b295e536457bc585"}, - {file = "rpds_py-0.20.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = 
"sha256:aa9a0521aeca7d4941499a73ad7d4f8ffa3d1affc50b9ea11d992cd7eff18a29"}, - {file = "rpds_py-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a1f1d51eccb7e6c32ae89243cb352389228ea62f89cd80823ea7dd1b98e0b91"}, - {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a86a9b96070674fc88b6f9f71a97d2c1d3e5165574615d1f9168ecba4cecb24"}, - {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c8ef2ebf76df43f5750b46851ed1cdf8f109d7787ca40035fe19fbdc1acc5a7"}, - {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b74b25f024b421d5859d156750ea9a65651793d51b76a2e9238c05c9d5f203a9"}, - {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57eb94a8c16ab08fef6404301c38318e2c5a32216bf5de453e2714c964c125c8"}, - {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1940dae14e715e2e02dfd5b0f64a52e8374a517a1e531ad9412319dc3ac7879"}, - {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d20277fd62e1b992a50c43f13fbe13277a31f8c9f70d59759c88f644d66c619f"}, - {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:06db23d43f26478303e954c34c75182356ca9aa7797d22c5345b16871ab9c45c"}, - {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b2a5db5397d82fa847e4c624b0c98fe59d2d9b7cf0ce6de09e4d2e80f8f5b3f2"}, - {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a35df9f5548fd79cb2f52d27182108c3e6641a4feb0f39067911bf2adaa3e57"}, - {file = "rpds_py-0.20.0-cp313-none-win32.whl", hash = "sha256:fd2d84f40633bc475ef2d5490b9c19543fbf18596dcb1b291e3a12ea5d722f7a"}, - {file = "rpds_py-0.20.0-cp313-none-win_amd64.whl", hash = "sha256:9bc2d153989e3216b0559251b0c260cfd168ec78b1fac33dd485750a228db5a2"}, - {file = "rpds_py-0.20.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f2fbf7db2012d4876fb0d66b5b9ba6591197b0f165db8d99371d976546472a24"}, - {file = "rpds_py-0.20.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1e5f3cd7397c8f86c8cc72d5a791071431c108edd79872cdd96e00abd8497d29"}, - {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce9845054c13696f7af7f2b353e6b4f676dab1b4b215d7fe5e05c6f8bb06f965"}, - {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c3e130fd0ec56cb76eb49ef52faead8ff09d13f4527e9b0c400307ff72b408e1"}, - {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b16aa0107ecb512b568244ef461f27697164d9a68d8b35090e9b0c1c8b27752"}, - {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa7f429242aae2947246587d2964fad750b79e8c233a2367f71b554e9447949c"}, - {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af0fc424a5842a11e28956e69395fbbeab2c97c42253169d87e90aac2886d751"}, - {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b8c00a3b1e70c1d3891f0db1b05292747f0dbcfb49c43f9244d04c70fbc40eb8"}, - {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:40ce74fc86ee4645d0a225498d091d8bc61f39b709ebef8204cb8b5a464d3c0e"}, - {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4fe84294c7019456e56d93e8ababdad5a329cd25975be749c3f5f558abb48253"}, - {file = 
"rpds_py-0.20.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:338ca4539aad4ce70a656e5187a3a31c5204f261aef9f6ab50e50bcdffaf050a"}, - {file = "rpds_py-0.20.0-cp38-none-win32.whl", hash = "sha256:54b43a2b07db18314669092bb2de584524d1ef414588780261e31e85846c26a5"}, - {file = "rpds_py-0.20.0-cp38-none-win_amd64.whl", hash = "sha256:a1862d2d7ce1674cffa6d186d53ca95c6e17ed2b06b3f4c476173565c862d232"}, - {file = "rpds_py-0.20.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:3fde368e9140312b6e8b6c09fb9f8c8c2f00999d1823403ae90cc00480221b22"}, - {file = "rpds_py-0.20.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9824fb430c9cf9af743cf7aaf6707bf14323fb51ee74425c380f4c846ea70789"}, - {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11ef6ce74616342888b69878d45e9f779b95d4bd48b382a229fe624a409b72c5"}, - {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52d3f2f82b763a24ef52f5d24358553e8403ce05f893b5347098014f2d9eff2"}, - {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d35cef91e59ebbeaa45214861874bc6f19eb35de96db73e467a8358d701a96c"}, - {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d72278a30111e5b5525c1dd96120d9e958464316f55adb030433ea905866f4de"}, - {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c29cbbba378759ac5786730d1c3cb4ec6f8ababf5c42a9ce303dc4b3d08cda"}, - {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6632f2d04f15d1bd6fe0eedd3b86d9061b836ddca4c03d5cf5c7e9e6b7c14580"}, - {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d0b67d87bb45ed1cd020e8fbf2307d449b68abc45402fe1a4ac9e46c3c8b192b"}, - {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ec31a99ca63bf3cd7f1a5ac9fe95c5e2d060d3c768a09bc1d16e235840861420"}, - {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22e6c9976e38f4d8c4a63bd8a8edac5307dffd3ee7e6026d97f3cc3a2dc02a0b"}, - {file = "rpds_py-0.20.0-cp39-none-win32.whl", hash = "sha256:569b3ea770c2717b730b61998b6c54996adee3cef69fc28d444f3e7920313cf7"}, - {file = "rpds_py-0.20.0-cp39-none-win_amd64.whl", hash = "sha256:e6900ecdd50ce0facf703f7a00df12374b74bbc8ad9fe0f6559947fb20f82364"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:617c7357272c67696fd052811e352ac54ed1d9b49ab370261a80d3b6ce385045"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9426133526f69fcaba6e42146b4e12d6bc6c839b8b555097020e2b78ce908dcc"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deb62214c42a261cb3eb04d474f7155279c1a8a8c30ac89b7dcb1721d92c3c02"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fcaeb7b57f1a1e071ebd748984359fef83ecb026325b9d4ca847c95bc7311c92"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d454b8749b4bd70dd0a79f428731ee263fa6995f83ccb8bada706e8d1d3ff89d"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d807dc2051abe041b6649681dce568f8e10668e3c1c6543ebae58f2d7e617855"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c3c20f0ddeb6e29126d45f89206b8291352b8c5b44384e78a6499d68b52ae511"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b7f19250ceef892adf27f0399b9e5afad019288e9be756d6919cb58892129f51"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4f1ed4749a08379555cebf4650453f14452eaa9c43d0a95c49db50c18b7da075"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:dcedf0b42bcb4cfff4101d7771a10532415a6106062f005ab97d1d0ab5681c60"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:39ed0d010457a78f54090fafb5d108501b5aa5604cc22408fc1c0c77eac14344"}, - {file = "rpds_py-0.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bb273176be34a746bdac0b0d7e4e2c467323d13640b736c4c477881a3220a989"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f918a1a130a6dfe1d7fe0f105064141342e7dd1611f2e6a21cd2f5c8cb1cfb3e"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f60012a73aa396be721558caa3a6fd49b3dd0033d1675c6d59c4502e870fcf0c"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d2b1ad682a3dfda2a4e8ad8572f3100f95fad98cb99faf37ff0ddfe9cbf9d03"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:614fdafe9f5f19c63ea02817fa4861c606a59a604a77c8cdef5aa01d28b97921"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa518bcd7600c584bf42e6617ee8132869e877db2f76bcdc281ec6a4113a53ab"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0475242f447cc6cb8a9dd486d68b2ef7fbee84427124c232bff5f63b1fe11e5"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f90a4cd061914a60bd51c68bcb4357086991bd0bb93d8aa66a6da7701370708f"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:def7400461c3a3f26e49078302e1c1b38f6752342c77e3cf72ce91ca69fb1bc1"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:65794e4048ee837494aea3c21a28ad5fc080994dfba5b036cf84de37f7ad5074"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:faefcc78f53a88f3076b7f8be0a8f8d35133a3ecf7f3770895c25f8813460f08"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5b4f105deeffa28bbcdff6c49b34e74903139afa690e35d2d9e3c2c2fba18cec"}, - {file = "rpds_py-0.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fdfc3a892927458d98f3d55428ae46b921d1f7543b89382fdb483f5640daaec8"}, - {file = "rpds_py-0.20.0.tar.gz", hash = "sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121"}, + {file = "rpds_py-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a649dfd735fff086e8a9d0503a9f0c7d01b7912a333c7ae77e1515c08c146dad"}, + {file = "rpds_py-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f16bc1334853e91ddaaa1217045dd7be166170beec337576818461268a3de67f"}, + {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14511a539afee6f9ab492b543060c7491c99924314977a55c98bfa2ee29ce78c"}, + {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3ccb8ac2d3c71cda472b75af42818981bdacf48d2e21c36331b50b4f16930163"}, + 
{file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c142b88039b92e7e0cb2552e8967077e3179b22359e945574f5e2764c3953dcf"}, + {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f19169781dddae7478a32301b499b2858bc52fc45a112955e798ee307e294977"}, + {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13c56de6518e14b9bf6edde23c4c39dac5b48dcf04160ea7bce8fca8397cdf86"}, + {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:925d176a549f4832c6f69fa6026071294ab5910e82a0fe6c6228fce17b0706bd"}, + {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:78f0b6877bfce7a3d1ff150391354a410c55d3cdce386f862926a4958ad5ab7e"}, + {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3dd645e2b0dcb0fd05bf58e2e54c13875847687d0b71941ad2e757e5d89d4356"}, + {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4f676e21db2f8c72ff0936f895271e7a700aa1f8d31b40e4e43442ba94973899"}, + {file = "rpds_py-0.20.1-cp310-none-win32.whl", hash = "sha256:648386ddd1e19b4a6abab69139b002bc49ebf065b596119f8f37c38e9ecee8ff"}, + {file = "rpds_py-0.20.1-cp310-none-win_amd64.whl", hash = "sha256:d9ecb51120de61e4604650666d1f2b68444d46ae18fd492245a08f53ad2b7711"}, + {file = "rpds_py-0.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:762703bdd2b30983c1d9e62b4c88664df4a8a4d5ec0e9253b0231171f18f6d75"}, + {file = "rpds_py-0.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0b581f47257a9fce535c4567782a8976002d6b8afa2c39ff616edf87cbeff712"}, + {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:842c19a6ce894493563c3bd00d81d5100e8e57d70209e84d5491940fdb8b9e3a"}, + {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42cbde7789f5c0bcd6816cb29808e36c01b960fb5d29f11e052215aa85497c93"}, + {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c8e9340ce5a52f95fa7d3b552b35c7e8f3874d74a03a8a69279fd5fca5dc751"}, + {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ba6f89cac95c0900d932c9efb7f0fb6ca47f6687feec41abcb1bd5e2bd45535"}, + {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a916087371afd9648e1962e67403c53f9c49ca47b9680adbeef79da3a7811b0"}, + {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:200a23239781f46149e6a415f1e870c5ef1e712939fe8fa63035cd053ac2638e"}, + {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:58b1d5dd591973d426cbb2da5e27ba0339209832b2f3315928c9790e13f159e8"}, + {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6b73c67850ca7cae0f6c56f71e356d7e9fa25958d3e18a64927c2d930859b8e4"}, + {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d8761c3c891cc51e90bc9926d6d2f59b27beaf86c74622c8979380a29cc23ac3"}, + {file = "rpds_py-0.20.1-cp311-none-win32.whl", hash = "sha256:cd945871335a639275eee904caef90041568ce3b42f402c6959b460d25ae8732"}, + {file = "rpds_py-0.20.1-cp311-none-win_amd64.whl", hash = "sha256:7e21b7031e17c6b0e445f42ccc77f79a97e2687023c5746bfb7a9e45e0921b84"}, + {file = "rpds_py-0.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:36785be22066966a27348444b40389f8444671630063edfb1a2eb04318721e17"}, + {file = 
"rpds_py-0.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:142c0a5124d9bd0e2976089484af5c74f47bd3298f2ed651ef54ea728d2ea42c"}, + {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbddc10776ca7ebf2a299c41a4dde8ea0d8e3547bfd731cb87af2e8f5bf8962d"}, + {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15a842bb369e00295392e7ce192de9dcbf136954614124a667f9f9f17d6a216f"}, + {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be5ef2f1fc586a7372bfc355986226484e06d1dc4f9402539872c8bb99e34b01"}, + {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbcf360c9e3399b056a238523146ea77eeb2a596ce263b8814c900263e46031a"}, + {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd27a66740ffd621d20b9a2f2b5ee4129a56e27bfb9458a3bcc2e45794c96cb"}, + {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0b937b2a1988f184a3e9e577adaa8aede21ec0b38320d6009e02bd026db04fa"}, + {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6889469bfdc1eddf489729b471303739bf04555bb151fe8875931f8564309afc"}, + {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19b73643c802f4eaf13d97f7855d0fb527fbc92ab7013c4ad0e13a6ae0ed23bd"}, + {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3c6afcf2338e7f374e8edc765c79fbcb4061d02b15dd5f8f314a4af2bdc7feb5"}, + {file = "rpds_py-0.20.1-cp312-none-win32.whl", hash = "sha256:dc73505153798c6f74854aba69cc75953888cf9866465196889c7cdd351e720c"}, + {file = "rpds_py-0.20.1-cp312-none-win_amd64.whl", hash = "sha256:8bbe951244a838a51289ee53a6bae3a07f26d4e179b96fc7ddd3301caf0518eb"}, + {file = "rpds_py-0.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6ca91093a4a8da4afae7fe6a222c3b53ee4eef433ebfee4d54978a103435159e"}, + {file = "rpds_py-0.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b9c2fe36d1f758b28121bef29ed1dee9b7a2453e997528e7d1ac99b94892527c"}, + {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f009c69bc8c53db5dfab72ac760895dc1f2bc1b62ab7408b253c8d1ec52459fc"}, + {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6740a3e8d43a32629bb9b009017ea5b9e713b7210ba48ac8d4cb6d99d86c8ee8"}, + {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32b922e13d4c0080d03e7b62991ad7f5007d9cd74e239c4b16bc85ae8b70252d"}, + {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe00a9057d100e69b4ae4a094203a708d65b0f345ed546fdef86498bf5390982"}, + {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49fe9b04b6fa685bd39237d45fad89ba19e9163a1ccaa16611a812e682913496"}, + {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa7ac11e294304e615b43f8c441fee5d40094275ed7311f3420d805fde9b07b4"}, + {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aa97af1558a9bef4025f8f5d8c60d712e0a3b13a2fe875511defc6ee77a1ab7"}, + {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:483b29f6f7ffa6af845107d4efe2e3fa8fb2693de8657bc1849f674296ff6a5a"}, + {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:37fe0f12aebb6a0e3e17bb4cd356b1286d2d18d2e93b2d39fe647138458b4bcb"}, + {file = "rpds_py-0.20.1-cp313-none-win32.whl", hash = "sha256:a624cc00ef2158e04188df5e3016385b9353638139a06fb77057b3498f794782"}, + {file = "rpds_py-0.20.1-cp313-none-win_amd64.whl", hash = "sha256:b71b8666eeea69d6363248822078c075bac6ed135faa9216aa85f295ff009b1e"}, + {file = "rpds_py-0.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:5b48e790e0355865197ad0aca8cde3d8ede347831e1959e158369eb3493d2191"}, + {file = "rpds_py-0.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3e310838a5801795207c66c73ea903deda321e6146d6f282e85fa7e3e4854804"}, + {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249280b870e6a42c0d972339e9cc22ee98730a99cd7f2f727549af80dd5a963"}, + {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e79059d67bea28b53d255c1437b25391653263f0e69cd7dec170d778fdbca95e"}, + {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b431c777c9653e569986ecf69ff4a5dba281cded16043d348bf9ba505486f36"}, + {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da584ff96ec95e97925174eb8237e32f626e7a1a97888cdd27ee2f1f24dd0ad8"}, + {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02a0629ec053fc013808a85178524e3cb63a61dbc35b22499870194a63578fb9"}, + {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fbf15aff64a163db29a91ed0868af181d6f68ec1a3a7d5afcfe4501252840bad"}, + {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:07924c1b938798797d60c6308fa8ad3b3f0201802f82e4a2c41bb3fafb44cc28"}, + {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4a5a844f68776a7715ecb30843b453f07ac89bad393431efbf7accca3ef599c1"}, + {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:518d2ca43c358929bf08f9079b617f1c2ca6e8848f83c1225c88caeac46e6cbc"}, + {file = "rpds_py-0.20.1-cp38-none-win32.whl", hash = "sha256:3aea7eed3e55119635a74bbeb80b35e776bafccb70d97e8ff838816c124539f1"}, + {file = "rpds_py-0.20.1-cp38-none-win_amd64.whl", hash = "sha256:7dca7081e9a0c3b6490a145593f6fe3173a94197f2cb9891183ef75e9d64c425"}, + {file = "rpds_py-0.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b41b6321805c472f66990c2849e152aff7bc359eb92f781e3f606609eac877ad"}, + {file = "rpds_py-0.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a90c373ea2975519b58dece25853dbcb9779b05cc46b4819cb1917e3b3215b6"}, + {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16d4477bcb9fbbd7b5b0e4a5d9b493e42026c0bf1f06f723a9353f5153e75d30"}, + {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:84b8382a90539910b53a6307f7c35697bc7e6ffb25d9c1d4e998a13e842a5e83"}, + {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4888e117dd41b9d34194d9e31631af70d3d526efc363085e3089ab1a62c32ed1"}, + {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5265505b3d61a0f56618c9b941dc54dc334dc6e660f1592d112cd103d914a6db"}, + {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e75ba609dba23f2c95b776efb9dd3f0b78a76a151e96f96cc5b6b1b0004de66f"}, + {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:1791ff70bc975b098fe6ecf04356a10e9e2bd7dc21fa7351c1742fdeb9b4966f"}, + {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d126b52e4a473d40232ec2052a8b232270ed1f8c9571aaf33f73a14cc298c24f"}, + {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c14937af98c4cc362a1d4374806204dd51b1e12dded1ae30645c298e5a5c4cb1"}, + {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3d089d0b88996df627693639d123c8158cff41c0651f646cd8fd292c7da90eaf"}, + {file = "rpds_py-0.20.1-cp39-none-win32.whl", hash = "sha256:653647b8838cf83b2e7e6a0364f49af96deec64d2a6578324db58380cff82aca"}, + {file = "rpds_py-0.20.1-cp39-none-win_amd64.whl", hash = "sha256:fa41a64ac5b08b292906e248549ab48b69c5428f3987b09689ab2441f267d04d"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7a07ced2b22f0cf0b55a6a510078174c31b6d8544f3bc00c2bcee52b3d613f74"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:68cb0a499f2c4a088fd2f521453e22ed3527154136a855c62e148b7883b99f9a"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa3060d885657abc549b2a0f8e1b79699290e5d83845141717c6c90c2df38311"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95f3b65d2392e1c5cec27cff08fdc0080270d5a1a4b2ea1d51d5f4a2620ff08d"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cc3712a4b0b76a1d45a9302dd2f53ff339614b1c29603a911318f2357b04dd2"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d4eea0761e37485c9b81400437adb11c40e13ef513375bbd6973e34100aeb06"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f5179583d7a6cdb981151dd349786cbc318bab54963a192692d945dd3f6435d"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fbb0ffc754490aff6dabbf28064be47f0f9ca0b9755976f945214965b3ace7e"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:a94e52537a0e0a85429eda9e49f272ada715506d3b2431f64b8a3e34eb5f3e75"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:92b68b79c0da2a980b1c4197e56ac3dd0c8a149b4603747c4378914a68706979"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:93da1d3db08a827eda74356f9f58884adb254e59b6664f64cc04cdff2cc19b0d"}, + {file = "rpds_py-0.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:754bbed1a4ca48479e9d4182a561d001bbf81543876cdded6f695ec3d465846b"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ca449520e7484534a2a44faf629362cae62b660601432d04c482283c47eaebab"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9c4cb04a16b0f199a8c9bf807269b2f63b7b5b11425e4a6bd44bd6961d28282c"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb63804105143c7e24cee7db89e37cb3f3941f8e80c4379a0b355c52a52b6780"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:55cd1fa4ecfa6d9f14fbd97ac24803e6f73e897c738f771a9fe038f2f11ff07c"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:0f8f741b6292c86059ed175d80eefa80997125b7c478fb8769fd9ac8943a16c0"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fc212779bf8411667234b3cdd34d53de6c2b8b8b958e1e12cb473a5f367c338"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ad56edabcdb428c2e33bbf24f255fe2b43253b7d13a2cdbf05de955217313e6"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a3a1e9ee9728b2c1734f65d6a1d376c6f2f6fdcc13bb007a08cc4b1ff576dc5"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e13de156137b7095442b288e72f33503a469aa1980ed856b43c353ac86390519"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:07f59760ef99f31422c49038964b31c4dfcfeb5d2384ebfc71058a7c9adae2d2"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:59240685e7da61fb78f65a9f07f8108e36a83317c53f7b276b4175dc44151684"}, + {file = "rpds_py-0.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:83cba698cfb3c2c5a7c3c6bac12fe6c6a51aae69513726be6411076185a8b24a"}, + {file = "rpds_py-0.20.1.tar.gz", hash = "sha256:e1791c4aabd117653530dccd24108fa03cc6baf21f58b950d0a73c3b3b29a350"}, ] [[package]] @@ -4311,13 +4311,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.5" +version = "4.66.6" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, - {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, + {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, + {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, ] [package.dependencies] @@ -4527,13 +4527,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.27.0" +version = "20.27.1" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.27.0-py3-none-any.whl", hash = "sha256:44a72c29cceb0ee08f300b314848c86e57bf8d1f13107a5e671fb9274138d655"}, - {file = "virtualenv-20.27.0.tar.gz", hash = "sha256:2ca56a68ed615b8fe4326d11a0dca5dfbe8fd68510fb6c6349163bed3c15f2b2"}, + {file = "virtualenv-20.27.1-py3-none-any.whl", hash = "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4"}, + {file = "virtualenv-20.27.1.tar.gz", hash = "sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba"}, ] [package.dependencies] diff --git a/pyproject.toml b/pyproject.toml index f51a15bc1c5f0..13108181df806 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ name = "llama-index" packages = [{from = "_llama-index", include = "llama_index"}] readme = "README.md" repository = "https://github.com/run-llama/llama_index" -version = "0.11.20" +version = "0.11.21" [tool.poetry.dependencies] python = ">=3.8.1,<4.0"