From cae78d5d7875b1ad98b62aebd16931f7e30377f7 Mon Sep 17 00:00:00 2001 From: Anoop Sharma Date: Wed, 9 Oct 2024 03:50:29 +0530 Subject: [PATCH] LLM Module Readme Update (#16389) --- docs/docs/examples/llm/cohere.ipynb | 2 +- .../llms/llama-index-llms-anthropic/README.md | 190 +++++++++++++++++- .../llama-index-llms-anthropic/pyproject.toml | 2 +- .../llms/llama-index-llms-anyscale/README.md | 91 +++++++++ .../llama-index-llms-anyscale/pyproject.toml | 2 +- .../llama-index-llms-azure-openai/README.md | 111 ++++++++++ .../pyproject.toml | 2 +- .../README.md | 184 +++++++++++++++++ .../pyproject.toml | 2 +- .../llms/llama-index-llms-bedrock/README.md | 127 ++++++++++++ .../llama-index-llms-bedrock/pyproject.toml | 2 +- .../llms/llama-index-llms-clarifai/README.md | 102 ++++++++++ .../llama-index-llms-clarifai/pyproject.toml | 2 +- .../llms/llama-index-llms-cohere/README.md | 146 ++++++++++++++ .../llama-index-llms-cohere/pyproject.toml | 2 +- .../llms/llama-index-llms-dashscope/README.md | 93 +++++++++ .../llama-index-llms-dashscope/pyproject.toml | 2 +- .../llms/llama-index-llms-everlyai/README.md | 111 ++++++++++ .../llama-index-llms-everlyai/pyproject.toml | 2 +- .../llms/llama-index-llms-fireworks/README.md | 137 +++++++++++++ .../llama-index-llms-fireworks/pyproject.toml | 2 +- .../llms/llama-index-llms-friendli/README.md | 114 +++++++++++ .../llama-index-llms-friendli/pyproject.toml | 2 +- .../llms/llama-index-llms-gemini/README.md | 127 ++++++++++++ .../llama-index-llms-gemini/pyproject.toml | 2 +- .../llama-index-llms-huggingface/README.md | 91 +++++++++ .../pyproject.toml | 2 +- .../llms/llama-index-llms-konko/README.md | 88 ++++++++ .../llama-index-llms-konko/pyproject.toml | 2 +- .../llms/llama-index-llms-langchain/README.md | 39 ++++ .../llama-index-llms-langchain/pyproject.toml | 2 +- .../llms/llama-index-llms-litellm/README.md | 100 +++++++++ .../llama-index-llms-litellm/pyproject.toml | 2 +- .../llms/llama-index-llms-llama-api/README.md | 156 ++++++++++++++ .../llama-index-llms-llama-api/pyproject.toml | 2 +- .../llms/llama-index-llms-llama-cpp/README.md | 109 ++++++++++ .../llama-index-llms-llama-cpp/pyproject.toml | 2 +- .../llms/llama-index-llms-llamafile/README.md | 106 ++++++++++ .../llama-index-llms-llamafile/pyproject.toml | 2 +- .../llms/llama-index-llms-mistralai/README.md | 125 ++++++++++++ .../llama-index-llms-mistralai/pyproject.toml | 2 +- .../llama-index-llms-modelscope/README.md | 51 +++++ .../pyproject.toml | 2 +- .../llms/llama-index-llms-mymagic/README.md | 76 +++++++ .../llama-index-llms-mymagic/pyproject.toml | 2 +- .../llms/llama-index-llms-neutrino/README.md | 85 ++++++++ .../llama-index-llms-neutrino/pyproject.toml | 2 +- .../llms/llama-index-llms-ollama/README.md | 132 ++++++++++++ .../llama-index-llms-ollama/pyproject.toml | 2 +- .../llms/llama-index-llms-openai/README.md | 130 ++++++++++++ .../llms/llama-index-llms-openllm/README.md | 81 ++++++++ .../llama-index-llms-openllm/pyproject.toml | 2 +- .../llama-index-llms-openrouter/README.md | 81 ++++++++ .../pyproject.toml | 2 +- .../llms/llama-index-llms-openvino/README.md | 141 +++++++++++++ .../llama-index-llms-openvino/pyproject.toml | 2 +- .../llama-index-llms-optimum-intel/README.md | 98 +++++++++ .../pyproject.toml | 2 +- .../llms/llama-index-llms-palm/README.md | 70 +++++++ .../llms/llama-index-llms-palm/pyproject.toml | 2 +- .../llama-index-llms-perplexity/README.md | 85 ++++++++ .../pyproject.toml | 2 +- 62 files changed, 3407 insertions(+), 32 deletions(-) diff --git 
a/docs/docs/examples/llm/cohere.ipynb b/docs/docs/examples/llm/cohere.ipynb index 57deb036579a1..c7cfcfe13d4ed 100644 --- a/docs/docs/examples/llm/cohere.ipynb +++ b/docs/docs/examples/llm/cohere.ipynb @@ -176,7 +176,7 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.llms.openai import OpenAI\n", + "from llama_index.llms.cohere import Cohere\n", "\n", "llm = Cohere(api_key=api_key)\n", "resp = llm.stream_complete(\"Paul Graham is \")" diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/README.md b/llama-index-integrations/llms/llama-index-llms-anthropic/README.md index 9d84428b146ef..8199b08deb255 100644 --- a/llama-index-integrations/llms/llama-index-llms-anthropic/README.md +++ b/llama-index-integrations/llms/llama-index-llms-anthropic/README.md @@ -1 +1,189 @@ -# LlamaIndex Llms Integration: Anthropic +# LlamaIndex LLM Integration: Anthropic + +Anthropic is an AI research company focused on developing advanced language models, notably the Claude series. Their flagship model, Claude, is designed to generate human-like text while prioritizing safety and alignment with human intentions. Anthropic aims to create AI systems that are not only powerful but also responsible, addressing potential risks associated with artificial intelligence. + +### Installation + +```sh +%pip install llama-index-llms-anthropic +!pip install llama-index +``` + +``` +# Set Tokenizer +# First we want to set the tokenizer, which is slightly different than TikToken. +# NOTE: The Claude 3 tokenizer has not been updated yet; using the existing Anthropic tokenizer leads +# to context overflow errors for 200k tokens. We've temporarily set the max tokens for Claude 3 to 180k. +``` + +### Basic Usage + +```py +from llama_index.llms.anthropic import Anthropic +from llama_index.core import Settings + +tokenizer = Anthropic().tokenizer +Settings.tokenizer = tokenizer + +# Call complete with a prompt +import os + +os.environ["ANTHROPIC_API_KEY"] = "YOUR ANTHROPIC API KEY" +from llama_index.llms.anthropic import Anthropic + +# To customize your API key, do this +# otherwise it will lookup ANTHROPIC_API_KEY from your env variable +# llm = Anthropic(api_key="") +llm = Anthropic(model="claude-3-opus-20240229") + +resp = llm.complete("Paul Graham is ") +print(resp) + +# Sample response +# Paul Graham is a well-known entrepreneur, programmer, venture capitalist, and essayist. +# He is best known for co-founding Viaweb, one of the first web application companies, which was later +# sold to Yahoo! in 1998 and became Yahoo! Store. Graham is also the co-founder of Y Combinator, a highly +# successful startup accelerator that has helped launch numerous successful companies, such as Dropbox, +# Airbnb, and Reddit. 
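+
+# Small illustrative addition (not part of the original example): complete()
+# returns a CompletionResponse, so the generated text and the raw provider
+# payload are also available programmatically instead of only via print().
+generated_text = resp.text
+raw_payload = resp.raw  # provider-specific response data, when populated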
+``` + +### Using Anthropic model through Vertex AI + +```py +import os + +os.environ["ANTHROPIC_PROJECT_ID"] = "YOUR PROJECT ID HERE" +os.environ["ANTHROPIC_REGION"] = "YOUR PROJECT REGION HERE" +# Set region and project_id to make Anthropic use the Vertex AI client + +llm = Anthropic( + model="claude-3-5-sonnet@20240620", + region=os.getenv("ANTHROPIC_REGION"), + project_id=os.getenv("ANTHROPIC_PROJECT_ID"), +) + +resp = llm.complete("Paul Graham is ") +print(resp) +``` + +### Chat example with a list of messages + +```py +from llama_index.core.llms import ChatMessage +from llama_index.llms.anthropic import Anthropic + +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="Tell me a story"), +] +resp = Anthropic(model="claude-3-opus-20240229").chat(messages) +print(resp) +``` + +### Streaming example + +```py +from llama_index.llms.anthropic import Anthropic + +llm = Anthropic(model="claude-3-opus-20240229", max_tokens=100) +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") +``` + +### Chat streaming with pirate story + +```py +llm = Anthropic(model="claude-3-opus-20240229") +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="Tell me a story"), +] +resp = llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") +``` + +### Configure Model + +```py +from llama_index.llms.anthropic import Anthropic + +llm = Anthropic(model="claude-3-sonnet-20240229") +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") +``` + +### Async completion + +```py +from llama_index.llms.anthropic import Anthropic + +llm = Anthropic("claude-3-sonnet-20240229") +resp = await llm.acomplete("Paul Graham is ") +print(resp) +``` + +### Structured Prediction Example + +```py +from llama_index.llms.anthropic import Anthropic +from llama_index.core.prompts import PromptTemplate +from llama_index.core.bridge.pydantic import BaseModel +from typing import List + + +class MenuItem(BaseModel): + """A menu item in a restaurant.""" + + course_name: str + is_vegetarian: bool + + +class Restaurant(BaseModel): + """A restaurant with name, city, and cuisine.""" + + name: str + city: str + cuisine: str + menu_items: List[MenuItem] + + +llm = Anthropic("claude-3-5-sonnet-20240620") +prompt_tmpl = PromptTemplate( + "Generate a restaurant in a given city {city_name}" +) + +# Option 1: Use `as_structured_llm` +restaurant_obj = ( + llm.as_structured_llm(Restaurant) + .complete(prompt_tmpl.format(city_name="Miami")) + .raw +) +print(restaurant_obj) + +# Option 2: Use `structured_predict` +# restaurant_obj = llm.structured_predict(Restaurant, prompt_tmpl, city_name="Miami") + +# Streaming Structured Prediction +from llama_index.core.llms import ChatMessage +from IPython.display import clear_output +from pprint import pprint + +input_msg = ChatMessage.from_str("Generate a restaurant in San Francisco") + +sllm = llm.as_structured_llm(Restaurant) +stream_output = sllm.stream_chat([input_msg]) +for partial_output in stream_output: + clear_output(wait=True) + pprint(partial_output.raw.dict()) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/anthropic/ diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml index 8c1871fec9eaa..6be5d87942ff4 100644 
--- a/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-anthropic" readme = "README.md" -version = "0.3.4" +version = "0.3.5" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-anyscale/README.md b/llama-index-integrations/llms/llama-index-llms-anyscale/README.md index cce5f17894316..28ca1312e336c 100644 --- a/llama-index-integrations/llms/llama-index-llms-anyscale/README.md +++ b/llama-index-integrations/llms/llama-index-llms-anyscale/README.md @@ -1 +1,92 @@ # LlamaIndex Llms Integration: Anyscale + +### Installation + +```bash +%pip install llama-index-llms-anyscale +!pip install llama-index +``` + +### Basic Usage + +```py +from llama_index.llms.anyscale import Anyscale +from llama_index.core.llms import ChatMessage + +# Call chat with ChatMessage List +# You need to either set env var ANYSCALE_API_KEY or set api_key in the class constructor + +# Example of setting API key through environment variable +# import os +# os.environ['ANYSCALE_API_KEY'] = '' + +# Initialize the Anyscale LLM with your API key +llm = Anyscale(api_key="") + +# Chat Example +message = ChatMessage(role="user", content="Tell me a joke") +resp = llm.chat([message]) +print(resp) + +# Expected Output: +# assistant: Sure, here's a joke for you: +# +# Why couldn't the bicycle stand up by itself? +# +# Because it was two-tired! +# +# I hope that brought a smile to your face! Is there anything else I can assist you with? +``` + +### Streaming Example + +```py +message = ChatMessage(role="user", content="Tell me a story in 250 words") +resp = llm.stream_chat([message]) +for r in resp: + print(r.delta, end="") + +# Output Example: +# Once upon a time, there was a young girl named Maria who lived in a small village surrounded by lush green forests. +# Maria was a kind and gentle soul, loved by everyone in the village. She spent most of her days exploring the forests, +# discovering new species of plants and animals, and helping the villagers with their daily chores... +# (Story continues until it reaches the word limit.) +``` + +### Completion Example + +```py +resp = llm.complete("Tell me a joke") +print(resp) + +# Expected Output: +# assistant: Sure, here's a joke for you: +# +# Why couldn't the bicycle stand up by itself? +# +# Because it was two-tired! +``` + +### Streaming Completion Example + +```py +resp = llm.stream_complete("Tell me a story in 250 words") +for r in resp: + print(r.delta, end="") + +# Example Output: +# Once upon a time, there was a young girl named Maria who lived in a small village... +# (Stream continues as the story is generated.) 
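+
+# Small illustrative addition: each streamed chunk is a CompletionResponse,
+# where `r.delta` carries the newly generated piece and `r.text` typically
+# accumulates everything generated so far, so after the loop the final chunk
+# holds the full story.
+full_story = r.text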
+``` + +### Model Configuration + +```py +llm = Anyscale(model="codellama/CodeLlama-34b-Instruct-hf") +resp = llm.complete("Show me the c++ code to send requests to HTTP Server") +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/anyscale/ diff --git a/llama-index-integrations/llms/llama-index-llms-anyscale/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-anyscale/pyproject.toml index 72ac739bb1d5c..d009637190e8b 100644 --- a/llama-index-integrations/llms/llama-index-llms-anyscale/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-anyscale/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-anyscale" readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-azure-openai/README.md b/llama-index-integrations/llms/llama-index-llms-azure-openai/README.md index bd3c00cbca342..c825e0667a16d 100644 --- a/llama-index-integrations/llms/llama-index-llms-azure-openai/README.md +++ b/llama-index-integrations/llms/llama-index-llms-azure-openai/README.md @@ -1 +1,112 @@ # LlamaIndex Llms Integration: Azure Openai + +### Installation + +```bash +%pip install llama-index-llms-azure-openai +!pip install llama-index +``` + +### Prerequisites + +Follow this to setup your Azure account: [Setup Azure account](https://docs.llamaindex.ai/en/stable/examples/llm/azure_openai/#prerequisites) + +### Set the environment variables + +```py +OPENAI_API_VERSION = "2023-07-01-preview" +AZURE_OPENAI_ENDPOINT = "https://YOUR_RESOURCE_NAME.openai.azure.com/" +OPENAI_API_KEY = "" + +import os + +os.environ["OPENAI_API_KEY"] = "" +os.environ[ + "AZURE_OPENAI_ENDPOINT" +] = "https://.openai.azure.com/" +os.environ["OPENAI_API_VERSION"] = "2023-07-01-preview" + +# Use your LLM +from llama_index.llms.azure_openai import AzureOpenAI + +# Unlike normal OpenAI, you need to pass an engine argument in addition to model. +# The engine is the name of your model deployment you selected in Azure OpenAI Studio. + +llm = AzureOpenAI( + engine="simon-llm", model="gpt-35-turbo-16k", temperature=0.0 +) + +# Alternatively, you can also skip setting environment variables, and pass the parameters in directly via constructor. +llm = AzureOpenAI( + engine="my-custom-llm", + model="gpt-35-turbo-16k", + temperature=0.0, + azure_endpoint="https://.openai.azure.com/", + api_key="", + api_version="2023-07-01-preview", +) + +# Use the complete endpoint for text completion +response = llm.complete("The sky is a beautiful blue and") +print(response) + +# Expected Output: +# the sun is shining brightly. Fluffy white clouds float lazily across the sky, +# creating a picturesque scene. The vibrant blue color of the sky brings a sense +# of calm and tranquility... +``` + +### Streaming completion + +```py +response = llm.stream_complete("The sky is a beautiful blue and") +for r in response: + print(r.delta, end="") + +# Expected Output (Stream): +# the sun is shining brightly. Fluffy white clouds float lazily across the sky, +# creating a picturesque scene. The vibrant blue color of the sky brings a sense +# of calm and tranquility... + +# Use the chat endpoint for conversation +from llama_index.core.llms import ChatMessage + +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality." 
+ ), + ChatMessage(role="user", content="Hello"), +] + +response = llm.chat(messages) +print(response) + +# Expected Output: +# assistant: Ahoy there, matey! How be ye on this fine day? I be Captain Jolly Roger, +# the most colorful pirate ye ever did lay eyes on! What brings ye to me ship? +``` + +### Streaming chat + +```py +response = llm.stream_chat(messages) +for r in response: + print(r.delta, end="") + +# Expected Output (Stream): +# Ahoy there, matey! How be ye on this fine day? I be Captain Jolly Roger, +# the most colorful pirate ye ever did lay eyes on! What brings ye to me ship? + +# Rather than adding the same parameters to each chat or completion call, +# you can set them at a per-instance level with additional_kwargs. +llm = AzureOpenAI( + engine="simon-llm", + model="gpt-35-turbo-16k", + temperature=0.0, + additional_kwargs={"user": "your_user_id"}, +) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/azure_openai/ diff --git a/llama-index-integrations/llms/llama-index-llms-azure-openai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-azure-openai/pyproject.toml index 10304ae111170..f7676ef658f89 100644 --- a/llama-index-integrations/llms/llama-index-llms-azure-openai/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-azure-openai/pyproject.toml @@ -29,7 +29,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-azure-openai" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/README.md b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/README.md index 4907ca0cd6c67..d249a64bbc55d 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/README.md +++ b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/README.md @@ -1 +1,185 @@ # LlamaIndex Llms Integration: Bedrock Converse + +### Installation + +```bash +%pip install llama-index-llms-bedrock-converse +!pip install llama-index +``` + +### Usage + +```py +from llama_index.llms.bedrock_converse import BedrockConverse + +# Set your AWS profile name +profile_name = "Your aws profile name" + +# Simple completion call +resp = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + profile_name=profile_name, +).complete("Paul Graham is ") +print(resp) +``` + +### Call chat with a list of messages + +```py +from llama_index.core.llms import ChatMessage +from llama_index.llms.bedrock_converse import BedrockConverse + +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="Tell me a story"), +] + +resp = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + profile_name=profile_name, +).chat(messages) +print(resp) +``` + +### Streaming + +```py +# Using stream_complete endpoint +from llama_index.llms.bedrock_converse import BedrockConverse + +llm = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + profile_name=profile_name, +) +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") + +# Using stream_chat endpoint +from llama_index.llms.bedrock_converse import BedrockConverse + +llm = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + profile_name=profile_name, +) +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + 
ChatMessage(role="user", content="Tell me a story"), +] +resp = llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") +``` + +### Configure Model + +```py +from llama_index.llms.bedrock_converse import BedrockConverse + +llm = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + profile_name=profile_name, +) +resp = llm.complete("Paul Graham is ") +print(resp) +``` + +### Connect to Bedrock with Access Keys + +```py +from llama_index.llms.bedrock_converse import BedrockConverse + +llm = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + aws_access_key_id="AWS Access Key ID to use", + aws_secret_access_key="AWS Secret Access Key to use", + aws_session_token="AWS Session Token to use", + region_name="AWS Region to use, eg. us-east-1", +) + +resp = llm.complete("Paul Graham is ") +print(resp) +``` + +### Function Calling + +```py +# Claude, Command, and Mistral Large models support native function calling through AWS Bedrock Converse. +# There is seamless integration with LlamaIndex tools through the predict_and_call function on the LLM. + +from llama_index.llms.bedrock_converse import BedrockConverse +from llama_index.core.tools import FunctionTool + + +# Define some functions +def multiply(a: int, b: int) -> int: + """Multiply two integers and return the result""" + return a * b + + +def mystery(a: int, b: int) -> int: + """Mystery function on two integers.""" + return a * b + a + b + + +# Create tools from functions +mystery_tool = FunctionTool.from_defaults(fn=mystery) +multiply_tool = FunctionTool.from_defaults(fn=multiply) + +# Instantiate the BedrockConverse model +llm = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + profile_name=profile_name, +) + +# Use function tools with the LLM +response = llm.predict_and_call( + [mystery_tool, multiply_tool], + user_msg="What happens if I run the mystery function on 5 and 7", +) +print(str(response)) + +response = llm.predict_and_call( + [mystery_tool, multiply_tool], + user_msg=( + """What happens if I run the mystery function on the following pairs of numbers? + Generate a separate result for each row: + - 1 and 2 + - 8 and 4 + - 100 and 20 + + NOTE: you need to run the mystery function for all of the pairs above at the same time""" + ), + allow_parallel_tool_calls=True, +) +print(str(response)) + +for s in response.sources: + print(f"Name: {s.tool_name}, Input: {s.raw_input}, Output: {str(s)}") +``` + +### Async usage + +```py +from llama_index.llms.bedrock_converse import BedrockConverse + +llm = BedrockConverse( + model="anthropic.claude-3-haiku-20240307-v1:0", + aws_access_key_id="AWS Access Key ID to use", + aws_secret_access_key="AWS Secret Access Key to use", + aws_session_token="AWS Session Token to use", + region_name="AWS Region to use, eg. 
us-east-1", +) + +# Use async complete +resp = await llm.acomplete("Paul Graham is ") +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/ diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml index a31cee2b5a6c9..5ba5cbc2de88c 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-bedrock-converse/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-bedrock-converse" readme = "README.md" -version = "0.3.1" +version = "0.3.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock/README.md b/llama-index-integrations/llms/llama-index-llms-bedrock/README.md index 6d9be7684ff9a..d4129b6cb5560 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock/README.md +++ b/llama-index-integrations/llms/llama-index-llms-bedrock/README.md @@ -1 +1,128 @@ # LlamaIndex Llms Integration: Bedrock + +### Installation + +```bash +%pip install llama-index-llms-bedrock +!pip install llama-index +``` + +### Basic Usage + +```py +from llama_index.llms.bedrock import Bedrock + +# Set your AWS profile name +profile_name = "Your aws profile name" + +# Simple completion call +resp = Bedrock( + model="amazon.titan-text-express-v1", profile_name=profile_name +).complete("Paul Graham is ") +print(resp) + +# Expected output: +# Paul Graham is a computer scientist and entrepreneur, best known for co-founding +# the Silicon Valley startup incubator Y Combinator. He is also a prominent writer +# and speaker on technology and business topics... +``` + +### Call chat with a list of messages + +```py +from llama_index.core.llms import ChatMessage +from llama_index.llms.bedrock import Bedrock + +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="Tell me a story"), +] + +resp = Bedrock( + model="amazon.titan-text-express-v1", profile_name=profile_name +).chat(messages) +print(resp) + +# Expected output: +# assistant: Alright, matey! Here's a story for you: Once upon a time, there was a pirate +# named Captain Jack Sparrow who sailed the seas in search of his next adventure... +``` + +### Streaming + +#### Using stream_complete endpoint + +```py +from llama_index.llms.bedrock import Bedrock + +llm = Bedrock(model="amazon.titan-text-express-v1", profile_name=profile_name) +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") + +# Expected Output (Stream): +# Paul Graham is a computer programmer, entrepreneur, investor, and writer, best known +# for co-founding the internet firm Y Combinator... +``` + +### Streaming chat + +```py +from llama_index.llms.bedrock import Bedrock + +llm = Bedrock(model="amazon.titan-text-express-v1", profile_name=profile_name) +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="Tell me a story"), +] +resp = llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") + +# Expected Output (Stream): +# Once upon a time, there was a pirate with a colorful personality who sailed the +# high seas in search of adventure... 
+``` + +### Configure Model + +```py +from llama_index.llms.bedrock import Bedrock + +llm = Bedrock(model="amazon.titan-text-express-v1", profile_name=profile_name) +resp = llm.complete("Paul Graham is ") +print(resp) + +# Expected Output: +# Paul Graham is a computer scientist, entrepreneur, investor, and writer. He co-founded +# Viaweb, the first commercial web browser... +``` + +### Connect to Bedrock with Access Keys + +```py +from llama_index.llms.bedrock import Bedrock + +llm = Bedrock( + model="amazon.titan-text-express-v1", + aws_access_key_id="AWS Access Key ID to use", + aws_secret_access_key="AWS Secret Access Key to use", + aws_session_token="AWS Session Token to use", + region_name="AWS Region to use, e.g. us-east-1", +) + +resp = llm.complete("Paul Graham is ") +print(resp) + +# Expected Output: +# Paul Graham is an American computer scientist, entrepreneur, investor, and author, +# best known for co-founding Viaweb, the first commercial web browser... +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/bedrock/ diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml index d2efb18d4b249..ab002a1ae7696 100644 --- a/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-bedrock/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-bedrock" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-clarifai/README.md b/llama-index-integrations/llms/llama-index-llms-clarifai/README.md index a4b2735c5cd86..96669aeb2fa52 100644 --- a/llama-index-integrations/llms/llama-index-llms-clarifai/README.md +++ b/llama-index-integrations/llms/llama-index-llms-clarifai/README.md @@ -1 +1,103 @@ # LlamaIndex Llms Integration: Clarifai + +### Installation + +```bash +%pip install llama-index-llms-clarifai +!pip install llama-index +!pip install clarifai +``` + +### Basic Usage + +```py +# Set Clarifai PAT as an environment variable +import os + +os.environ["CLARIFAI_PAT"] = "" + +# Import Clarifai package +from llama_index.llms.clarifai import Clarifai + +# Example parameters +params = dict( + user_id="clarifai", + app_id="ml", + model_name="llama2-7b-alternative-4k", + model_url="https://clarifai.com/clarifai/ml/models/llama2-7b-alternative-4k", +) + +# Initialize the LLM +# Method:1 using model_url parameter +llm_model = Clarifai(model_url=params["model_url"]) + +# Method:2 using model_name, app_id & user_id parameters +llm_model = Clarifai( + model_name=params["model_name"], + app_id=params["app_id"], + user_id=params["user_id"], +) + +# Call complete function +llm_response = llm_model.complete( + prompt="write a 10 line rhyming poem about science" +) +print(llm_response) + +# Output +# Science is fun, it's true! +# From atoms to galaxies, it's all new! +# With experiments and tests, we learn so fast, +# And discoveries come from the past. +# It helps us understand the world around, +# And makes our lives more profound. +# So let's embrace this wondrous art, +# And see where it takes us in the start! 
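+
+# Small illustrative addition: like any LlamaIndex LLM, the Clarifai model can
+# also be registered globally so that query engines and chat engines pick it up
+# by default (this assumes the llama-index core Settings object).
+from llama_index.core import Settings
+
+Settings.llm = llm_model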
+``` + +### Call chat function + +```py +from llama_index.core.llms import ChatMessage + +messages = [ + ChatMessage(role="user", content="write about climate change in 50 lines") +] +response = llm_model.chat(messages) +print(response) + +# Output +# user: or less. +# Climate change is a serious threat to our planet and its inhabitants. +# Rising temperatures are causing extreme weather events, such as hurricanes, +# droughts, and wildfires. Sea levels are rising, threatening coastal +# communities and ecosystems. The melting of polar ice caps is disrupting +# global navigation and commerce. Climate change is also exacerbating air +# pollution, which can lead to respiratory problems and other health issues. +# It's essential that we take action now to reduce greenhouse gas emissions +# and transition to renewable energy sources to mitigate the worst effects +# of climate change. +``` + +### Using Inference parameters + +```py +# Alternatively, you can call models with inference parameters. + +# Here is an inference parameter example for GPT model. +inference_params = dict(temperature=str(0.3), max_tokens=20) + +llm_response = llm_model.complete( + prompt="What is nuclear fission and fusion?", + inference_params=inference_params, +) + +messages = [ChatMessage(role="user", content="Explain about the big bang")] +response = llm_model.chat(messages, inference_params=inference_params) + +print(response) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/clarifai/ diff --git a/llama-index-integrations/llms/llama-index-llms-clarifai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-clarifai/pyproject.toml index a4d1fbc3ebb2d..aef4201902a0b 100644 --- a/llama-index-integrations/llms/llama-index-llms-clarifai/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-clarifai/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-clarifai" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-cohere/README.md b/llama-index-integrations/llms/llama-index-llms-cohere/README.md index d3ee536fe7ac8..afa7ded18d15c 100644 --- a/llama-index-integrations/llms/llama-index-llms-cohere/README.md +++ b/llama-index-integrations/llms/llama-index-llms-cohere/README.md @@ -1 +1,147 @@ # LlamaIndex Llms Integration: Cohere + +### Installation + +```bash +%pip install llama-index-llms-openai +%pip install llama-index-llms-cohere +!pip install llama-index +``` + +### Basic usage + +```py +# Import Cohere +from llama_index.llms.cohere import Cohere + +# Set your API key +api_key = "Your api key" + +# Call complete function +resp = Cohere(api_key=api_key).complete("Paul Graham is ") +# Note: Your text contains a trailing whitespace, which has been trimmed to ensure high quality generations. +print(resp) + +# Output +# an English computer scientist, entrepreneur and investor. +# He is best known for his work as a co-founder of the seed accelerator Y Combinator. +# He is also the author of the free startup advice blog "Startups.com". +# Paul Graham is known for his philanthropic efforts. +# Has given away hundreds of millions of dollars to good causes. + +# Call chat with a list of messages +from llama_index.core.llms import ChatMessage + +messages = [ + ChatMessage(role="user", content="hello there"), + ChatMessage( + role="assistant", content="Arrrr, matey! How can I help ye today?" 
+ ), + ChatMessage(role="user", content="What is your name"), +] + +resp = Cohere(api_key=api_key).chat( + messages, preamble_override="You are a pirate with a colorful personality" +) +print(resp) + +# Output +# assistant: Traditionally, ye refers to gender-nonconforming people of any gender, +# and those who are genderless, whereas matey refers to a friend, commonly used to +# address a fellow pirate. According to pop culture in works like "Pirates of the +# Caribbean", the romantic interest of Jack Sparrow refers to themselves using the +# gender-neutral pronoun "ye". + +# Are you interested in learning more about the pirate culture? +``` + +### Streaming: Using stream_complete endpoint + +```py +from llama_index.llms.cohere import Cohere + +llm = Cohere(api_key=api_key) +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") + +# Output +# an English computer scientist, essayist, and venture capitalist. +# He is best known for his work as a co-founder of the Y Combinator startup incubator, +# and his essays, which are widely read and influential in the startup community. + +# Using stream_chat endpoint +messages = [ + ChatMessage(role="user", content="hello there"), + ChatMessage( + role="assistant", content="Arrrr, matey! How can I help ye today?" + ), + ChatMessage(role="user", content="What is your name"), +] + +resp = llm.stream_chat( + messages, preamble_override="You are a pirate with a colorful personality" +) +for r in resp: + print(r.delta, end="") + +# Output +# Arrrr, matey! According to etiquette, we are suppose to exchange names first! +# Mine remains a mystery for now. +``` + +### Configure Model + +```py +llm = Cohere(model="command", api_key=api_key) +resp = llm.complete("Paul Graham is ") +# Note: Your text contains a trailing whitespace, which has been trimmed to ensure high quality generations. +print(resp) + +# Output +# an English computer scientist, entrepreneur and investor. +# He is best known for his work as a co-founder of the seed accelerator Y Combinator. +# He is also the co-founder of the online dating platform Match.com. + +# Async calls +llm = Cohere(model="command", api_key=api_key) +resp = await llm.acomplete("Paul Graham is ") +# Note: Your text contains a trailing whitespace, which has been trimmed to ensure high quality generations. +print(resp) + +# Output +# an English computer scientist, entrepreneur and investor. +# He is best known for his work as a co-founder of the startup incubator and seed fund +# Y Combinator, and the programming language Lisp. He has also written numerous essays, +# many of which have become highly influential in the software engineering field. + +# Streaming async +resp = await llm.astream_complete("Paul Graham is ") +async for delta in resp: + print(delta.delta, end="") + +# Output +# an English computer scientist, essayist, and businessman. +# He is best known for his work as a co-founder of the startup accelerator Y Combinator, +# and his essay "Beating the Averages." +``` + +### Set API Key at a per-instance level + +```py +# If desired, you can have separate LLM instances use separate API keys. 
+from llama_index.llms.cohere import Cohere + +llm_good = Cohere(api_key=api_key) +llm_bad = Cohere(model="command", api_key="BAD_KEY") + +resp = llm_good.complete("Paul Graham is ") +print(resp) + +resp = llm_bad.complete("Paul Graham is ") +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/cohere/ diff --git a/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml index a7ce089672c78..ff35a822318d7 100644 --- a/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-cohere" readme = "README.md" -version = "0.3.0" +version = "0.3.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-dashscope/README.md b/llama-index-integrations/llms/llama-index-llms-dashscope/README.md index 09efd45a0a2af..9301d9ab2d8a2 100644 --- a/llama-index-integrations/llms/llama-index-llms-dashscope/README.md +++ b/llama-index-integrations/llms/llama-index-llms-dashscope/README.md @@ -1 +1,94 @@ # LlamaIndex Llms Integration: Dashscope + +## Installation + +1. Install the required Python package: + + ```bash + pip install llama-index-llms-dashscope + ``` + +2. Set the DashScope API key as an environment variable: + + ```bash + export DASHSCOPE_API_KEY=YOUR_DASHSCOPE_API_KEY + ``` + + Alternatively, you can set it in your Python script: + + ```python + import os + + os.environ["DASHSCOPE_API_KEY"] = "YOUR_DASHSCOPE_API_KEY" + ``` + +## Usage + +### Basic Recipe Generation + +To generate a basic vanilla cake recipe: + +```python +from llama_index.llms.dashscope import DashScope, DashScopeGenerationModels + +# Initialize DashScope object +dashscope_llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX) + +# Generate a vanilla cake recipe +resp = dashscope_llm.complete("How to make cake?") +print(resp) +``` + +### Streaming Recipe Responses + +For real-time streamed responses: + +```python +responses = dashscope_llm.stream_complete("How to make cake?") +for response in responses: + print(response.delta, end="") +``` + +### Multi-Round Conversation + +To have a conversation with the assistant and ask for a sugar-free cake recipe: + +```python +from llama_index.core.base.llms.types import MessageRole, ChatMessage + +messages = [ + ChatMessage( + role=MessageRole.SYSTEM, content="You are a helpful assistant." 
+ ), + ChatMessage(role=MessageRole.USER, content="How to make cake?"), +] + +# Get first round response +resp = dashscope_llm.chat(messages) +print(resp) + +# Continue conversation +messages.append( + ChatMessage(role=MessageRole.ASSISTANT, content=resp.message.content) +) +messages.append( + ChatMessage(role=MessageRole.USER, content="How to make it without sugar?") +) + +# Get second round response +resp = dashscope_llm.chat(messages) +print(resp) +``` + +### Handling Sugar-Free Recipes + +For sugar-free cake recipes using honey as a sweetener: + +```python +resp = dashscope_llm.complete("How to make cake without sugar?") +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/dashscope/ diff --git a/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml index 2aea283c6583e..74f3e79a1bfc6 100644 --- a/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-dashscope/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-dashscope" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-everlyai/README.md b/llama-index-integrations/llms/llama-index-llms-everlyai/README.md index 508b894ca772f..c99b2610a7e8a 100644 --- a/llama-index-integrations/llms/llama-index-llms-everlyai/README.md +++ b/llama-index-integrations/llms/llama-index-llms-everlyai/README.md @@ -1 +1,112 @@ # LlamaIndex Llms Integration: Everlyai + +## Installation + +1. Install the required Python packages: + + ```bash + %pip install llama-index-llms-everlyai + !pip install llama-index + ``` + +2. Set the EverlyAI API key as an environment variable or pass it directly to the constructor: + + ```python + import os + + os.environ["EVERLYAI_API_KEY"] = "" + ``` + + Or use it directly in your Python code: + + ```python + llm = EverlyAI(api_key="your-api-key") + ``` + +## Usage + +### Basic Chat + +To send a message and get a response (e.g., a joke): + +```python +from llama_index.llms.everlyai import EverlyAI +from llama_index.core.llms import ChatMessage + +# Initialize EverlyAI with API key +llm = EverlyAI(api_key="your-api-key") + +# Create a message +message = ChatMessage(role="user", content="Tell me a joke") + +# Call the chat method +resp = llm.chat([message]) +print(resp) +``` + +Example output: + +``` +Why don't scientists trust atoms? +Because they make up everything! +``` + +### Streamed Chat + +To stream a response for more dynamic conversations (e.g., storytelling): + +```python +message = ChatMessage(role="user", content="Tell me a story in 250 words") +resp = llm.stream_chat([message]) + +for r in resp: + print(r.delta, end="") +``` + +Example output (partial): + +``` +As the sun set over the horizon, a young girl named Lily sat on the beach, watching the waves roll in... +``` + +### Complete Tasks + +To use the `complete` method for simpler tasks like telling a joke: + +```python +resp = llm.complete("Tell me a joke") +print(resp) +``` + +Example output: + +``` +Why don't scientists trust atoms? +Because they make up everything! 
+``` + +### Streamed Completion + +For generating responses like stories using `stream_complete`: + +```python +resp = llm.stream_complete("Tell me a story in 250 words") + +for r in resp: + print(r.delta, end="") +``` + +Example output (partial): + +``` +As the sun set over the horizon, a young girl named Maria sat on the beach, watching the waves roll in... +``` + +## Notes + +- Ensure the API key is set correctly before making any requests. +- The `stream_chat` and `stream_complete` methods allow for real-time response streaming, making them ideal for dynamic and lengthy outputs like stories. + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/everlyai/ diff --git a/llama-index-integrations/llms/llama-index-llms-everlyai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-everlyai/pyproject.toml index e8be4170950b3..4d5623763b2fa 100644 --- a/llama-index-integrations/llms/llama-index-llms-everlyai/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-everlyai/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-everlyai" readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-fireworks/README.md b/llama-index-integrations/llms/llama-index-llms-fireworks/README.md index c11776423793b..3ba7745304e7e 100644 --- a/llama-index-integrations/llms/llama-index-llms-fireworks/README.md +++ b/llama-index-integrations/llms/llama-index-llms-fireworks/README.md @@ -1 +1,138 @@ # LlamaIndex Llms Integration: Fireworks + +## Installation + +1. Install the required Python packages: + + ```bash + %pip install llama-index-llms-fireworks + %pip install llama-index + ``` + +2. Set the Fireworks API key as an environment variable or pass it directly to the class constructor. + +## Usage + +### Basic Completion + +To generate a simple completion, use the `complete` method: + +```python +from llama_index.llms.fireworks import Fireworks + +resp = Fireworks().complete("Paul Graham is ") +print(resp) +``` + +Example output: + +``` +Paul Graham is a well-known essayist, programmer, and startup entrepreneur. He co-founded Y Combinator, which supported startups like Dropbox, Airbnb, and Reddit. +``` + +### Basic Chat + +To simulate a chat with multiple messages: + +```python +from llama_index.core.llms import ChatMessage +from llama_index.llms.fireworks import Fireworks + +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="What is your name"), +] +resp = Fireworks().chat(messages) +print(resp) +``` + +Example output: + +``` +Arr matey, ye be askin' for me name? Well, I be known as Captain Redbeard the Terrible! +``` + +### Streaming Completion + +To stream a response in real-time using `stream_complete`: + +```python +from llama_index.llms.fireworks import Fireworks + +llm = Fireworks() +resp = llm.stream_complete("Paul Graham is ") + +for r in resp: + print(r.delta, end="") +``` + +Example output (partial): + +``` +Paul Graham is a well-known essayist, programmer, and venture capitalist... 
+``` + +### Streaming Chat + +For a streamed conversation, use `stream_chat`: + +```python +from llama_index.llms.fireworks import Fireworks +from llama_index.core.llms import ChatMessage + +llm = Fireworks() +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="What is your name"), +] +resp = llm.stream_chat(messages) + +for r in resp: + print(r.delta, end="") +``` + +Example output (partial): + +``` +Arr matey, ye be askin' for me name? Well, I be known as Captain Redbeard the Terrible... +``` + +### Model Configuration + +To configure the model for more specific behavior: + +```python +from llama_index.llms.fireworks import Fireworks + +llm = Fireworks(model="accounts/fireworks/models/firefunction-v1") +resp = llm.complete("Paul Graham is ") +print(resp) +``` + +Example output: + +``` +Paul Graham is an English-American computer scientist, entrepreneur, venture capitalist, and blogger. +``` + +### API Key Configuration + +To use separate API keys for different instances: + +```python +from llama_index.llms.fireworks import Fireworks + +llm = Fireworks( + model="accounts/fireworks/models/firefunction-v1", api_key="YOUR_API_KEY" +) +resp = llm.complete("Paul Graham is ") +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/fireworks/ diff --git a/llama-index-integrations/llms/llama-index-llms-fireworks/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-fireworks/pyproject.toml index fd4a2e7681522..a74e602dbfd8c 100644 --- a/llama-index-integrations/llms/llama-index-llms-fireworks/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-fireworks/pyproject.toml @@ -26,7 +26,7 @@ description = "llama-index llms fireworks integration" license = "MIT" name = "llama-index-llms-fireworks" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-friendli/README.md b/llama-index-integrations/llms/llama-index-llms-friendli/README.md index 59a6dcb89f879..5e1ce7e7ccb57 100644 --- a/llama-index-integrations/llms/llama-index-llms-friendli/README.md +++ b/llama-index-integrations/llms/llama-index-llms-friendli/README.md @@ -1 +1,115 @@ # LlamaIndex Llms Integration: Friendli + +## Installation + +1. Install the required Python packages: + + ```bash + %pip install llama-index-llms-friendli + !pip install llama-index + ``` + +2. 
Set the Friendli token as an environment variable: + + ```bash + %env FRIENDLI_TOKEN=your_token_here + ``` + +## Usage + +### Basic Chat + +To generate a chat response, use the following code: + +```python +from llama_index.llms.friendli import Friendli +from llama_index.core.llms import ChatMessage, MessageRole + +llm = Friendli() + +message = ChatMessage(role=MessageRole.USER, content="Tell me a joke.") +resp = llm.chat([message]) +print(resp) +``` + +### Streaming Responses + +To stream chat responses in real-time: + +```python +resp = llm.stream_chat([message]) +for r in resp: + print(r.delta, end="") +``` + +### Asynchronous Chat + +For asynchronous chat interactions, use the following: + +```python +resp = await llm.achat([message]) +print(resp) +``` + +### Async Streaming + +To handle async streaming of chat responses: + +```python +resp = await llm.astream_chat([message]) +async for r in resp: + print(r.delta, end="") +``` + +### Complete with a Prompt + +To generate a completion based on a prompt: + +```python +prompt = "Draft a cover letter for a role in software engineering." +resp = llm.complete(prompt) +print(resp) +``` + +### Streaming Completion + +To stream completions in real-time: + +```python +resp = llm.stream_complete(prompt) +for r in resp: + print(r.delta, end="") +``` + +### Async Completion + +To handle async completions: + +```python +resp = await llm.acomplete(prompt) +print(resp) +``` + +### Async Streaming Completion + +For async streaming of completions: + +```python +resp = await llm.astream_complete(prompt) +async for r in resp: + print(r.delta, end="") +``` + +### Model Configuration + +To configure a specific model: + +```python +llm = Friendli(model="llama-2-70b-chat") +resp = llm.chat([message]) +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/friendli/ diff --git a/llama-index-integrations/llms/llama-index-llms-friendli/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-friendli/pyproject.toml index e5423315ae111..6b50b65ac2c25 100644 --- a/llama-index-integrations/llms/llama-index-llms-friendli/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-friendli/pyproject.toml @@ -27,7 +27,7 @@ license = "MIT" name = "llama-index-llms-friendli" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-gemini/README.md b/llama-index-integrations/llms/llama-index-llms-gemini/README.md index 2ad39265996a0..b8e72cfcae922 100644 --- a/llama-index-integrations/llms/llama-index-llms-gemini/README.md +++ b/llama-index-integrations/llms/llama-index-llms-gemini/README.md @@ -1 +1,128 @@ # LlamaIndex Llms Integration: Gemini + +## Installation + +1. Install the required Python packages: + + ```bash + %pip install llama-index-llms-gemini + !pip install -q llama-index google-generativeai + ``` + +2. 
Set the Google API key as an environment variable: + + ```bash + %env GOOGLE_API_KEY=your_api_key_here + ``` + +## Usage + +### Basic Content Generation + +To generate a poem using the Gemini model, use the following code: + +```python +from llama_index.llms.gemini import Gemini + +resp = Gemini().complete("Write a poem about a magic backpack") +print(resp) +``` + +### Chat with Messages + +To simulate a conversation, send a list of messages: + +```python +from llama_index.core.llms import ChatMessage +from llama_index.llms.gemini import Gemini + +messages = [ + ChatMessage(role="user", content="Hello friend!"), + ChatMessage(role="assistant", content="Yarr what is shakin' matey?"), + ChatMessage( + role="user", content="Help me decide what to have for dinner." + ), +] +resp = Gemini().chat(messages) +print(resp) +``` + +### Streaming Responses + +To stream content responses in real-time: + +```python +from llama_index.llms.gemini import Gemini + +llm = Gemini() +resp = llm.stream_complete( + "The story of Sourcrust, the bread creature, is really interesting. It all started when..." +) +for r in resp: + print(r.text, end="") +``` + +To stream chat responses: + +```python +from llama_index.llms.gemini import Gemini +from llama_index.core.llms import ChatMessage + +llm = Gemini() +messages = [ + ChatMessage(role="user", content="Hello friend!"), + ChatMessage(role="assistant", content="Yarr what is shakin' matey?"), + ChatMessage( + role="user", content="Help me decide what to have for dinner." + ), +] +resp = llm.stream_chat(messages) +``` + +### Using Other Models + +To find suitable models available in the Gemini model site: + +```python +import google.generativeai as genai + +for m in genai.list_models(): + if "generateContent" in m.supported_generation_methods: + print(m.name) +``` + +### Specific Model Usage + +To use a specific model, you can configure it like this: + +```python +from llama_index.llms.gemini import Gemini + +llm = Gemini(model="models/gemini-pro") +resp = llm.complete("Write a short, but joyous, ode to LlamaIndex") +print(resp) +``` + +### Asynchronous API + +To use the asynchronous completion API: + +```python +from llama_index.llms.gemini import Gemini + +llm = Gemini() +resp = await llm.acomplete("Llamas are famous for ") +print(resp) +``` + +For asynchronous streaming of responses: + +```python +resp = await llm.astream_complete("Llamas are famous for ") +async for chunk in resp: + print(chunk.text, end="") +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/gemini/ diff --git a/llama-index-integrations/llms/llama-index-llms-gemini/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-gemini/pyproject.toml index 92e4cfb6caaa1..289f8965aad50 100644 --- a/llama-index-integrations/llms/llama-index-llms-gemini/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-gemini/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-gemini" readme = "README.md" -version = "0.3.6" +version = "0.3.7" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface/README.md b/llama-index-integrations/llms/llama-index-llms-huggingface/README.md index a77ce6621b121..f9867c91f795b 100644 --- a/llama-index-integrations/llms/llama-index-llms-huggingface/README.md +++ b/llama-index-integrations/llms/llama-index-llms-huggingface/README.md @@ -1 +1,92 @@ # LlamaIndex Llms Integration: Huggingface + +## Installation + +1. 
Install the required Python packages: + + ```bash + %pip install llama-index-llms-huggingface + %pip install llama-index-llms-huggingface-api + !pip install "transformers[torch]" "huggingface_hub[inference]" + !pip install llama-index + ``` + +2. Set the Hugging Face API token as an environment variable: + + ```bash + export HUGGING_FACE_TOKEN=your_token_here + ``` + +## Usage + +### Import Required Libraries + +```python +import os +from typing import List, Optional +from llama_index.llms.huggingface import HuggingFaceLLM +from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI +``` + +### Run a Model Locally + +To run the model locally on your machine: + +```python +locally_run = HuggingFaceLLM(model_name="HuggingFaceH4/zephyr-7b-alpha") +``` + +### Run a Model Remotely + +To run the model remotely using Hugging Face's Inference API: + +```python +HF_TOKEN: Optional[str] = os.getenv("HUGGING_FACE_TOKEN") +remotely_run = HuggingFaceInferenceAPI( + model_name="HuggingFaceH4/zephyr-7b-alpha", token=HF_TOKEN +) +``` + +### Anonymous Remote Execution + +You can also use the Inference API anonymously without providing a token: + +```python +remotely_run_anon = HuggingFaceInferenceAPI( + model_name="HuggingFaceH4/zephyr-7b-alpha" +) +``` + +### Use Recommended Model + +If you do not provide a model name, Hugging Face's recommended model is used: + +```python +remotely_run_recommended = HuggingFaceInferenceAPI(token=HF_TOKEN) +``` + +### Generate Text Completion + +To generate a text completion using the remote model: + +```python +completion_response = remotely_run_recommended.complete("To infinity, and") +print(completion_response) +``` + +### Set Global Tokenizer + +If you modify the LLM, ensure you change the global tokenizer to match: + +```python +from llama_index.core import set_global_tokenizer +from transformers import AutoTokenizer + +set_global_tokenizer( + AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha").encode +) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/huggingface/ diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-huggingface/pyproject.toml index fa5c0126b9d66..2a3e367b4c051 100644 --- a/llama-index-integrations/llms/llama-index-llms-huggingface/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-huggingface/pyproject.toml @@ -28,7 +28,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-huggingface" readme = "README.md" -version = "0.3.4" +version = "0.3.5" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-konko/README.md b/llama-index-integrations/llms/llama-index-llms-konko/README.md index 6de42a6bd82e2..d6ad865fd06f7 100644 --- a/llama-index-integrations/llms/llama-index-llms-konko/README.md +++ b/llama-index-integrations/llms/llama-index-llms-konko/README.md @@ -1 +1,89 @@ # LlamaIndex Llms Integration: Konko + +## Installation + +1. Install the required Python packages: + + ```bash + %pip install llama-index-llms-konko + !pip install llama-index + ``` + +2. 
Set the API keys as environment variables: + + ```bash + export KONKO_API_KEY= + export OPENAI_API_KEY= + ``` + +## Usage + +### Import Required Libraries + +```python +import os +from llama_index.llms.konko import Konko +from llama_index.core.llms import ChatMessage +``` + +### Chat with Konko Model + +To chat with a Konko model: + +```python +os.environ["KONKO_API_KEY"] = "" +llm = Konko(model="meta-llama/llama-2-13b-chat") +messages = ChatMessage(role="user", content="Explain Big Bang Theory briefly") + +resp = llm.chat([messages]) +print(resp) +``` + +### Chat with OpenAI Model + +To chat with an OpenAI model: + +```python +os.environ["OPENAI_API_KEY"] = "" +llm = Konko(model="gpt-3.5-turbo") +message = ChatMessage(role="user", content="Explain Big Bang Theory briefly") + +resp = llm.chat([message]) +print(resp) +``` + +### Streaming Responses + +To stream a response for longer messages: + +```python +message = ChatMessage(role="user", content="Tell me a story in 250 words") +resp = llm.stream_chat([message], max_tokens=1000) + +for r in resp: + print(r.delta, end="") +``` + +### Complete with Prompt + +To generate a completion based on a system prompt: + +```python +llm = Konko(model="phind/phind-codellama-34b-v2", max_tokens=100) +text = """### System Prompt +You are an intelligent programming assistant. + +### User Message +Implement a linked list in C++ + +### Assistant +...""" + +resp = llm.stream_complete(text, max_tokens=1000) +for r in resp: + print(r.delta, end="") +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/konko/ diff --git a/llama-index-integrations/llms/llama-index-llms-konko/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-konko/pyproject.toml index b0fca4e4af599..c68635cb4c955 100644 --- a/llama-index-integrations/llms/llama-index-llms-konko/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-konko/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-konko" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-langchain/README.md b/llama-index-integrations/llms/llama-index-llms-langchain/README.md index fae410d461587..4ae39a1b73ae1 100644 --- a/llama-index-integrations/llms/llama-index-llms-langchain/README.md +++ b/llama-index-integrations/llms/llama-index-llms-langchain/README.md @@ -1 +1,40 @@ # LlamaIndex Llms Integration: Langchain + +## Installation + +1. 
Install the required Python packages: + + ```bash + %pip install llama-index-llms-langchain + ``` + +## Usage + +### Import Required Libraries + +```python +from langchain.llms import OpenAI +from llama_index.llms.langchain import LangChainLLM +``` + +### Initialize LangChain LLM + +To create an instance of `LangChainLLM` with OpenAI: + +```python +llm = LangChainLLM(llm=OpenAI()) +``` + +### Generate Streaming Response + +To generate a streaming response, use the following code: + +```python +response_gen = llm.stream_complete("Hi this is") +for delta in response_gen: + print(delta.delta, end="") +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/langchain/ diff --git a/llama-index-integrations/llms/llama-index-llms-langchain/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-langchain/pyproject.toml index cd05e5566899f..0f238eace7b3c 100644 --- a/llama-index-integrations/llms/llama-index-llms-langchain/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-langchain/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-langchain" readme = "README.md" -version = "0.4.1" +version = "0.4.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-litellm/README.md b/llama-index-integrations/llms/llama-index-llms-litellm/README.md index 1587d05c82990..0096f7b927a9a 100644 --- a/llama-index-integrations/llms/llama-index-llms-litellm/README.md +++ b/llama-index-integrations/llms/llama-index-llms-litellm/README.md @@ -1 +1,101 @@ # LlamaIndex Llms Integration: Litellm + +## Installation + +1. Install the required Python packages: + + ```bash + %pip install llama-index-llms-litellm + !pip install llama-index + ``` + +## Usage + +### Import Required Libraries + +```python +import os +from llama_index.llms.litellm import LiteLLM +from llama_index.core.llms import ChatMessage +``` + +### Set Up Environment Variables + +Set your API keys as environment variables: + +```python +os.environ["OPENAI_API_KEY"] = "your-api-key" +os.environ["COHERE_API_KEY"] = "your-api-key" +``` + +### Example: OpenAI Call + +To interact with the OpenAI model: + +```python +message = ChatMessage(role="user", content="Hey! 
how's it going?") +llm = LiteLLM("gpt-3.5-turbo") +chat_response = llm.chat([message]) +print(chat_response) +``` + +### Example: Cohere Call + +To interact with the Cohere model: + +```python +llm = LiteLLM("command-nightly") +chat_response = llm.chat([message]) +print(chat_response) +``` + +### Example: Chat with System Message + +To have a chat with a system role: + +```python +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="Tell me a story"), +] +resp = LiteLLM("gpt-3.5-turbo").chat(messages) +print(resp) +``` + +### Streaming Responses + +To use the streaming feature with `stream_complete`: + +```python +llm = LiteLLM("gpt-3.5-turbo") +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") +``` + +### Streaming Chat Example + +To stream chat messages: + +```python +llm = LiteLLM("gpt-3.5-turbo") +resp = llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") +``` + +### Asynchronous Example + +For asynchronous calls, use: + +```python +llm = LiteLLM("gpt-3.5-turbo") +resp = await llm.acomplete("Paul Graham is ") +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/litellm/ diff --git a/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml index 3b2928eb86350..0da7555706d5b 100644 --- a/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-litellm" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-llama-api/README.md b/llama-index-integrations/llms/llama-index-llms-llama-api/README.md index 7bf0ab861e54e..37c0a8d7622c6 100644 --- a/llama-index-integrations/llms/llama-index-llms-llama-api/README.md +++ b/llama-index-integrations/llms/llama-index-llms-llama-api/README.md @@ -1 +1,157 @@ # LlamaIndex Llms Integration: Llama Api + +## Prerequisites + +1. **API Key**: Obtain an API key from [Llama API](https://www.llama-api.com/). +2. **Python 3.x**: Ensure you have Python installed on your system. + +## Installation + +1. 
Install the required Python packages: + + ```bash + %pip install llama-index-program-openai + %pip install llama-index-llms-llama-api + !pip install llama-index + ``` + +## Basic Usage + +### Import Required Libraries + +```python +from llama_index.llms.llama_api import LlamaAPI +from llama_index.core.llms import ChatMessage +``` + +### Initialize LlamaAPI + +Set up the API key: + +```python +api_key = "LL-your-key" +llm = LlamaAPI(api_key=api_key) +``` + +### Complete with a Prompt + +Generate a response using a prompt: + +```python +resp = llm.complete("Paul Graham is ") +print(resp) +``` + +### Chat with a List of Messages + +Interact with the model using a chat interface: + +```python +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="What is your name"), +] +resp = llm.chat(messages) +print(resp) +``` + +### Function Calling + +Define a function using Pydantic and call it through LlamaAPI: + +```python +from pydantic import BaseModel +from llama_index.core.llms.openai_utils import to_openai_function + + +class Song(BaseModel): + """A song with name and artist""" + + name: str + artist: str + + +song_fn = to_openai_function(Song) +response = llm.complete("Generate a song", functions=[song_fn]) +function_call = response.additional_kwargs["function_call"] +print(function_call) +``` + +### Structured Data Extraction + +Define schemas for structured output using Pydantic: + +```python +from pydantic import BaseModel +from typing import List + + +class Song(BaseModel): + """Data model for a song.""" + + title: str + length_mins: int + + +class Album(BaseModel): + """Data model for an album.""" + + name: str + artist: str + songs: List[Song] +``` + +Define the prompt template for extracting structured data: + +```python +from llama_index.program.openai import OpenAIPydanticProgram + +prompt_template_str = """\ +Extract album and songs from the text provided. +For each song, make sure to specify the title and the length_mins. +{text} +""" + +llm = LlamaAPI(api_key=api_key, temperature=0.0) + +program = OpenAIPydanticProgram.from_defaults( + output_cls=Album, + llm=llm, + prompt_template_str=prompt_template_str, + verbose=True, +) +``` + +### Run Program to Get Structured Output + +Execute the program to extract structured data from the provided text: + +```python +output = program( + text=""" + "Echoes of Eternity" is a compelling and thought-provoking album, skillfully crafted by the renowned artist, Seraphina Rivers. \ + This captivating musical collection takes listeners on an introspective journey, delving into the depths of the human experience \ + and the vastness of the universe. With her mesmerizing vocals and poignant songwriting, Seraphina Rivers infuses each track with \ + raw emotion and a sense of cosmic wonder. The album features several standout songs, including the hauntingly beautiful "Stardust \ + Serenade," a celestial ballad that lasts for six minutes, carrying listeners through a celestial dreamscape. "Eclipse of the Soul" \ + captivates with its enchanting melodies and spans over eight minutes, inviting introspection and contemplation. Another gem, "Infinity \ + Embrace," unfolds like a cosmic odyssey, lasting nearly ten minutes, drawing listeners deeper into its ethereal atmosphere. "Echoes of Eternity" \ + is a masterful testament to Seraphina Rivers' artistic prowess, leaving an enduring impact on all who embark on this musical voyage through \ + time and space. 
+ """ +) +``` + +### Output Example + +You can print the structured output like this: + +```python +print(output) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/llama_api/ diff --git a/llama-index-integrations/llms/llama-index-llms-llama-api/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-llama-api/pyproject.toml index 6820fa01ada20..7571a629b0191 100644 --- a/llama-index-integrations/llms/llama-index-llms-llama-api/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-llama-api/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-llama-api" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-llama-cpp/README.md b/llama-index-integrations/llms/llama-index-llms-llama-cpp/README.md index a7fe20919cd91..7a94af5fcfc78 100644 --- a/llama-index-integrations/llms/llama-index-llms-llama-cpp/README.md +++ b/llama-index-integrations/llms/llama-index-llms-llama-cpp/README.md @@ -1 +1,110 @@ # LlamaIndex Llms Integration: Llama Cpp + +## Installation + +1. Install the required Python packages: + + ```bash + %pip install llama-index-embeddings-huggingface + %pip install llama-index-llms-llama-cpp + !pip install llama-index + ``` + +## Basic Usage + +### Import Required Libraries + +```python +from llama_index.core import SimpleDirectoryReader, VectorStoreIndex +from llama_index.llms.llama_cpp import LlamaCPP +from llama_index.llms.llama_cpp.llama_utils import ( + messages_to_prompt, + completion_to_prompt, +) +``` + +### Initialize LlamaCPP + +Set up the model URL and initialize the LlamaCPP LLM: + +```python +model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin" +llm = LlamaCPP( + model_url=model_url, + temperature=0.1, + max_new_tokens=256, + context_window=3900, + generate_kwargs={}, + model_kwargs={"n_gpu_layers": 1}, + messages_to_prompt=messages_to_prompt, + completion_to_prompt=completion_to_prompt, + verbose=True, +) +``` + +### Generate Completions + +Use the `complete` method to generate a response: + +```python +response = llm.complete("Hello! 
Can you tell me a poem about cats and dogs?") +print(response.text) +``` + +### Stream Completions + +You can also stream completions for a prompt: + +```python +response_iter = llm.stream_complete("Can you write me a poem about fast cars?") +for response in response_iter: + print(response.delta, end="", flush=True) +``` + +### Set Up Query Engine with LlamaCPP + +Change the global tokenizer to match the LLM: + +```python +from llama_index.core import set_global_tokenizer +from transformers import AutoTokenizer + +set_global_tokenizer( + AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf").encode +) +``` + +### Use Hugging Face Embeddings + +Set up the embedding model and load documents: + +```python +from llama_index.embeddings.huggingface import HuggingFaceEmbedding + +embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") +documents = SimpleDirectoryReader( + "../../../examples/paul_graham_essay/data" +).load_data() +``` + +### Create Vector Store Index + +Create a vector store index from the loaded documents: + +```python +index = VectorStoreIndex.from_documents(documents, embed_model=embed_model) +``` + +### Set Up Query Engine + +Set up the query engine with the LlamaCPP LLM: + +```python +query_engine = index.as_query_engine(llm=llm) +response = query_engine.query("What did the author do growing up?") +print(response) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/llama_2_llama_cpp/ diff --git a/llama-index-integrations/llms/llama-index-llms-llama-cpp/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-llama-cpp/pyproject.toml index e2b8fc9ff326e..0719b206e903e 100644 --- a/llama-index-integrations/llms/llama-index-llms-llama-cpp/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-llama-cpp/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-llama-cpp" readme = "README.md" -version = "0.2.2" +version = "0.2.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-llamafile/README.md b/llama-index-integrations/llms/llama-index-llms-llamafile/README.md index 4ae977bc1fa93..d1653775fc341 100644 --- a/llama-index-integrations/llms/llama-index-llms-llamafile/README.md +++ b/llama-index-integrations/llms/llama-index-llms-llamafile/README.md @@ -1 +1,107 @@ # LlamaIndex Llms Integration: llamafile + +## Setup Steps + +### 1. Download a LlamaFile + +Use the following command to download a LlamaFile from Hugging Face: + +```bash +wget https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile +``` + +### 2. Make the File Executable + +On Unix-like systems, run the following command: + +```bash +chmod +x TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile +``` + +For Windows, simply rename the file to end with `.exe`. + +### 3. 
Start the Model Server + +Run the following command to start the model server, which will listen on `http://localhost:8080` by default: + +```bash +./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser --embedding +``` + +## Using LlamaIndex + +If you are using Google Colab or want to interact with LlamaIndex, you will need to install the necessary packages: + +```bash +%pip install llama-index-llms-llamafile +!pip install llama-index +``` + +### Import Required Libraries + +```python +from llama_index.llms.llamafile import Llamafile +from llama_index.core.llms import ChatMessage +``` + +### Initialize the LLM + +Create an instance of the LlamaFile LLM: + +```python +llm = Llamafile(temperature=0, seed=0) +``` + +### Generate Completions + +To generate a completion for a prompt, use the `complete` method: + +```python +resp = llm.complete("Who is Octavia Butler?") +print(resp) +``` + +### Call Chat with a List of Messages + +You can also interact with the LLM using a list of messages: + +```python +messages = [ + ChatMessage( + role="system", + content="Pretend you are a pirate with a colorful personality.", + ), + ChatMessage(role="user", content="What is your name?"), +] +resp = llm.chat(messages) +print(resp) +``` + +### Streaming Responses + +To use the streaming capabilities, you can call the `stream_complete` method: + +```python +response = llm.stream_complete("Who is Octavia Butler?") +for r in response: + print(r.delta, end="") +``` + +You can also stream chat responses: + +```python +messages = [ + ChatMessage( + role="system", + content="Pretend you are a pirate with a colorful personality.", + ), + ChatMessage(role="user", content="What is your name?"), +] +resp = llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/llamafile/ diff --git a/llama-index-integrations/llms/llama-index-llms-llamafile/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-llamafile/pyproject.toml index b3943726cd4cd..d5a3e9fa8a8d4 100644 --- a/llama-index-integrations/llms/llama-index-llms-llamafile/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-llamafile/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-llamafile" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-mistralai/README.md b/llama-index-integrations/llms/llama-index-llms-mistralai/README.md index 157f1bb62cdec..63903bfc077a8 100644 --- a/llama-index-integrations/llms/llama-index-llms-mistralai/README.md +++ b/llama-index-integrations/llms/llama-index-llms-mistralai/README.md @@ -1 +1,126 @@ # LlamaIndex Llms Integration: Mistral + +## Installation + +Install the required packages using the following commands: + +```bash +%pip install llama-index-llms-mistralai +!pip install llama-index +``` + +## Basic Usage + +### Initialize the MistralAI Model + +To use the MistralAI model, create an instance and provide your API key: + +```python +from llama_index.llms.mistralai import MistralAI + +llm = MistralAI(api_key="") +``` + +### Generate Completions + +To generate a text completion for a prompt, use the `complete` method: + +```python +resp = llm.complete("Paul Graham is ") +print(resp) +``` + +### Chat with the Model + +You can also chat with the model using a list of messages. 
Here’s an example: + +```python +from llama_index.core.llms import ChatMessage + +messages = [ + ChatMessage(role="system", content="You are CEO of MistralAI."), + ChatMessage(role="user", content="Tell me the story about La plateforme"), +] +resp = MistralAI().chat(messages) +print(resp) +``` + +### Using Random Seed + +To set a random seed for reproducibility, initialize the model with the `random_seed` parameter: + +```python +resp = MistralAI(random_seed=42).chat(messages) +print(resp) +``` + +## Streaming Responses + +### Stream Completions + +You can stream responses using the `stream_complete` method: + +```python +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") +``` + +### Stream Chat Responses + +To stream chat messages, use the following code: + +```python +messages = [ + ChatMessage(role="system", content="You are CEO of MistralAI."), + ChatMessage(role="user", content="Tell me the story about La plateforme"), +] +resp = llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") +``` + +## Configure Model + +To use a specific model configuration, initialize the model with the desired model name: + +```python +llm = MistralAI(model="mistral-medium") +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") +``` + +## Function Calling + +You can call functions from the model by defining tools. Here’s an example: + +```python +from llama_index.llms.mistralai import MistralAI +from llama_index.core.tools import FunctionTool + + +def multiply(a: int, b: int) -> int: + """Multiply two integers and return the result.""" + return a * b + + +def mystery(a: int, b: int) -> int: + """Mystery function on two integers.""" + return a * b + a + b + + +mystery_tool = FunctionTool.from_defaults(fn=mystery) +multiply_tool = FunctionTool.from_defaults(fn=multiply) + +llm = MistralAI(model="mistral-large-latest") +response = llm.predict_and_call( + [mystery_tool, multiply_tool], + user_msg="What happens if I run the mystery function on 5 and 7", +) +print(str(response)) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/mistralai/ diff --git a/llama-index-integrations/llms/llama-index-llms-mistralai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-mistralai/pyproject.toml index 85e85ff4c96c2..e6796efc23828 100644 --- a/llama-index-integrations/llms/llama-index-llms-mistralai/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-mistralai/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-mistralai" readme = "README.md" -version = "0.2.5" +version = "0.2.6" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/README.md b/llama-index-integrations/llms/llama-index-llms-modelscope/README.md index 58064ad24a95a..7a3dfab6737de 100644 --- a/llama-index-integrations/llms/llama-index-llms-modelscope/README.md +++ b/llama-index-integrations/llms/llama-index-llms-modelscope/README.md @@ -1 +1,52 @@ # LlamaIndex Llms Integration: ModelScope + +## Installation + +To install the required package, run: + +```bash +!pip install llama-index-llms-modelscope +``` + +## Basic Usage + +### Initialize the ModelScopeLLM + +To use the ModelScopeLLM model, create an instance by specifying the model name and revision: + +```python +import sys +from llama_index.llms.modelscope import ModelScopeLLM + +llm = ModelScopeLLM(model_name="qwen/Qwen1.5-7B-Chat", 
model_revision="master") +``` + +### Generate Completions + +To generate a text completion for a prompt, use the `complete` method: + +```python +rsp = llm.complete("Hello, who are you?") +print(rsp) +``` + +### Using Message Requests + +You can chat with the model by using a list of messages. Here’s how to set it up: + +```python +from llama_index.core.base.llms.types import MessageRole, ChatMessage + +messages = [ + ChatMessage( + role=MessageRole.SYSTEM, content="You are a helpful assistant." + ), + ChatMessage(role=MessageRole.USER, content="How to make cake?"), +] +resp = llm.chat(messages) +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/modelscope/ diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-modelscope/pyproject.toml index 1b873077b1551..6c595aff72c3a 100644 --- a/llama-index-integrations/llms/llama-index-llms-modelscope/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-modelscope/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-modelscope" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<3.12" diff --git a/llama-index-integrations/llms/llama-index-llms-mymagic/README.md b/llama-index-integrations/llms/llama-index-llms-mymagic/README.md index c016b53ecc3ea..26598d6fecfcb 100644 --- a/llama-index-integrations/llms/llama-index-llms-mymagic/README.md +++ b/llama-index-integrations/llms/llama-index-llms-mymagic/README.md @@ -1 +1,77 @@ # LlamaIndex Llms Integration: Mymagic + +## Installation + +To install the required package, run: + +```bash +%pip install llama-index-llms-mymagic +!pip install llama-index +``` + +## Setup + +Before you begin, set up your cloud storage bucket and grant MyMagic API secure access. For detailed instructions, visit the [MyMagic documentation](https://docs.mymagic.ai). + +### Initialize MyMagicAI + +Create an instance of MyMagicAI by providing your API key and storage configuration: + +```python +from llama_index.llms.mymagic import MyMagicAI + +llm = MyMagicAI( + api_key="your-api-key", + storage_provider="s3", # Options: 's3' or 'gcs' + bucket_name="your-bucket-name", + session="your-session-name", # Directory for batch inference + role_arn="your-role-arn", + system_prompt="your-system-prompt", + region="your-bucket-region", + return_output=False, # Set to True to return output JSON + input_json_file=None, # Input file stored on the bucket + list_inputs=None, # List of inputs for small batch + structured_output=None, # JSON schema of the output +) +``` + +> **Note:** If `return_output` is set to `True`, `max_tokens` should be at least 100. + +### Generate Completions + +To generate a text completion for a question, use the `complete` method: + +```python +resp = llm.complete( + question="your-question", + model="choose-model", # Supported models: mistral7b, llama7b, mixtral8x7b, codellama70b, llama70b, etc. 
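+    # keep generations short for a quick test; per the note above, use at least 100 when return_output=True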
+ max_tokens=5, # Number of tokens to generate (default is 10) +) +print( + resp +) # The response indicates if the final output is stored in your bucket or raises an exception if the job failed +``` + +### Asynchronous Requests + +For asynchronous operations, use the `acomplete` endpoint: + +```python +import asyncio + + +async def main(): + response = await llm.acomplete( + question="your-question", + model="choose-model", # Supported models listed in the documentation + max_tokens=5, # Number of tokens to generate (default is 10) + ) + print("Async completion response:", response) + + +await main() +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/mymagic/ diff --git a/llama-index-integrations/llms/llama-index-llms-mymagic/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-mymagic/pyproject.toml index 1ece88a163cf7..2768e8fc17a96 100644 --- a/llama-index-integrations/llms/llama-index-llms-mymagic/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-mymagic/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-mymagic" readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-neutrino/README.md b/llama-index-integrations/llms/llama-index-llms-neutrino/README.md index a1e193ccd4e6b..fca39fd15487a 100644 --- a/llama-index-integrations/llms/llama-index-llms-neutrino/README.md +++ b/llama-index-integrations/llms/llama-index-llms-neutrino/README.md @@ -1 +1,86 @@ # LlamaIndex Llms Integration: Neutrino + +## Installation + +To install the required packages, run: + +```bash +%pip install llama-index-llms-neutrino +!pip install llama-index +``` + +## Setup + +### Create Neutrino API Key + +You can create an API key by visiting [platform.neutrinoapp.com](https://platform.neutrinoapp.com). Once you have the API key, set it as an environment variable: + +```python +import os + +os.environ["NEUTRINO_API_KEY"] = "" +``` + +## Using Your Router + +A router is a collection of LLMs that you can route queries to. You can create a router in the Neutrino dashboard or use the default router, which includes all supported models. You can treat a router as a single LLM. + +### Initialize Neutrino + +Create an instance of the Neutrino model: + +```python +from llama_index.llms.neutrino import Neutrino + +llm = Neutrino( + # api_key="", + # router="" # Use 'default' for the default router +) +``` + +### Generate Completions + +To generate a text completion for a prompt, use the `complete` method: + +```python +response = llm.complete("In short, a Neutrino is") +print(f"Optimal model: {response.raw['model']}") +print(response) +``` + +### Chat Responses + +To send a chat message and receive a response, create a `ChatMessage` and use the `chat` method: + +```python +from llama_index.core.llms import ChatMessage + +message = ChatMessage( + role="user", + content="Explain the difference between statically typed and dynamically typed languages.", +) + +resp = llm.chat([message]) +print(f"Optimal model: {resp.raw['model']}") +print(resp) +``` + +### Streaming Responses + +To stream responses for a chat message, use the `stream_chat` method: + +```python +message = ChatMessage( + role="user", content="What is the approximate population of Mexico?" 
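+    # the router will pick the optimal model for this message; the first streamed chunk reports it via r.raw['model']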
+)
+
+resp = llm.stream_chat([message])
+for i, r in enumerate(resp):
+    if i == 0:
+        print(f"Optimal model: {r.raw['model']}")
+    print(r.delta, end="")
+```
+
+### LLM Implementation example
+
+https://docs.llamaindex.ai/en/stable/examples/llm/neutrino/
diff --git a/llama-index-integrations/llms/llama-index-llms-neutrino/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-neutrino/pyproject.toml
index ddcc39d1895ec..f8be75be97c09 100644
--- a/llama-index-integrations/llms/llama-index-llms-neutrino/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-neutrino/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-neutrino"
 readme = "README.md"
-version = "0.2.0"
+version = "0.2.1"

 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-integrations/llms/llama-index-llms-ollama/README.md b/llama-index-integrations/llms/llama-index-llms-ollama/README.md
index 2f9986193fdb4..a6cdcae6bf5b7 100644
--- a/llama-index-integrations/llms/llama-index-llms-ollama/README.md
+++ b/llama-index-integrations/llms/llama-index-llms-ollama/README.md
@@ -1 +1,133 @@
 # LlamaIndex Llms Integration: Ollama
+
+## Installation
+
+To install the required package, run:
+
+```bash
+%pip install llama-index-llms-ollama
+```
+
+## Setup
+
+1. Follow the [Ollama README](https://ollama.com) to set up and run a local Ollama instance.
+2. When the Ollama app is running on your local machine, it will serve all of your local models on `localhost:11434`.
+3. Select your model when creating the `Ollama` instance by specifying `model="<model_name>:<version>"`.
+4. You can increase the default timeout (30 seconds) by setting `Ollama(..., request_timeout=300.0)`.
+5. If you set `llm = Ollama(..., model="<model_name>")` without a version, it will automatically look for the latest version.
+
+## Usage
+
+### Initialize Ollama
+
+```python
+from llama_index.llms.ollama import Ollama
+
+llm = Ollama(model="llama3.1:latest", request_timeout=120.0)
+```
+
+### Generate Completions
+
+To generate a text completion for a prompt, use the `complete` method:
+
+```python
+resp = llm.complete("Who is Paul Graham?")
+print(resp)
+```
+
+### Chat Responses
+
+To send a chat message and receive a response, create a list of `ChatMessage` instances and use the `chat` method:
+
+```python
+from llama_index.core.llms import ChatMessage
+
+messages = [
+    ChatMessage(
+        role="system", content="You are a pirate with a colorful personality."
+    ),
+    ChatMessage(role="user", content="What is your name?"),
+]
+resp = llm.chat(messages)
+print(resp)
+```
+
+### Streaming Responses
+
+#### Stream Complete
+
+To stream responses for a prompt, use the `stream_complete` method:
+
+```python
+response = llm.stream_complete("Who is Paul Graham?")
+for r in response:
+    print(r.delta, end="")
+```
+
+#### Stream Chat
+
+To stream chat responses, use the `stream_chat` method:
+
+```python
+messages = [
+    ChatMessage(
+        role="system", content="You are a pirate with a colorful personality."
+    ),
+    ChatMessage(role="user", content="What is your name?"),
+]
+resp = llm.stream_chat(messages)
+for r in resp:
+    print(r.delta, end="")
+```
+
+### JSON Mode
+
+Ollama supports a JSON mode to ensure all responses are valid JSON, which is useful for tools that need to parse structured outputs:
+
+```python
+llm = Ollama(model="llama3.1:latest", request_timeout=120.0, json_mode=True)
+response = llm.complete(
+    "Who is Paul Graham? Output as a structured JSON object."
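+    # json_mode=True constrains the response to valid JSON; stating the desired structure in the prompt helps shape it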
+) +print(str(response)) +``` + +### Structured Outputs + +You can attach a Pydantic class to the LLM to ensure structured outputs: + +```python +from llama_index.core.bridge.pydantic import BaseModel +from llama_index.core.tools import FunctionTool + + +class Song(BaseModel): + """A song with name and artist.""" + + name: str + artist: str + + +llm = Ollama(model="llama3.1:latest", request_timeout=120.0) +sllm = llm.as_structured_llm(Song) + +response = sllm.chat([ChatMessage(role="user", content="Name a random song!")]) +print( + response.message.content +) # e.g., {"name": "Yesterday", "artist": "The Beatles"} +``` + +### Asynchronous Chat + +You can also use asynchronous chat: + +```python +response = await sllm.achat( + [ChatMessage(role="user", content="Name a random song!")] +) +print(response.message.content) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/ollama/ diff --git a/llama-index-integrations/llms/llama-index-llms-ollama/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-ollama/pyproject.toml index 17182735835a5..8e7868fb200d7 100644 --- a/llama-index-integrations/llms/llama-index-llms-ollama/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-ollama/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-ollama" readme = "README.md" -version = "0.3.3" +version = "0.3.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-openai/README.md b/llama-index-integrations/llms/llama-index-llms-openai/README.md index 53f8fbc0663a9..ba8b186702ad7 100644 --- a/llama-index-integrations/llms/llama-index-llms-openai/README.md +++ b/llama-index-integrations/llms/llama-index-llms-openai/README.md @@ -1 +1,131 @@ # LlamaIndex Llms Integration: Openai + +## Installation + +To install the required package, run: + +```bash +%pip install llama-index-llms-openai +``` + +## Setup + +1. Set your OpenAI API key as an environment variable. You can replace `"sk-..."` with your actual API key: + +```python +import os + +os.environ["OPENAI_API_KEY"] = "sk-..." +``` + +## Basic Usage + +### Generate Completions + +To generate a completion for a prompt, use the `complete` method: + +```python +from llama_index.llms.openai import OpenAI + +resp = OpenAI().complete("Paul Graham is ") +print(resp) +``` + +### Chat Responses + +To send a chat message and receive a response, create a list of `ChatMessage` instances and use the `chat` method: + +```python +from llama_index.core.llms import ChatMessage + +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality." + ), + ChatMessage(role="user", content="What is your name?"), +] +resp = OpenAI().chat(messages) +print(resp) +``` + +## Streaming Responses + +### Stream Complete + +To stream responses for a prompt, use the `stream_complete` method: + +```python +from llama_index.llms.openai import OpenAI + +llm = OpenAI() +resp = llm.stream_complete("Paul Graham is ") +for r in resp: + print(r.delta, end="") +``` + +### Stream Chat + +To stream chat responses, use the `stream_chat` method: + +```python +from llama_index.llms.openai import OpenAI +from llama_index.core.llms import ChatMessage + +llm = OpenAI() +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality." 
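+        # the system message fixes the persona that every streamed reply below will follow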
+    ),
+    ChatMessage(role="user", content="What is your name?"),
+]
+resp = llm.stream_chat(messages)
+for r in resp:
+    print(r.delta, end="")
+```
+
+## Configure Model
+
+You can specify a particular model when creating the `OpenAI` instance:
+
+```python
+llm = OpenAI(model="gpt-3.5-turbo")
+resp = llm.complete("Paul Graham is ")
+print(resp)
+
+messages = [
+    ChatMessage(
+        role="system", content="You are a pirate with a colorful personality."
+    ),
+    ChatMessage(role="user", content="What is your name?"),
+]
+resp = llm.chat(messages)
+print(resp)
+```
+
+## Asynchronous Usage
+
+You can also use asynchronous methods for completion:
+
+```python
+from llama_index.llms.openai import OpenAI
+
+llm = OpenAI(model="gpt-3.5-turbo")
+resp = await llm.acomplete("Paul Graham is ")
+print(resp)
+```
+
+## Set API Key at a Per-Instance Level
+
+If desired, you can have separate LLM instances use different API keys:
+
+```python
+from llama_index.llms.openai import OpenAI
+
+# This instance uses the per-instance key passed here; since "BAD_KEY" is not valid, the call below raises an authentication error.
+llm = OpenAI(model="gpt-3.5-turbo", api_key="BAD_KEY")
+resp = llm.complete("Paul Graham is ")
+print(resp)
+```
+
+### LLM Implementation example
+
+https://docs.llamaindex.ai/en/stable/examples/llm/openai/
diff --git a/llama-index-integrations/llms/llama-index-llms-openllm/README.md b/llama-index-integrations/llms/llama-index-llms-openllm/README.md
index 5112f894913e5..74f699089630c 100644
--- a/llama-index-integrations/llms/llama-index-llms-openllm/README.md
+++ b/llama-index-integrations/llms/llama-index-llms-openllm/README.md
@@ -1 +1,82 @@
 # LlamaIndex LLM Integration: OpenLLM
+
+## Installation
+
+To install the required packages, run:
+
+```bash
+%pip install llama-index-llms-openllm
+!pip install llama-index
+```
+
+## Setup
+
+### Initialize OpenLLM
+
+First, import the necessary libraries and set up your `OpenLLM` instance. Replace `my-model`, `https://hostname.com/v1`, and `na` with your model name, API base URL, and API key, respectively:
+
+```python
+import os
+from typing import List, Optional
+from llama_index.llms.openllm import OpenLLM
+from llama_index.core.llms import ChatMessage
+
+llm = OpenLLM(
+    model="my-model", api_base="https://hostname.com/v1", api_key="na"
+)
+```
+
+## Generate Completions
+
+To generate a completion, use the `complete` method:
+
+```python
+completion_response = llm.complete("To infinity, and")
+print(completion_response)
+```
+
+### Stream Completions
+
+You can also stream completions using the `stream_complete` method:
+
+```python
+for it in llm.stream_complete(
+    "The meaning of time is", max_new_tokens=128
+):
+    print(it.delta, end="", flush=True)
+```
+
+## Chat Functionality
+
+OpenLLM supports chat APIs, allowing you to handle conversation-like interactions. 
Here’s how to use it:
+
+### Synchronous Chat
+
+You can perform a synchronous chat by constructing a list of `ChatMessage` instances:
+
+```python
+from llama_index.core.llms import ChatMessage
+
+chat_messages = [
+    ChatMessage(role="system", content="You are acting as Ernest Hemingway."),
+    ChatMessage(role="user", content="Hi there!"),
+    ChatMessage(role="assistant", content="Yes?"),
+    ChatMessage(role="user", content="What is the meaning of life?"),
+]
+
+response = llm.chat(chat_messages)
+print(response.message.content)
+```
+
+### Asynchronous Chat
+
+To perform an asynchronous chat, use the `astream_chat` method:
+
+```python
+resp = await llm.astream_chat(chat_messages)
+async for it in resp:
+    print(it.delta, flush=True, end="")
+```
+
+### LLM Implementation example
+
+https://docs.llamaindex.ai/en/stable/examples/llm/openllm/
diff --git a/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml
index 1abdac55f8b03..ab927b5e08907 100644
--- a/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-openllm"
 readme = "README.md"
-version = "0.3.0"
+version = "0.3.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
diff --git a/llama-index-integrations/llms/llama-index-llms-openrouter/README.md b/llama-index-integrations/llms/llama-index-llms-openrouter/README.md
index 82d190fe92323..1236617bd9d72 100644
--- a/llama-index-integrations/llms/llama-index-llms-openrouter/README.md
+++ b/llama-index-integrations/llms/llama-index-llms-openrouter/README.md
@@ -1 +1,82 @@
 # LlamaIndex Llms Integration: Openrouter
+
+## Installation
+
+To install the required packages, run:
+
+```bash
+%pip install llama-index-llms-openrouter
+!pip install llama-index
+```
+
+## Setup
+
+### Initialize OpenRouter
+
+You need to set either the environment variable `OPENROUTER_API_KEY` or pass your API key directly in the class constructor. Replace `<your-api-key>` with your actual API key:
+
+```python
+from llama_index.llms.openrouter import OpenRouter
+from llama_index.core.llms import ChatMessage
+
+llm = OpenRouter(
+    api_key="<your-api-key>",
+    max_tokens=256,
+    context_window=4096,
+    model="gryphe/mythomax-l2-13b",
+)
+```
+
+## Generate Chat Responses
+
+You can generate a chat response by sending a list of `ChatMessage` instances:
+
+```python
+message = ChatMessage(role="user", content="Tell me a joke")
+resp = llm.chat([message])
+print(resp)
+```
+
+### Streaming Responses
+
+To stream responses, use the `stream_chat` method:
+
+```python
+message = ChatMessage(role="user", content="Tell me a story in 250 words")
+resp = llm.stream_chat([message])
+for r in resp:
+    print(r.delta, end="")
+```
+
+### Complete with Prompt
+
+You can also generate completions with a prompt using the `complete` method:
+
+```python
+resp = llm.complete("Tell me a joke")
+print(resp)
+```
+
+### Streaming Completion
+
+To stream completions, use the `stream_complete` method:
+
+```python
+resp = llm.stream_complete("Tell me a story in 250 words")
+for r in resp:
+    print(r.delta, end="")
+```
+
+## Model Configuration
+
+To use a specific model, you can specify it during initialization. 
For example, to use Mistral's Mixtral model, you can set it like this: + +```python +llm = OpenRouter(model="mistralai/mixtral-8x7b-instruct") +resp = llm.complete("Write a story about a dragon who can code in Rust") +print(resp) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/openrouter/ diff --git a/llama-index-integrations/llms/llama-index-llms-openrouter/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-openrouter/pyproject.toml index f6f47c817117a..fff78196c23e0 100644 --- a/llama-index-integrations/llms/llama-index-llms-openrouter/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-openrouter/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-openrouter" readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-openvino/README.md b/llama-index-integrations/llms/llama-index-llms-openvino/README.md index 0df8348471373..b039ac0f62467 100644 --- a/llama-index-integrations/llms/llama-index-llms-openvino/README.md +++ b/llama-index-integrations/llms/llama-index-llms-openvino/README.md @@ -1 +1,142 @@ # LlamaIndex Llms Integration: Openvino + +## Installation + +To install the required packages, run: + +```bash +%pip install llama-index-llms-openvino transformers huggingface_hub +!pip install llama-index +``` + +## Setup + +### Define Functions for Prompt Handling + +You will need functions to convert messages and completions into prompts: + +```python +from llama_index.llms.openvino import OpenVINOLLM + + +def messages_to_prompt(messages): + prompt = "" + for message in messages: + if message.role == "system": + prompt += f"<|system|>\n{message.content}\n" + elif message.role == "user": + prompt += f"<|user|>\n{message.content}\n" + elif message.role == "assistant": + prompt += f"<|assistant|>\n{message.content}\n" + + # Ensure we start with a system prompt, insert blank if needed + if not prompt.startswith("<|system|>\n"): + prompt = "<|system|>\n\n" + prompt + + # Add final assistant prompt + prompt = prompt + "<|assistant|>\n" + + return prompt + + +def completion_to_prompt(completion): + return f"<|system|>\n\n<|user|>\n{completion}\n<|assistant|>\n" +``` + +### Model Loading + +Models can be loaded by specifying parameters using the `OpenVINOLLM` method. If you have an Intel GPU, specify `device_map="gpu"` to run inference on it: + +```python +ov_config = { + "PERFORMANCE_HINT": "LATENCY", + "NUM_STREAMS": "1", + "CACHE_DIR": "", +} + +ov_llm = OpenVINOLLM( + model_id_or_path="HuggingFaceH4/zephyr-7b-beta", + context_window=3900, + max_new_tokens=256, + model_kwargs={"ov_config": ov_config}, + generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95}, + messages_to_prompt=messages_to_prompt, + completion_to_prompt=completion_to_prompt, + device_map="cpu", +) + +response = ov_llm.complete("What is the meaning of life?") +print(str(response)) +``` + +### Inference with Local OpenVINO Model + +Export your model to the OpenVINO IR format using the CLI and load it from a local folder. 
It’s recommended to apply 8 or 4-bit weight quantization to reduce inference latency and model footprint: + +```bash +!optimum-cli export openvino --model HuggingFaceH4/zephyr-7b-beta ov_model_dir +!optimum-cli export openvino --model HuggingFaceH4/zephyr-7b-beta --weight-format int8 ov_model_dir +!optimum-cli export openvino --model HuggingFaceH4/zephyr-7b-beta --weight-format int4 ov_model_dir +``` + +You can then load the model from the specified directory: + +```python +ov_llm = OpenVINOLLM( + model_id_or_path="ov_model_dir", + context_window=3900, + max_new_tokens=256, + model_kwargs={"ov_config": ov_config}, + generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95}, + messages_to_prompt=messages_to_prompt, + completion_to_prompt=completion_to_prompt, + device_map="gpu", +) +``` + +### Additional Optimization + +You can get additional inference speed improvements with dynamic quantization of activations and KV-cache quantization. Enable these options with `ov_config` as follows: + +```python +ov_config = { + "KV_CACHE_PRECISION": "u8", + "DYNAMIC_QUANTIZATION_GROUP_SIZE": "32", + "PERFORMANCE_HINT": "LATENCY", + "NUM_STREAMS": "1", + "CACHE_DIR": "", +} +``` + +## Streaming Responses + +To use the streaming capabilities, you can use the `stream_complete` and `stream_chat` methods: + +### Using `stream_complete` + +```python +response = ov_llm.stream_complete("Who is Paul Graham?") +for r in response: + print(r.delta, end="") +``` + +### Using `stream_chat` + +```python +from llama_index.core.llms import ChatMessage + +messages = [ + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), + ChatMessage(role="user", content="What is your name"), +] + +resp = ov_llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/openvino/ diff --git a/llama-index-integrations/llms/llama-index-llms-openvino/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-openvino/pyproject.toml index 3779982fac0f7..3071e01aa1647 100644 --- a/llama-index-integrations/llms/llama-index-llms-openvino/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-openvino/pyproject.toml @@ -30,7 +30,7 @@ license = "MIT" name = "llama-index-llms-openvino" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.3.1" +version = "0.3.2" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-optimum-intel/README.md b/llama-index-integrations/llms/llama-index-llms-optimum-intel/README.md index 800819e214879..c5c296529cbfa 100644 --- a/llama-index-integrations/llms/llama-index-llms-optimum-intel/README.md +++ b/llama-index-integrations/llms/llama-index-llms-optimum-intel/README.md @@ -1 +1,99 @@ # LlamaIndex Llms Integration: Optimum Intel IPEX backend + +## Installation + +To install the required packages, run: + +```bash +%pip install llama-index-llms-optimum-intel +!pip install llama-index +``` + +## Setup + +### Define Functions for Prompt Handling + +You will need functions to convert messages and completions into prompts: + +```python +from llama_index.llms.optimum_intel import OptimumIntelLLM + + +def messages_to_prompt(messages): + prompt = "" + for message in messages: + if message.role == "system": + prompt += f"<|system|>\n{message.content}\n" + elif message.role == "user": + prompt += f"<|user|>\n{message.content}\n" + elif message.role == "assistant": + prompt += 
f"<|assistant|>\n{message.content}\n" + + # Ensure we start with a system prompt, insert blank if needed + if not prompt.startswith("<|system|>\n"): + prompt = "<|system|>\n\n" + prompt + + # Add final assistant prompt + prompt = prompt + "<|assistant|>\n" + + return prompt + + +def completion_to_prompt(completion): + return f"<|system|>\n\n<|user|>\n{completion}\n<|assistant|>\n" +``` + +### Model Loading + +Models can be loaded by specifying parameters using the `OptimumIntelLLM` method: + +```python +oi_llm = OptimumIntelLLM( + model_name="Intel/neural-chat-7b-v3-3", + tokenizer_name="Intel/neural-chat-7b-v3-3", + context_window=3900, + max_new_tokens=256, + generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95}, + messages_to_prompt=messages_to_prompt, + completion_to_prompt=completion_to_prompt, + device_map="cpu", +) + +response = oi_llm.complete("What is the meaning of life?") +print(str(response)) +``` + +### Streaming Responses + +To use the streaming capabilities, you can use the `stream_complete` and `stream_chat` methods: + +#### Using `stream_complete` + +```python +response = oi_llm.stream_complete("Who is Mother Teresa?") +for r in response: + print(r.delta, end="") +``` + +#### Using `stream_chat` + +```python +from llama_index.core.llms import ChatMessage + +messages = [ + ChatMessage( + role="system", + content="You are an American chef in a small restaurant in New Orleans", + ), + ChatMessage(role="user", content="What is your dish of the day?"), +] + +resp = oi_llm.stream_chat(messages) + +for r in resp: + print(r.delta, end="") +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/optimum_intel/ diff --git a/llama-index-integrations/llms/llama-index-llms-optimum-intel/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-optimum-intel/pyproject.toml index d32c680e1b1ce..8c9423bf363b4 100644 --- a/llama-index-integrations/llms/llama-index-llms-optimum-intel/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-optimum-intel/pyproject.toml @@ -30,7 +30,7 @@ license = "MIT" name = "llama-index-llms-optimum-intel" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-palm/README.md b/llama-index-integrations/llms/llama-index-llms-palm/README.md index 67311fc30768a..bacd05b6c6b21 100644 --- a/llama-index-integrations/llms/llama-index-llms-palm/README.md +++ b/llama-index-integrations/llms/llama-index-llms-palm/README.md @@ -1 +1,71 @@ # LlamaIndex Llms Integration: Palm + +## Installation + +To install the required packages, run: + +```bash +%pip install llama-index-llms-palm +!pip install llama-index +!pip install -q google-generativeai +``` + +> **Note:** If you're using Colab, the above commands will install the necessary packages. 
If you see a notice about updating `pip`, you can do so with: +> +> ```bash +> pip install --upgrade pip +> ``` + +## Setup + +### Import Libraries and Configure API Key + +Import the necessary libraries and configure your PaLM API key: + +```python +import pprint +import google.generativeai as palm + +palm_api_key = "" # Add your API key here +palm.configure(api_key=palm_api_key) +``` + +### Define the Model + +List and select the available models that support text generation: + +```python +models = [ + m + for m in palm.list_models() + if "generateText" in m.supported_generation_methods +] + +model = models[0].name +print(model) +``` + +You should see output similar to: + +``` +models/text-bison-001 +``` + +### Using the PaLM LLM Abstraction + +Now you can use the PaLM model to generate text. Here’s how to complete a prompt: + +```python +from llama_index.llms.palm import PaLM + +model = PaLM(api_key=palm_api_key) + +# Example prompt +prompt = "Once upon a time in a faraway land, there lived a" +response = model.complete(prompt) +print(response) +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/palm/ diff --git a/llama-index-integrations/llms/llama-index-llms-palm/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-palm/pyproject.toml index ff9e820600e57..9832b640103ca 100644 --- a/llama-index-integrations/llms/llama-index-llms-palm/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-palm/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-palm" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-perplexity/README.md b/llama-index-integrations/llms/llama-index-llms-perplexity/README.md index 601edaf06ba84..e0c68ff3a9dc6 100644 --- a/llama-index-integrations/llms/llama-index-llms-perplexity/README.md +++ b/llama-index-integrations/llms/llama-index-llms-perplexity/README.md @@ -1 +1,86 @@ # LlamaIndex Llms Integration: Perplexity + +## Installation + +To install the required packages, run: + +```bash +%pip install llama-index-llms-perplexity +!pip install llama-index +``` + +## Setup + +### Import Libraries and Configure API Key + +Import the necessary libraries and set your Perplexity API key: + +```python +from llama_index.llms.perplexity import Perplexity + +pplx_api_key = "your-perplexity-api-key" # Replace with your actual API key +``` + +### Initialize the Perplexity LLM + +Create an instance of the Perplexity LLM with your API key and desired model settings: + +```python +llm = Perplexity( + api_key=pplx_api_key, model="mistral-7b-instruct", temperature=0.5 +) +``` + +## Chat Example + +### Sending a Chat Message + +You can send a chat message using the `chat` method. 
Here’s how to do that: + +```python +from llama_index.core.llms import ChatMessage + +messages_dict = [ + {"role": "system", "content": "Be precise and concise."}, + {"role": "user", "content": "Tell me 5 sentences about Perplexity."}, +] + +messages = [ChatMessage(**msg) for msg in messages_dict] + +# Get response from the model +response = llm.chat(messages) +print(response) +``` + +### Async Chat + +To send messages asynchronously, you can use the `achat` method: + +```python +response = await llm.achat(messages) +print(response) +``` + +### Stream Chat + +For streaming responses, you can use the `stream_chat` method: + +```python +resp = llm.stream_chat(messages) +for r in resp: + print(r.delta, end="") +``` + +### Async Stream Chat + +To stream responses asynchronously, use the `astream_chat` method: + +```python +resp = await llm.astream_chat(messages) +async for delta in resp: + print(delta.delta, end="") +``` + +### LLM Implementation example + +https://docs.llamaindex.ai/en/stable/examples/llm/perplexity/ diff --git a/llama-index-integrations/llms/llama-index-llms-perplexity/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-perplexity/pyproject.toml index dcd10699bda77..c55208965527a 100644 --- a/llama-index-integrations/llms/llama-index-llms-perplexity/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-perplexity/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-llms-perplexity" readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0"