diff --git a/docs/docs/module_guides/models/llms/modules.md b/docs/docs/module_guides/models/llms/modules.md
index 0b712beb23ef2..1a5656e700e24 100644
--- a/docs/docs/module_guides/models/llms/modules.md
+++ b/docs/docs/module_guides/models/llms/modules.md
@@ -49,6 +49,7 @@ We support integrations with OpenAI, Anthropic, Hugging Face, PaLM, and more.
 - [Replicate Vector Index Llama2](../../../examples/vector_stores/SimpleIndexDemoLlama2.ipynb)
 - [RunGPT](../../../examples/llm/rungpt.ipynb)
 - [SageMaker](../../../examples/llm/sagemaker_endpoint_llm.ipynb)
+- [SambaNova Systems](../../../examples/llm/sambanovasystems.ipynb)
 - [Solar](../../../examples/llm/solar.ipynb)
 - [Together.ai](../../../examples/llm/together.ipynb)
 - [Unify AI](../../../examples/llm/unify.ipynb)
diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py b/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py
index fc846e0347a2e..817594ebb8b0a 100644
--- a/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py
@@ -39,7 +39,6 @@ def _convert_message_to_dict(message: ChatMessage) -> Dict[str, Any]:
     Args:
         message: ChatMessage
-
     Returns:
         messages_dict: role / content dict
     """
@@ -55,7 +54,6 @@ def _create_message_dicts(messages: Sequence[ChatMessage]) -> List[Dict[str, Any
     Args:
         messages: list of ChatMessages
-
     Returns:
         messages_dicts: list of role / content dicts
     """
@@ -63,28 +61,29 @@
 class SambaNovaCloud(LLM):
-    """
-    SambaNova Cloud model.
+    """SambaNova Cloud models.
     Setup:
         To use, you should have the environment variables:
-        ``SAMBANOVA_URL`` set with your SambaNova Cloud URL.
-        ``SAMBANOVA_API_KEY`` set with your SambaNova Cloud API Key.
-        http://cloud.sambanova.ai/
-
-    Example:
-        .. code-block:: python
-            SambaNovaCloud(
-                sambanova_url = SambaNova cloud endpoint URL,
-                sambanova_api_key = set with your SambaNova cloud API key,
-                model = model name,
-                max_tokens = max number of tokens to generate,
-                temperature = model temperature,
-                top_p = model top p,
-                top_k = model top k,
-                stream_options = include usage to get generation metrics
-            )
-
+        `SAMBANOVA_URL` set with your SambaNova Cloud URL.
+        `SAMBANOVA_API_KEY` set with your SambaNova Cloud API Key.
+        http://cloud.sambanova.ai/.
+        Additionally, install the following packages:
+        `pip install llama-index-llms-sambanovasystems`
+        `pip install sseclient-py`
+    Examples:
+        ```python
+        SambaNovaCloud(
+            sambanova_url = SambaNova cloud endpoint URL,
+            sambanova_api_key = set with your SambaNova cloud API key,
+            model = model name,
+            max_tokens = max number of tokens to generate,
+            temperature = model temperature,
+            top_p = model top p,
+            top_k = model top k,
+            stream_options = include usage to get generation metrics
+        )
+        ```
     Key init args — completion params:
         model: str
             The name of the model to use, e.g., Meta-Llama-3-70B-Instruct.
@@ -100,18 +99,14 @@ class SambaNovaCloud(LLM):
             model top k
         stream_options: dict
             stream options, include usage to get generation metrics
-
     Key init args — client params:
         sambanova_url: str
             SambaNova Cloud Url
         sambanova_api_key: str
             SambaNova Cloud api key
-
     Instantiate:
-        .. code-block:: python
-
+        ```python
         from llama_index.llms.sambanovacloud import SambaNovaCloud
-
         llm = SambaNovaCloud(
             sambanova_url = SambaNova cloud endpoint URL,
             sambanova_api_key = set with your SambaNova cloud API key,
             model = model name,
             max_tokens = max number of tokens to generate,
             temperature = model temperature,
             top_p = model top p,
             top_k = model top k,
@@ -123,21 +118,22 @@ class SambaNovaCloud(LLM):
             stream_options = include usage to get generation metrics
             context_window = model context window
         )
+        ```
     Complete:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         response = llm.complete(prompt)
-
+        ```
     Chat:
-        .. code-block:: python
+        ```python
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         response = llm.chat(messages)
-
+        ```
     Stream:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         for chunk in llm.stream_complete(prompt):
@@ -147,27 +143,26 @@ class SambaNovaCloud(LLM):
             print(chunk.text)
         for chunk in llm.stream_chat(messages):
             print(chunk.message.content)
-
+        ```
     Async:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         asyncio.run(llm.acomplete(prompt))
-
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         asyncio.run(llm.achat(chat_text_msgs))
-
+        ```
     Response metadata and usage
-        .. code-block:: python
-
+        ```python
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         metadata_and_usage = llm.chat(messages).message.additional_kwargs
         print(metadata_and_usage)
+        ```
     """

     model_config = ConfigDict(
@@ -238,7 +233,6 @@ def _handle_request(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Returns:
             A response dict.
         """
@@ -284,7 +278,6 @@ async def _handle_request_async(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Returns:
             A response dict.
         """
@@ -329,7 +322,6 @@ def _handle_streaming_request(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Yields:
             An iterator of response dicts.
         """
@@ -413,7 +405,6 @@ async def _handle_streaming_request_async(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Yields:
             An iterator of response dicts.
         """
@@ -648,16 +639,15 @@ def astream_complete(
 class SambaStudio(LLM):
-    """
-    SambaStudio model.
+    """SambaStudio model.
     Setup:
         To use, you should have the environment variables:
         ``SAMBASTUDIO_URL`` set with your SambaStudio deployed endpoint URL.
         ``SAMBASTUDIO_API_KEY`` set with your SambaStudio deployed endpoint Key.
         https://docs.sambanova.ai/sambastudio/latest/index.html
-    Example:
-        .. code-block:: python
+    Examples:
+        ```python
         SambaStudio(
             sambastudio_url = set with your SambaStudio deployed endpoint URL,
             sambastudio_api_key = set with your SambaStudio deployed endpoint Key.
             model = model or expert name (set for CoE endpoints),
             max_tokens = max number of tokens to generate,
             temperature = model temperature,
             context_window = model context window,
             top_p = model top p,
             top_k = model top k,
             do_sample = whether to do sample
             process_prompt = whether to process prompt
                 (set for CoE generic v1 and v2 endpoints)
             stream_options = include usage to get generation metrics
             special_tokens = start, start_role, end_role, and special tokens
                 (set for CoE generic v1 and v2 endpoints when process prompt
@@ -676,7 +666,7 @@ class SambaStudio(LLM):
                 set to false or for StandAlone v1 and v2 endpoints)
             model_kwargs: Optional = Extra Key word arguments to pass to the model.
         )
-
+        ```
     Key init args — completion params:
         model: str
             The name of the model to use, e.g., Meta-Llama-3-70B-Instruct-4096
@@ -705,51 +695,48 @@ class SambaStudio(LLM):
                 or for StandAlone v1 and v2 endpoints)
                 default to llama3 special tokens
         model_kwargs: dict
             Extra Key word arguments to pass to the model.
-
     Key init args — client params:
         sambastudio_url: str
-            SambaStudio endpoint Url
+            SambaStudio endpoint URL
         sambastudio_api_key: str
             SambaStudio endpoint api key
-
     Instantiate:
-        .. code-block:: python
-
-        from llama_index.llms.sambanova import SambaStudio
-
-        llm = SambaStudio=(
-            sambastudio_url = set with your SambaStudio deployed endpoint URL,
-            sambastudio_api_key = set with your SambaStudio deployed endpoint Key.
-            model = model or expert name (set for CoE endpoints),
-            max_tokens = max number of tokens to generate,
-            temperature = model temperature,
-            context_window = model context window,
-            top_p = model top p,
-            top_k = model top k,
-            do_sample = whether to do sample
-            process_prompt = whether to process prompt
-                (set for CoE generic v1 and v2 endpoints)
-            stream_options = include usage to get generation metrics
-            special_tokens = start, start_role, end_role, and special tokens
-                (set for CoE generic v1 and v2 endpoints when process prompt
-                set to false or for StandAlone v1 and v2 endpoints)
-            model_kwargs: Optional = Extra Key word arguments to pass to the model.
-        )
+        ```python
+        from llama_index.llms.sambanovasystems import SambaStudio
+        llm = SambaStudio(
+            sambastudio_url = set with your SambaStudio deployed endpoint URL,
+            sambastudio_api_key = set with your SambaStudio deployed endpoint Key.
+            model = model or expert name (set for CoE endpoints),
+            max_tokens = max number of tokens to generate,
+            temperature = model temperature,
+            context_window = model context window,
+            top_p = model top p,
+            top_k = model top k,
+            do_sample = whether to do sample
+            process_prompt = whether to process prompt
+                (set for CoE generic v1 and v2 endpoints)
+            stream_options = include usage to get generation metrics
+            special_tokens = start, start_role, end_role, and special tokens
+                (set for CoE generic v1 and v2 endpoints when process prompt
+                set to false or for StandAlone v1 and v2 endpoints)
+            model_kwargs: Optional = Extra Key word arguments to pass to the model.
+        )
+        ```
     Complete:
-        .. code-block:: python
-        prompt = "Tell me about Naruto Uzumaki in one sentence"
-        response = llm.complete(prompt)
-
+        ```python
+        prompt = "Tell me about Naruto Uzumaki in one sentence"
+        response = llm.complete(prompt)
+        ```
     Chat:
-        .. code-block:: python
-        messages = [
-            ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
-            ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
-        ]
-        response = llm.chat(messages)
-
+        ```python
+        messages = [
+            ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
+            ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
+        ]
+        response = llm.chat(messages)
+        ```
     Stream:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         for chunk in llm.stream_complete(prompt):
@@ -759,27 +746,26 @@ class SambaStudio(LLM):
             print(chunk.text)
         for chunk in llm.stream_chat(messages):
             print(chunk.message.content)
-
+        ```
     Async:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         asyncio.run(llm.acomplete(prompt))
-
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         asyncio.run(llm.achat(chat_text_msgs))
-
+        ```
     Response metadata and usage
-        .. code-block:: python
-
+        ```python
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         metadata_and_usage = llm.chat(messages).message.additional_kwargs
         print(metadata_and_usage)
+        ```
     """

     model_config = ConfigDict(
@@ -881,7 +867,6 @@ def _messages_to_string(self, messages: Sequence[ChatMessage]) -> str:
         Args:
             messages: sequence of ChatMessages
-
         Returns:
             str: string to send as model input depending on process_prompt param
         """
@@ -915,7 +900,6 @@ def _get_sambastudio_urls(self, url: str) -> Tuple[str, str]:
         Args:
            url: string with sambastudio base or streaming endpoint url
-
         Returns:
             base_url: string with url to do non streaming calls
             streaming_url: string with url to do streaming calls
@@ -947,7 +931,6 @@ def _handle_request(
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
             streaming: whether to do a streaming call
-
         Returns:
             A request Response object
         """
@@ -1055,7 +1038,6 @@ async def _handle_request_async(
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
             streaming: whether to do a streaming call
-
         Returns:
             A request Response object
         """
@@ -1164,7 +1146,6 @@ def _process_response(self, response: Response) -> ChatMessage:
         Args:
             response: A request Response object
-
         Returns:
             generation: a ChatMessage with model generation
         """
@@ -1215,7 +1196,6 @@ def _process_stream_response(self, response: Response) -> Iterator[ChatMessage]:
         Args:
             response: An iterable request Response object
-
         Yields:
             generation: an Iterator[ChatMessage] with model partial generation
         """
@@ -1422,7 +1402,6 @@ async def _process_response_async(
         Args:
             response: A request Response object
-
         Returns:
             generation: a ChatMessage with model generation
         """
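For quick reference, a minimal, self-contained sketch of how the converted docstring examples above are meant to be used. It assumes the import path implied by the package layout in this diff (`llama_index.llms.sambanovasystems`), that `SAMBANOVA_URL` and `SAMBANOVA_API_KEY` are exported in the environment, and that the model name and sampling values shown are only illustrative:

```python
# Minimal usage sketch based on the docstring examples in this diff.
# Assumptions: `pip install llama-index-llms-sambanovasystems sseclient-py`,
# SAMBANOVA_URL and SAMBANOVA_API_KEY set in the environment, and the import
# path matching the package layout shown above.
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.llms.sambanovasystems import SambaNovaCloud

# Model name and generation parameters are illustrative values.
llm = SambaNovaCloud(
    model="Meta-Llama-3-70B-Instruct",
    max_tokens=1024,
    temperature=0.7,
)

# Completion
print(llm.complete("Tell me about Naruto Uzumaki in one sentence").text)

# Chat
messages = [
    ChatMessage(role=MessageRole.SYSTEM, content="You're a helpful assistant"),
    ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence"),
]
response = llm.chat(messages)
print(response.message.content)

# Generation metadata and token usage, as in the "Response metadata and usage" example.
print(response.message.additional_kwargs)
```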