diff --git a/docs/docs/module_guides/models/llms/modules.md b/docs/docs/module_guides/models/llms/modules.md
index 0b712beb23ef2..1a5656e700e24 100644
--- a/docs/docs/module_guides/models/llms/modules.md
+++ b/docs/docs/module_guides/models/llms/modules.md
@@ -49,6 +49,7 @@ We support integrations with OpenAI, Anthropic, Hugging Face, PaLM, and more.
 - [Replicate Vector Index Llama2](../../../examples/vector_stores/SimpleIndexDemoLlama2.ipynb)
 - [RunGPT](../../../examples/llm/rungpt.ipynb)
 - [SageMaker](../../../examples/llm/sagemaker_endpoint_llm.ipynb)
+- [SambaNova Systems](../../../examples/llm/sambanovasystems.ipynb)
 - [Solar](../../../examples/llm/solar.ipynb)
 - [Together.ai](../../../examples/llm/together.ipynb)
 - [Unify AI](../../../examples/llm/unify.ipynb)
diff --git a/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py b/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py
index fc846e0347a2e..817594ebb8b0a 100644
--- a/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-sambanovasystems/llama_index/llms/sambanovasystems/base.py
@@ -39,7 +39,6 @@ def _convert_message_to_dict(message: ChatMessage) -> Dict[str, Any]:
     Args:
         message: ChatMessage
-
     Returns:
         messages_dict: role / content dict
     """
@@ -55,7 +54,6 @@ def _create_message_dicts(messages: Sequence[ChatMessage]) -> List[Dict[str, Any
     Args:
         messages: list of ChatMessages
-
     Returns:
         messages_dicts: list of role / content dicts
     """
@@ -63,28 +61,29 @@
 class SambaNovaCloud(LLM):
-    """
-    SambaNova Cloud model.
+    """SambaNova Cloud models.
     Setup:
         To use, you should have the environment variables:
-        ``SAMBANOVA_URL`` set with your SambaNova Cloud URL.
-        ``SAMBANOVA_API_KEY`` set with your SambaNova Cloud API Key.
-        http://cloud.sambanova.ai/
-
-    Example:
-        .. code-block:: python
-            SambaNovaCloud(
-                sambanova_url = SambaNova cloud endpoint URL,
-                sambanova_api_key = set with your SambaNova cloud API key,
-                model = model name,
-                max_tokens = max number of tokens to generate,
-                temperature = model temperature,
-                top_p = model top p,
-                top_k = model top k,
-                stream_options = include usage to get generation metrics
-            )
-
+        `SAMBANOVA_URL` set with your SambaNova Cloud URL.
+        `SAMBANOVA_API_KEY` set with your SambaNova Cloud API Key.
+        http://cloud.sambanova.ai/.
+        Additionally, install the following packages:
+        `pip install llama-index-llms-sambanovasystems`
+        `pip install sseclient-py`
+    Examples:
+        ```python
+        SambaNovaCloud(
+            sambanova_url = SambaNova cloud endpoint URL,
+            sambanova_api_key = set with your SambaNova cloud API key,
+            model = model name,
+            max_tokens = max number of tokens to generate,
+            temperature = model temperature,
+            top_p = model top p,
+            top_k = model top k,
+            stream_options = include usage to get generation metrics
+        )
+        ```
     Key init args — completion params:
         model: str
             The name of the model to use, e.g., Meta-Llama-3-70B-Instruct.
@@ -100,18 +99,14 @@ class SambaNovaCloud(LLM):
             model top k
         stream_options: dict
             stream options, include usage to get generation metrics
-
     Key init args — client params:
         sambanova_url: str
             SambaNova Cloud Url
         sambanova_api_key: str
             SambaNova Cloud api key
-
     Instantiate:
-        .. code-block:: python
-
+        ```python
         from llama_index.llms.sambanovacloud import SambaNovaCloud
-
         llm = SambaNovaCloud(
             sambanova_url = SambaNova cloud endpoint URL,
             sambanova_api_key = set with your SambaNova cloud API key,
             model = model name,
             max_tokens = max number of tokens to generate,
             temperature = model temperature,
             top_p = model top p,
             top_k = model top k,
@@ -123,21 +118,22 @@ class SambaNovaCloud(LLM):
             stream_options = include usage to get generation metrics
             context_window = model context window
         )
+        ```
     Complete:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         response = llm.complete(prompt)
-
+        ```
     Chat:
-        .. code-block:: python
+        ```python
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         response = llm.chat(messages)
-
+        ```
     Stream:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         for chunk in llm.stream_complete(prompt):
@@ -147,27 +143,26 @@ class SambaNovaCloud(LLM):
             print(chunk.text)
         for chunk in llm.stream_chat(messages):
             print(chunk.message.content)
-
+        ```
     Async:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         asyncio.run(llm.acomplete(prompt))
-
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         asyncio.run(llm.achat(chat_text_msgs))
-
+        ```
     Response metadata and usage
-        .. code-block:: python
-
+        ```python
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         metadata_and_usage = llm.chat(messages).message.additional_kwargs
         print(metadata_and_usage)
+        ```
     """

     model_config = ConfigDict(
@@ -238,7 +233,6 @@ def _handle_request(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Returns:
             A response dict.
         """
@@ -284,7 +278,6 @@ async def _handle_request_async(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Returns:
             A response dict.
         """
@@ -329,7 +322,6 @@ def _handle_streaming_request(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Yields:
             An iterator of response dicts.
         """
@@ -413,7 +405,6 @@ async def _handle_streaming_request_async(
         Args:
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
-
         Yields:
             An iterator of response dicts.
         """
@@ -648,16 +639,15 @@ def astream_complete(
 class SambaStudio(LLM):
-    """
-    SambaStudio model.
+    """SambaStudio model.
     Setup:
         To use, you should have the environment variables:
         ``SAMBASTUDIO_URL`` set with your SambaStudio deployed endpoint URL.
         ``SAMBASTUDIO_API_KEY`` set with your SambaStudio deployed endpoint Key.
         https://docs.sambanova.ai/sambastudio/latest/index.html
-    Example:
-        .. code-block:: python
+    Examples:
+        ```python
         SambaStudio(
             sambastudio_url = set with your SambaStudio deployed endpoint URL,
             sambastudio_api_key = set with your SambaStudio deployed endpoint Key.
             model = model or expert name (set for CoE endpoints),
             max_tokens = max number of tokens to generate,
             temperature = model temperature,
             context_window = model context window,
             top_p = model top p,
             top_k = model top k,
             do_sample = whether to do sample
             process_prompt = whether to process prompt
                 (set for CoE generic v1 and v2 endpoints)
             stream_options = include usage to get generation metrics
             special_tokens = start, start_role, end_role, and special tokens
                 (set for CoE generic v1 and v2 endpoints when process prompt
@@ -676,7 +666,7 @@ class SambaStudio(LLM):
                 set to false or for StandAlone v1 and v2 endpoints)
             model_kwargs: Optional = Extra Key word arguments to pass to the model.
         )
-
+        ```
     Key init args — completion params:
         model: str
             The name of the model to use, e.g., Meta-Llama-3-70B-Instruct-4096
@@ -705,51 +695,48 @@ class SambaStudio(LLM):
                 or for StandAlone v1 and v2 endpoints)
                 default to llama3 special tokens
         model_kwargs: dict
             Extra Key word arguments to pass to the model.
-
     Key init args — client params:
         sambastudio_url: str
-            SambaStudio endpoint Url
+            SambaStudio endpoint URL
         sambastudio_api_key: str
             SambaStudio endpoint api key
-
     Instantiate:
-        .. code-block:: python
-
-        from llama_index.llms.sambanova import SambaStudio
-
-        llm = SambaStudio=(
-            sambastudio_url = set with your SambaStudio deployed endpoint URL,
-            sambastudio_api_key = set with your SambaStudio deployed endpoint Key.
-            model = model or expert name (set for CoE endpoints),
-            max_tokens = max number of tokens to generate,
-            temperature = model temperature,
-            context_window = model context window,
-            top_p = model top p,
-            top_k = model top k,
-            do_sample = whether to do sample
-            process_prompt = whether to process prompt
-                (set for CoE generic v1 and v2 endpoints)
-            stream_options = include usage to get generation metrics
-            special_tokens = start, start_role, end_role, and special tokens
-                (set for CoE generic v1 and v2 endpoints when process prompt
-                set to false or for StandAlone v1 and v2 endpoints)
-            model_kwargs: Optional = Extra Key word arguments to pass to the model.
-        )
+        ```python
+        from llama_index.llms.sambanovasystems import SambaStudio
+        llm = SambaStudio(
+            sambastudio_url = set with your SambaStudio deployed endpoint URL,
+            sambastudio_api_key = set with your SambaStudio deployed endpoint Key.
+            model = model or expert name (set for CoE endpoints),
+            max_tokens = max number of tokens to generate,
+            temperature = model temperature,
+            context_window = model context window,
+            top_p = model top p,
+            top_k = model top k,
+            do_sample = whether to do sample
+            process_prompt = whether to process prompt
+                (set for CoE generic v1 and v2 endpoints)
+            stream_options = include usage to get generation metrics
+            special_tokens = start, start_role, end_role, and special tokens
+                (set for CoE generic v1 and v2 endpoints when process prompt
+                set to false or for StandAlone v1 and v2 endpoints)
+            model_kwargs: Optional = Extra Key word arguments to pass to the model.
+        )
+        ```
     Complete:
-        .. code-block:: python
-        prompt = "Tell me about Naruto Uzumaki in one sentence"
-        response = llm.complete(prompt)
-
+        ```python
+        prompt = "Tell me about Naruto Uzumaki in one sentence"
+        response = llm.complete(prompt)
+        ```
     Chat:
-        .. code-block:: python
-        messages = [
-            ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
-            ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
-        ]
-        response = llm.chat(messages)
-
+        ```python
+        messages = [
+            ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
+            ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
+        ]
+        response = llm.chat(messages)
+        ```
     Stream:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         for chunk in llm.stream_complete(prompt):
@@ -759,27 +746,26 @@ class SambaStudio(LLM):
             print(chunk.text)
         for chunk in llm.stream_chat(messages):
             print(chunk.message.content)
-
+        ```
     Async:
-        .. code-block:: python
+        ```python
         prompt = "Tell me about Naruto Uzumaki in one sentence"
         asyncio.run(llm.acomplete(prompt))
-
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         asyncio.run(llm.achat(chat_text_msgs))
-
+        ```
     Response metadata and usage
-        .. code-block:: python
-
+        ```python
         messages = [
             ChatMessage(role=MessageRole.SYSTEM, content=("You're a helpful assistant")),
             ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence")
         ]
         metadata_and_usage = llm.chat(messages).message.additional_kwargs
         print(metadata_and_usage)
+        ```
     """

     model_config = ConfigDict(
@@ -881,7 +867,6 @@ def _messages_to_string(self, messages: Sequence[ChatMessage]) -> str:
         Args:
             messages: sequence of ChatMessages
-
         Returns:
             str: string to send as model input depending on process_prompt param
         """
@@ -915,7 +900,6 @@ def _get_sambastudio_urls(self, url: str) -> Tuple[str, str]:
         Args:
            url: string with sambastudio base or streaming endpoint url
-
         Returns:
             base_url: string with url to do non streaming calls
             streaming_url: string with url to do streaming calls
@@ -947,7 +931,6 @@ def _handle_request(
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
             streaming: whether to do a streaming call
-
         Returns:
             A request Response object
         """
@@ -1055,7 +1038,6 @@ async def _handle_request_async(
             messages_dicts: List of role / content dicts to use as input.
             stop: list of stop tokens
             streaming: whether to do a streaming call
-
         Returns:
             A request Response object
         """
@@ -1164,7 +1146,6 @@ def _process_response(self, response: Response) -> ChatMessage:
         Args:
             response: A request Response object
-
         Returns:
             generation: a ChatMessage with model generation
         """
@@ -1215,7 +1196,6 @@ def _process_stream_response(self, response: Response) -> Iterator[ChatMessage]:
         Args:
             response: An iterable request Response object
-
         Yields:
             generation: an Iterator[ChatMessage] with model partial generation
         """
@@ -1422,7 +1402,6 @@ async def _process_response_async(
         Args:
             response: A request Response object
-
         Returns:
             generation: a ChatMessage with model generation
         """
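For quick reference, a minimal, self-contained sketch of how the converted docstring examples above are meant to be used. It assumes the import path implied by the package layout in this diff (`llama_index.llms.sambanovasystems`), that `SAMBANOVA_URL` and `SAMBANOVA_API_KEY` are exported in the environment, and that the model name and sampling values shown are only illustrative:

```python
# Minimal usage sketch based on the docstring examples in this diff.
# Assumptions: `pip install llama-index-llms-sambanovasystems sseclient-py`,
# SAMBANOVA_URL and SAMBANOVA_API_KEY set in the environment, and the import
# path matching the package layout shown above.
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.llms.sambanovasystems import SambaNovaCloud

# Model name and generation parameters are illustrative values.
llm = SambaNovaCloud(
    model="Meta-Llama-3-70B-Instruct",
    max_tokens=1024,
    temperature=0.7,
)

# Completion
print(llm.complete("Tell me about Naruto Uzumaki in one sentence").text)

# Chat
messages = [
    ChatMessage(role=MessageRole.SYSTEM, content="You're a helpful assistant"),
    ChatMessage(role=MessageRole.USER, content="Tell me about Naruto Uzumaki in one sentence"),
]
response = llm.chat(messages)
print(response.message.content)

# Generation metadata and token usage, as in the "Response metadata and usage" example.
print(response.message.additional_kwargs)
```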