From 794f441f5fcd8192a2bb6f81deaee12283b37d96 Mon Sep 17 00:00:00 2001 From: Cheng Qian Date: Fri, 8 Nov 2024 01:16:35 -0500 Subject: [PATCH] feat: watsonx support --- .github/workflows/contrib-tests.yml | 36 ++ autogen/oai/client.py | 12 + autogen/oai/client_utils.py | 2 +- autogen/oai/watsonx.py | 369 +++++++++++++ autogen/runtime_logging.py | 2 + setup.py | 1 + test/oai/test_watsonx.py | 67 +++ .../non-openai-models/cloud-watsonx.ipynb | 500 ++++++++++++++++++ 8 files changed, 988 insertions(+), 1 deletion(-) create mode 100644 autogen/oai/watsonx.py create mode 100644 test/oai/test_watsonx.py create mode 100644 website/docs/topics/non-openai-models/cloud-watsonx.ipynb diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml index 7d779bc5fae..b5668c5fae1 100644 --- a/.github/workflows/contrib-tests.yml +++ b/.github/workflows/contrib-tests.yml @@ -741,3 +741,39 @@ jobs: with: file: ./coverage.xml flags: unittests + + WatsonxTest: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + with: + lfs: true + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install packages and dependencies for all tests + run: | + python -m pip install --upgrade pip wheel + pip install pytest-cov>=5 + - name: Install packages and dependencies for Watsonx + run: | + pip install -e .[watsonx,test] + - name: Set AUTOGEN_USE_DOCKER based on OS + shell: bash + run: | + if [[ ${{ matrix.os }} != ubuntu-latest ]]; then + echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV + fi + - name: Coverage + run: | + pytest test/oai/test_watsonx.py --skip-openai + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 481e55728c5..759b02f8ecf 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -107,6 +107,13 @@ except ImportError as e: bedrock_import_exception = e +try: + from autogen.oai.watsonx import WatsonxClient + + watsonx_import_exception: Optional[ImportError] = None +except ImportError as e: + watsonx_import_exception = e + logger = logging.getLogger(__name__) if not logger.handlers: # Add the console handler. 
@@ -563,6 +570,11 @@ def _register_default_client(self, config: Dict[str, Any], openai_config: Dict[s raise ImportError("Please install `boto3` to use the Amazon Bedrock API.") client = BedrockClient(**openai_config) self._clients.append(client) + elif api_type is not None and api_type.startswith("watsonx"): + if watsonx_import_exception: + raise ImportError("Please install `ibm-watsonx-ai` to use the Watsonx API.") + client = WatsonxClient(**openai_config) + self._clients.append(client) else: client = OpenAI(**openai_config) self._clients.append(OpenAIClient(client)) diff --git a/autogen/oai/client_utils.py b/autogen/oai/client_utils.py index 5da4c49c2a9..55ad6758c2c 100644 --- a/autogen/oai/client_utils.py +++ b/autogen/oai/client_utils.py @@ -16,7 +16,7 @@ def validate_parameter( allow_None: bool, default_value: Any, numerical_bound: Tuple, - allowed_values: list, + allowed_values: list | None, ) -> Any: """ Validates a given config parameter, checking its type, values, and setting defaults diff --git a/autogen/oai/watsonx.py b/autogen/oai/watsonx.py new file mode 100644 index 00000000000..63f944a3b00 --- /dev/null +++ b/autogen/oai/watsonx.py @@ -0,0 +1,369 @@ +"""Create an OpenAI-compatible client using Watsonx's API. + +Example: + llm_config={ + "config_list": [{ + "api_type": "watsonx", + "model": "ibm/granite-3-8b-instruct", + "api_key": os.environ.get("WATSONX_API_KEY"), + "space_id": os.environ.get("WATSONX_SPACE_ID"), + } + ]} + + agent = autogen.AssistantAgent("my_agent", llm_config=llm_config) + +Install Watsonx's python library using: pip install --upgrade ibm_watsonx_ai + +Resources: +- https://cloud.ibm.com/apidocs/watsonx-ai#text-chat +- https://ibm.github.io/watsonx-ai-python-sdk/fm_model_inference.html#ibm_watsonx_ai.foundation_models.inference.ModelInference.chat +""" + +from __future__ import annotations + +import copy +import logging +import os +import sys +import time +import warnings +from typing import Any, Dict, Iterable, List, Optional + +from ibm_watsonx_ai.foundation_models.model import ModelInference +from openai.types.chat import ChatCompletion, ChatCompletionMessageToolCall +from openai.types.chat.chat_completion import ChatCompletionMessage, Choice +from openai.types.chat.chat_completion_message_tool_call import Function +from openai.types.completion_usage import CompletionUsage + +from .client_utils import logger_formatter, validate_parameter + +logger = logging.getLogger(__name__) +if not logger.handlers: + # Add the console handler. + _ch = logging.StreamHandler(stream=sys.stdout) + _ch.setFormatter(logger_formatter) + logger.addHandler(_ch) + + +# see full lists of models on https://www.ibm.com/products/watsonx-ai/foundation-models#generative +# here only the latest IBM granite models are listed +WATSONX_PRICING_1K = { + "ibm/granite-3-8b-instruct": (0.0002, 0.0002), +} + + +def calculate_watsonx_cost(prompt_tokens, completion_tokens, model_id): + total = 0.0 + + if model_id in WATSONX_PRICING_1K: + input_cost_per_k, output_cost_per_k = WATSONX_PRICING_1K[model_id] + input_cost = (prompt_tokens / 1000) * input_cost_per_k + output_cost = (completion_tokens / 1000) * output_cost_per_k + total = input_cost + output_cost + else: + warnings.warn(f"Cost calculation not available for {model_id} model", UserWarning) + + return total + + +class WatsonxClient: + """Client for Watsonx's API.""" + + def __init__(self, **kwargs): + """Requires api_key or environment variable to be set. 
+ Requires one of space_id or project_id + URL is optional and defaults to US south Watsonx SaaS deployment + + Args: + api_key (str): The API key for using Watsonx (or environment variable WATSONX_API_KEY needs to be set) + url (str): The Watsonx instance url for using Watsonx (or environment variable WATSONX_URL can be set) + space_id (str): The space id for using Watsonx (or environment variable WATSONX_SPACE_ID needs to be set) + project_id (str): The project id for using Watsonx (or environment variable WATSONX_PROJECT_ID needs to be set) + """ + # url + self.url = kwargs.get("url", None) + if not self.url: + self.url = os.getenv("WATSONX_URL") + if not self.url: + self.url = "https://us-south.ml.cloud.ibm.com" + + # api key is required + self.api_key = kwargs.get("api_key", None) + if not self.api_key: + self.api_key = os.getenv("WATSONX_API_KEY") + assert ( + self.api_key + ), "Please include the api_key in your config list entry for Watsonx or set the WATSONX_API_KEY env variable." + + # one of space_id or project_id should be provided + self.space_id = kwargs.get("space_id", None) + if not self.space_id: + self.space_id = os.getenv("WATSONX_SPACE_ID") + self.project_id = kwargs.get("project_id", None) + if not self.project_id: + self.project_id = os.getenv("WATSONX_PROJECT_ID") + assert ( + self.space_id or self.project_id + ), "Please include the space_id/project_id in your config list entry for Watsonx or set the WATSONX_SPACE_ID/WATSONX_PROJECT_ID env variable." + + def message_retrieval(self, response) -> List: + """ + Retrieve and return a list of strings or a list of Choice.Message from the response. + + NOTE: if a list of Choice.Message is returned, it currently needs to contain the fields of OpenAI's ChatCompletion Message object, + since that is expected for function or tool calling in the rest of the codebase at the moment, unless a custom agent is being used. + """ + return [choice.message for choice in response.choices] + + def cost(self, response) -> float: + return response.cost + + @staticmethod + def get_usage(response) -> Dict: + """Return usage summary of the response using RESPONSE_USAGE_KEYS.""" + # ... # pragma: no cover + return { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens, + "cost": response.cost, + "model": response.model, + } + + def parse_params(self, params: Dict[str, Any]) -> Dict[str, Any]: + """Loads the parameters for Watsonx API from the passed in parameters and returns a validated set. 
Checks types, ranges, and sets defaults""" + wx_params = {} + # Validate allowed Watsonx parameters + # https://ibm.github.io/watsonx-ai-python-sdk/fm_schema.html#ibm_watsonx_ai.foundation_models.schema.TextChatParameters + # https://cloud.ibm.com/apidocs/watsonx-ai#text-chat + wx_params["frequency_penalty"] = validate_parameter( + params, "frequency_penalty", (int, float), True, None, (0, 1), None + ) + wx_params["max_tokens"] = validate_parameter(params, "max_tokens", (int,), True, None, (0, None), None) + wx_params["presence_penalty"] = validate_parameter( + params, "presence_penalty", (int, float), True, None, (0, 1), None + ) + wx_params["temperature"] = validate_parameter(params, "temperature", (int, float), True, None, (0, None), None) + wx_params["top_p"] = validate_parameter(params, "top_p", (int, float), True, None, (0.01, 0.99), None) + + # ignored params: + # logprobs/top_logprobs: this is only for returning the logits + # response_format: leave as default, which is json https://ibm.github.io/watsonx-ai-python-sdk/fm_schema.html#ibm_watsonx_ai.foundation_models.schema.TextChatResponseFormatType + # time_limit + # n: How many chat completion choices to generate for each input message. + + return wx_params + + def create(self, params: Dict) -> ChatCompletion: + # get model id + model_id = params.get("model", None) + assert model_id, "Please specify `model` in the config list entry for which Watsonx model to use" + # chat/chat_stream args + _messages = params.get("messages", []) + wx_params = self.parse_params(params) + messages, tools, tool_choice, tool_choice_option = oai_messages_to_watsonx_messages(_messages, params) + + # We use chat model by default + client = ModelInference( + model_id=model_id, + credentials={ + "api_key": self.api_key, + "url": self.url, + }, + space_id=self.space_id, + project_id=self.project_id, + params=wx_params, + ) + + # Stream if in parameters + streaming = True if "stream" in params and params["stream"] else False + + # make the call to watsonx api + if streaming: + response = client.chat_stream( + messages=messages, + params=wx_params, + tools=tools, + tool_choice=tool_choice, + tool_choice_option=tool_choice_option, + ) + else: + response = client.chat( + messages=messages, + params=wx_params, + tools=tools, + tool_choice=tool_choice, + tool_choice_option=tool_choice_option, + ) + + # response parsing + if streaming: + # components for full final response + response_id = "" + response_content = "" + finish_reason = "" + prompt_tokens = 0 + completion_tokens = 0 + total_tokens = 0 + created = 0 + full_tool_calls: Optional[List[Optional[Dict[str, Any]]]] = None + + # Send the chat completion request to OpenAI's API and process the response in chunks + for chunk in response: + if chunk.get("choices", []): + choice = chunk["choices"][0] + + # update metadata with the last chunk + if choice["finish_reason"]: + response_id = chunk["id"] + finish_reason = choice["finish_reason"] + prompt_tokens = choice["usage"]["prompt_tokens"] + completion_tokens = choice["usage"]["completion_tokens"] + total_tokens = choice["usage"]["total_tokens"] + created = chunk["created"] + + # concatenate content + _content = choice["delta"].get("content") + if _content: + _content = _content_str_repr(_content) + if _content: + response_content += _content + + # concatenate tool calls + tool_calls_chunks = choice["delta"].get("tool_calls", []) + if tool_calls_chunks: + for tool_calls_chunk in tool_calls_chunks: + # the current tool call to be reconstructed + ix = 
tool_calls_chunk["index"]
+                            if full_tool_calls is None:
+                                full_tool_calls = []
+                            if ix >= len(full_tool_calls):
+                                # in case ix is not sequential
+                                full_tool_calls = full_tool_calls + [None] * (ix - len(full_tool_calls) + 1)
+                            if full_tool_calls[ix] is None:
+                                # initialize with empty strings so the += concatenation below works
+                                full_tool_calls[ix] = {"name": "", "arguments": ""}
+                            full_tool_calls[ix]["name"] += tool_calls_chunk["function"].get("name") or ""
+                            full_tool_calls[ix]["arguments"] += tool_calls_chunk["function"].get("arguments") or ""
+                            if "id" not in full_tool_calls[ix] and "id" in tool_calls_chunk:
+                                full_tool_calls[ix]["id"] = tool_calls_chunk["id"]
+
+            message = ChatCompletionMessage(
+                content=response_content,
+                role="assistant",
+                tool_calls=(
+                    [
+                        ChatCompletionMessageToolCall(
+                            id=tool_call["id"],
+                            function=Function(
+                                name=tool_call["name"],
+                                arguments=tool_call["arguments"],
+                            ),
+                            type="function",
+                        )
+                        for tool_call in full_tool_calls
+                    ]
+                    if full_tool_calls
+                    else None
+                ),
+            )
+            choice = Choice(finish_reason=finish_reason, index=0, message=message)
+            response_oai = ChatCompletion(
+                id=response_id,
+                model=model_id,
+                created=created,
+                object="chat.completion",
+                choices=[choice],
+                usage=CompletionUsage(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                ),
+                cost=calculate_watsonx_cost(prompt_tokens, completion_tokens, model_id),
+            )
+        else:
+            # Non-streaming finished
+            choice = response["choices"][0]
+            message = ChatCompletionMessage(
+                content=_content_str_repr(choice["message"]["content"]) if "content" in choice["message"] else None,
+                role="assistant",
+                tool_calls=(
+                    [
+                        ChatCompletionMessageToolCall(
+                            id=tool_call["id"],
+                            function=Function(
+                                name=tool_call["function"]["name"],
+                                arguments=tool_call["function"]["arguments"],
+                            ),
+                            type="function",
+                        )
+                        for tool_call in choice["message"]["tool_calls"]
+                    ]
+                    if choice["message"].get("tool_calls")
+                    else None
+                ),
+            )
+            choices = [Choice(finish_reason=choice["finish_reason"], index=0, message=message)]
+            prompt_tokens = response["usage"]["prompt_tokens"]
+            completion_tokens = response["usage"]["completion_tokens"]
+            total_tokens = response["usage"]["total_tokens"]
+            response_id = response["id"]
+
+            response_oai = ChatCompletion(
+                id=response_id,
+                model=model_id,
+                created=int(time.time()),
+                object="chat.completion",
+                choices=choices,
+                usage=CompletionUsage(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                ),
+                cost=calculate_watsonx_cost(prompt_tokens, completion_tokens, model_id),
+            )
+        return response_oai
+
+
+def oai_messages_to_watsonx_messages(
+    messages: list[Dict[str, Any]], params: Dict[str, Any]
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None, dict[str, Any] | None, str | None]:
+    """Convert messages from OAI format to Watsonx's format, which is mostly consistent with OAI format at the time of writing.
+    https://cloud.ibm.com/apidocs/watsonx-ai#text-chat
+
+    Parameters:
+        messages: list[Dict[str, Any]]: AutoGen messages
+        params: Dict[str, Any]: AutoGen parameters dictionary
+
+    Returns:
+        List[Dict[str, Any]]: Chat History messages
+        list[dict[str, Any]] | None: tools (list of available tools)
+        dict[str, Any] | None: Specifying a particular tool to force the model to call that tool.
+ str | None: tool choice option + """ + + # Tools + tools = params.get("tools", None) + + oai_tool_choice = params.get("tool_choice", None) + tool_choice = None + tool_choice_option = None + if oai_tool_choice is not None: + if isinstance(oai_tool_choice, str): + tool_choice_option = oai_tool_choice + else: + tool_choice = oai_tool_choice + + # messages + wx_messages = copy.deepcopy(messages) + + return wx_messages, tools, tool_choice, tool_choice_option + + +def _content_str_repr(content: str | list[dict[str, Any]]): + """content in message can be a string or a list of dictionaries""" + if isinstance(content, str): + return content + elif isinstance(content, Iterable) and len(content) > 0: + return content[0].get("text") + else: + return None diff --git a/autogen/runtime_logging.py b/autogen/runtime_logging.py index 0a9b1687b75..73cddf3a37e 100644 --- a/autogen/runtime_logging.py +++ b/autogen/runtime_logging.py @@ -22,6 +22,7 @@ from autogen.oai.mistral import MistralAIClient from autogen.oai.ollama import OllamaClient from autogen.oai.together import TogetherClient + from autogen.oai.watsonx import WatsonxClient logger = logging.getLogger(__name__) @@ -127,6 +128,7 @@ def log_new_client( CohereClient, OllamaClient, BedrockClient, + WatsonxClient, ], wrapper: OpenAIWrapper, init_args: Dict[str, Any], diff --git a/setup.py b/setup.py index dfc5e912779..02118c49558 100644 --- a/setup.py +++ b/setup.py @@ -108,6 +108,7 @@ "ollama": ["ollama>=0.3.3", "fix_busted_json>=0.0.18"], "bedrock": ["boto3>=1.34.149"], "kubernetes": ["kubernetes>=27.2.0"], + "watsonx": ["ibm-watsonx-ai>=1.1.22"], } setuptools.setup( diff --git a/test/oai/test_watsonx.py b/test/oai/test_watsonx.py new file mode 100644 index 00000000000..c36d0b9a6e5 --- /dev/null +++ b/test/oai/test_watsonx.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 -m pytest +import math +import os + +import pytest + +try: + from autogen.oai.watsonx import WatsonxClient, calculate_watsonx_cost + + skip = False +except ImportError: + WatsonxClient = object + skip = True + + +reason = "Watsonx dependency not installed!" 
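+
+
+# A minimal sketch of an offline conversion check: per the helper defined earlier in
+# autogen/oai/watsonx.py, oai_messages_to_watsonx_messages passes messages through
+# unchanged and returns a string `tool_choice` via the `tool_choice_option` slot.
+@pytest.mark.skipif(skip, reason=reason)
+def test_oai_messages_to_watsonx_messages():
+    from autogen.oai.watsonx import oai_messages_to_watsonx_messages
+
+    messages = [{"role": "user", "content": "Hello"}]
+    wx_messages, tools, tool_choice, tool_choice_option = oai_messages_to_watsonx_messages(
+        messages, {"tool_choice": "auto"}
+    )
+    assert wx_messages == messages, "Messages should be passed through unchanged"
+    assert tools is None and tool_choice is None
+    assert tool_choice_option == "auto", "A string tool_choice should be returned as tool_choice_option"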
+
+
+@pytest.fixture()
+def watsonx_client():
+    return WatsonxClient(api_key="dummy_api_key", space_id="dummy_space_id")
+
+
+@pytest.mark.skipif(skip, reason=reason)
+def test_initialization_missing_api_key():
+    os.environ.pop("WATSONX_API_KEY", None)
+    with pytest.raises(
+        AssertionError,
+        match="Please include the api_key in your config list entry for Watsonx or set the WATSONX_API_KEY env variable.",
+    ):
+        WatsonxClient(space_id="dummy_space_id")
+
+    # should not raise once an api_key is supplied
+    WatsonxClient(api_key="dummy_api_key", space_id="dummy_space_id")
+
+
+@pytest.mark.skipif(skip, reason=reason)
+def test_initialization(watsonx_client):
+    assert watsonx_client.api_key == "dummy_api_key", "`api_key` should be correctly set in the config"
+    assert watsonx_client.space_id == "dummy_space_id", "`space_id` should be correctly set in the config"
+
+
+@pytest.mark.skipif(skip, reason=reason)
+def test_calculate_watsonx_cost():
+    assert (
+        calculate_watsonx_cost(0, 0, model_id="ibm/granite-3-8b-instruct") == 0.0
+    ), "Cost should be 0 for 0 input_tokens and 0 output_tokens"
+    assert math.isclose(calculate_watsonx_cost(1000, 2000, model_id="ibm/granite-3-8b-instruct"), 0.0006, rel_tol=0.01)
+
+
+@pytest.mark.skipif(skip, reason=reason)
+def test_load_config(watsonx_client):
+    params = {
+        "model": "ibm/granite-3-8b-instruct",
+        "stream": False,
+        "temperature": 1,
+        "top_p": 0.8,
+        "max_tokens": 100,
+    }
+    expected_params = {
+        "temperature": 1,
+        "top_p": 0.8,
+        "max_tokens": 100,
+        "frequency_penalty": None,
+        "presence_penalty": None,
+    }
+    result = watsonx_client.parse_params(params)
+    assert result == expected_params, "Config should be correctly loaded"
diff --git a/website/docs/topics/non-openai-models/cloud-watsonx.ipynb b/website/docs/topics/non-openai-models/cloud-watsonx.ipynb
new file mode 100644
index 00000000000..bd8b9db1861
--- /dev/null
+++ b/website/docs/topics/non-openai-models/cloud-watsonx.ipynb
@@ -0,0 +1,500 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# watsonx\n",
+    "\n",
+    "[IBM® watsonx.ai™](https://www.ibm.com/products/watsonx-ai) is an enterprise-grade studio for developing AI services and deploying them into your applications of choice―with a collection of the APIs, tools, models and runtimes you need to turn your ideas and requirements into reality."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Features\n",
+    "\n",
+    "When using this client class, AutoGen's messages are automatically tailored to accommodate the specific requirements of Watsonx's API.\n",
+    "\n",
+    "Additionally, this client class provides support for function/tool calling and will track token usage and cost correctly as per Watsonx's API costs (as of Nov 2024).\n",
+    "\n",
+    "## Getting started\n",
+    "\n",
+    "First you need to install the `autogen-agentchat~=0.2` package to use AutoGen with the Watsonx API library.\n",
+    "\n",
+    "``` bash\n",
+    "pip install autogen-agentchat[watsonx]~=0.2\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Watsonx.ai provides a number of models to use, included below. 
See the list of [models here](https://www.ibm.com/products/watsonx-ai/foundation-models#generative).\n", + "\n", + "See the sample `OAI_CONFIG_LIST` below showing how the Watsonx client class is used by specifying the `api_type` as `watsonx`.\n", + "\n", + "```python\n", + "[\n", + " {\n", + " \"api_type\": \"watsonx\",\n", + " \"model\": \"ibm/granite-3-8b-instruct\",\n", + " \"api_key\": \"your watsonx api key\",\n", + " \"space_id\": \"your watsonx space id\",\n", + " }\n", + "]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an alternative to the `api_key` and `space_id` key and value in the config, you can set the environment variable `WATSONX_API_KEY` and `WATSONX_SPACE_ID` to your Watsonx credentials.\n", + "\n", + "## API parameters\n", + "\n", + "The following parameters can be added to your config for the Watsonx API. See [this link](https://ibm.github.io/watsonx-ai-python-sdk/fm_model_inference.html#) for further information on them and their default values.\n", + "\n", + "\n", + "Example:\n", + "```python\n", + "[\n", + " {\n", + " \"api_type\": \"watsonx\",\n", + " \"model\": \"ibm/granite-3-8b-instruct\",\n", + " \"api_key\": \"your watsonx api key\",\n", + " \"space_id\": \"your watsonx space id\",\n", + " \"name-of-param\": value_of_param,\n", + " }\n", + "]\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Two-Agent Coding Example\n", + "\n", + "In this example, we run a two-agent chat with an AssistantAgent (primarily a coding agent) to generate code to count the number of prime numbers between 1 and 10,000 and then it will be executed." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "config_list = [\n", + " {\n", + " # Watsonx also hosts a variety of models other than the granite series\n", + " \"model\": \"meta-llama/llama-3-1-70b-instruct\",\n", + " \"api_key\": os.environ.get(\"WATSONX_API_KEY\"),\n", + " \"url\": os.environ.get(\"WATSONX_URL\"),\n", + " \"space_id\": os.environ.get(\"WATSONX_SPACE_ID\"),\n", + " \"api_type\": \"watsonx\",\n", + " }\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Importantly, we have tweaked the system message so that the model doesn't return the termination keyword, which we've changed to FINISH, with the code block." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "flaml.automl is not available. 
Please install flaml[automl] to enable AutoML functionalities.\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "from autogen import AssistantAgent, UserProxyAgent\n", + "from autogen.coding import LocalCommandLineCodeExecutor\n", + "\n", + "# Setting up the code executor\n", + "workdir = Path(\"coding\")\n", + "workdir.mkdir(exist_ok=True)\n", + "code_executor = LocalCommandLineCodeExecutor(work_dir=workdir)\n", + "\n", + "# Setting up the agents\n", + "\n", + "# The UserProxyAgent will execute the code that the AssistantAgent provides\n", + "user_proxy_agent = UserProxyAgent(\n", + " name=\"User\",\n", + " code_execution_config={\"executor\": code_executor},\n", + " is_termination_msg=lambda msg: \"FINISH\" in msg.get(\"content\"),\n", + ")\n", + "\n", + "system_message = \"\"\"You are a helpful AI assistant who writes code and the user executes it.\n", + "Solve tasks using your coding and language skills.\n", + "In the following cases, suggest python code (in a python coding block) for the user to execute.\n", + "Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.\n", + "When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user.\n", + "Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user.\n", + "If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\n", + "When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.\n", + "IMPORTANT: Wait for the user to execute your code and then you can reply with the word \"FINISH\". 
DO NOT OUTPUT \"FINISH\" after your code block.\"\"\"\n", + "\n", + "# The AssistantAgent will take the coding request and return code\n", + "assistant_agent = AssistantAgent(\n", + " name=\"Watsonx Assistant\",\n", + " system_message=system_message,\n", + " llm_config={\"config_list\": config_list},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mUser\u001b[0m (to Watsonx Assistant):\n", + "\n", + "Provide code to count the number of prime numbers from 1 to 10000.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mWatsonx Assistant\u001b[0m (to User):\n", + "\n", + "```python\n", + "# Count the number of prime numbers from 1 to 10000\n", + "\n", + "def count_primes(n):\n", + " primes = []\n", + " for possiblePrime in range(2, n + 1):\n", + " isPrime = True\n", + " for num in range(2, int(possiblePrime ** 0.5) + 1):\n", + " if possiblePrime % num == 0:\n", + " isPrime = False\n", + " break\n", + " if isPrime:\n", + " primes.append(possiblePrime)\n", + " return len(primes)\n", + "\n", + "print(count_primes(10000))\n", + "```\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/qc/git/autogen/autogen/oai/watsonx.py:64: UserWarning: Cost calculation not available for meta-llama/llama-3-1-70b-instruct model\n", + " warnings.warn(f\"Cost calculation not available for {model_id} model\", UserWarning)\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Replying as User. Provide feedback to Watsonx Assistant. Press enter to skip and use auto-reply, or type 'exit' to end the conversation: \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31m\n", + ">>>>>>>> NO HUMAN INPUT RECEIVED.\u001b[0m\n", + "\u001b[31m\n", + ">>>>>>>> USING AUTO REPLY...\u001b[0m\n", + "\u001b[31m\n", + ">>>>>>>> EXECUTING CODE BLOCK (inferred language is python)...\u001b[0m\n", + "\u001b[33mUser\u001b[0m (to Watsonx Assistant):\n", + "\n", + "exitcode: 0 (execution succeeded)\n", + "Code output: 1229\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mWatsonx Assistant\u001b[0m (to User):\n", + "\n", + "FINISH\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/qc/git/autogen/autogen/oai/watsonx.py:64: UserWarning: Cost calculation not available for meta-llama/llama-3-1-70b-instruct model\n", + " warnings.warn(f\"Cost calculation not available for {model_id} model\", UserWarning)\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Replying as User. Provide feedback to Watsonx Assistant. 
Press enter to skip and use auto-reply, or type 'exit' to end the conversation: \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31m\n", + ">>>>>>>> NO HUMAN INPUT RECEIVED.\u001b[0m\n" + ] + } + ], + "source": [ + "# Start the chat, with the UserProxyAgent asking the AssistantAgent the message\n", + "chat_result = user_proxy_agent.initiate_chat(\n", + " assistant_agent,\n", + " message=\"Provide code to count the number of prime numbers from 1 to 10000.\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tool Call Example\n", + "\n", + "In this example, instead of writing code, we will show how LLMs can perform tool calling.\n", + "\n", + "We'll use a simple travel agent assistant program where we have a tool for weather." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "from typing import Literal\n", + "\n", + "from typing_extensions import Annotated\n", + "\n", + "import autogen" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create our two agents." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the agent for tool calling\n", + "chatbot = autogen.AssistantAgent(\n", + " name=\"chatbot\",\n", + " system_message=\"\"\"For currency exchange and weather forecasting tasks,\n", + " only use the functions you have been provided with.\n", + " Output 'HAVE FUN!' when an answer has been provided.\"\"\",\n", + " llm_config={\"config_list\": config_list},\n", + ")\n", + "\n", + "# Note that we have changed the termination string to be \"HAVE FUN!\"\n", + "user_proxy = autogen.UserProxyAgent(\n", + " name=\"user_proxy\",\n", + " code_execution_config={\"executor\": code_executor},\n", + " human_input_mode=\"NEVER\",\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\") and \"HAVE FUN\" in x.get(\"content\", \"\"),\n", + " max_consecutive_auto_reply=1,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a function, annotating it so that the description can be passed through to the LLM.\n", + "\n", + "We associate it with the agents using `register_for_execution` for the user_proxy so it can execute the function and `register_for_llm` for the chatbot (powered by the LLM) so it can pass the function definition to the LLM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Weather function\n", + "\n", + "\n", + "# Example function to make available to model\n", + "def get_current_weather(location, unit=\"fahrenheit\"):\n", + " \"\"\"Get the weather for some location\"\"\"\n", + " if \"chicago\" in location.lower():\n", + " return json.dumps({\"location\": \"Chicago\", \"temperature\": \"13\", \"unit\": unit})\n", + " elif \"san francisco\" in location.lower():\n", + " return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"55\", \"unit\": unit})\n", + " elif \"new york\" in location.lower():\n", + " return json.dumps({\"location\": \"New York\", \"temperature\": \"11\", \"unit\": unit})\n", + " else:\n", + " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", + "\n", + "\n", + "# Register the function with the agent\n", + "\n", + "\n", + "@user_proxy.register_for_execution()\n", + "@chatbot.register_for_llm(description=\"Weather forecast for US cities.\")\n", + "def weather_forecast(\n", + " location: Annotated[str, \"City name\"],\n", + ") -> str:\n", + " weather_details = get_current_weather(location=location)\n", + " weather = json.loads(weather_details)\n", + " return f\"{weather['location']} will be {weather['temperature']} degrees {weather['unit']}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We pass through our customer's message and run the chat.\n", + "\n", + "Finally, we ask the LLM to summarise the chat and print that out." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", + "\n", + "What's the weather in New York\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchatbot\u001b[0m (to user_proxy):\n", + "\n", + "\u001b[32m***** Suggested tool call (chatcmpl-tool-baba90d4634d42b9935f8063dd80c69c): weather_forecast *****\u001b[0m\n", + "Arguments: \n", + "{\"location\": \"New York\"}\n", + "\u001b[32m**************************************************************************************************\u001b[0m\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[35m\n", + ">>>>>>>> EXECUTING FUNCTION weather_forecast...\u001b[0m\n", + "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", + "\n", + "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", + "\n", + "\u001b[32m***** Response from calling tool (chatcmpl-tool-baba90d4634d42b9935f8063dd80c69c) *****\u001b[0m\n", + "New York will be 11 degrees fahrenheit\n", + "\u001b[32m***************************************************************************************\u001b[0m\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/qc/git/autogen/autogen/oai/watsonx.py:64: UserWarning: Cost calculation not available for meta-llama/llama-3-1-70b-instruct model\n", + " warnings.warn(f\"Cost calculation not available for {model_id} model\", UserWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mchatbot\u001b[0m (to user_proxy):\n", + "\n", + "HAVE FUN!\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "/home/qc/git/autogen/autogen/oai/watsonx.py:64: UserWarning: Cost calculation not available for meta-llama/llama-3-1-70b-instruct model\n", + " warnings.warn(f\"Cost calculation not available for {model_id} model\", UserWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LLM SUMMARY: The user asked for the weather in New York and the AI provided a JSON for a function call to retrieve the weather forecast for New York, which resulted in a temperature of 11 degrees Fahrenheit.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/qc/git/autogen/autogen/oai/watsonx.py:64: UserWarning: Cost calculation not available for meta-llama/llama-3-1-70b-instruct model\n", + " warnings.warn(f\"Cost calculation not available for {model_id} model\", UserWarning)\n" + ] + } + ], + "source": [ + "# start the conversation\n", + "res = user_proxy.initiate_chat(\n", + " chatbot,\n", + " message=\"What's the weather in New York\",\n", + " summary_method=\"reflection_with_llm\",\n", + ")\n", + "\n", + "print(f\"LLM SUMMARY: {res.summary['content']}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that the agent recommended we call the tool and passed through the right parameters. The `user_proxy` executed them and this was passed back to the agent to interpret them and respond. Finally, the agent was asked to summarise the whole conversation." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}