diff --git a/autogen/agentchat/contrib/swarm_agent.py b/autogen/agentchat/contrib/swarm_agent.py index f604c13a57..94a75baa0f 100644 --- a/autogen/agentchat/contrib/swarm_agent.py +++ b/autogen/agentchat/contrib/swarm_agent.py @@ -123,7 +123,7 @@ def initiate_swarm_chat( user_agent: Optional[UserProxyAgent] = None, max_rounds: int = 20, context_variables: Optional[dict[str, Any]] = None, - after_work: Optional[Union[AFTER_WORK, Callable]] = AFTER_WORK(AfterWorkOption.TERMINATE), + after_work: Optional[Union[AfterWorkOption, Callable]] = AFTER_WORK(AfterWorkOption.TERMINATE), ) -> tuple[ChatResult, dict[str, Any], "SwarmAgent"]: """Initialize and run a swarm chat diff --git a/autogen/agentchat/realtime_agent/client.py b/autogen/agentchat/realtime_agent/client.py index ac2ed1674a..ca337a5769 100644 --- a/autogen/agentchat/realtime_agent/client.py +++ b/autogen/agentchat/realtime_agent/client.py @@ -8,15 +8,18 @@ # import asyncio import json import logging -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional -import anyio -import websockets -from asyncer import TaskGroup, asyncify, create_task_group, syncify +from asyncer import TaskGroup, asyncify, create_task_group +from websockets import connect +from websockets.asyncio.client import ClientConnection -from autogen.agentchat.contrib.swarm_agent import AfterWorkOption, initiate_swarm_chat +from ..contrib.swarm_agent import AfterWorkOption, SwarmAgent, initiate_swarm_chat -from .function_observer import FunctionObserver +if TYPE_CHECKING: + from .function_observer import FunctionObserver + from .realtime_agent import RealtimeAgent + from .realtime_observer import RealtimeObserver logger = logging.getLogger(__name__) @@ -24,47 +27,64 @@ class OpenAIRealtimeClient: """(Experimental) Client for OpenAI Realtime API.""" - def __init__(self, agent, audio_adapter, function_observer: FunctionObserver): + def __init__( + self, agent: "RealtimeAgent", audio_adapter: "RealtimeObserver", 
function_observer: "FunctionObserver" + ) -> None: """(Experimental) Client for OpenAI Realtime API. - args: - agent: Agent instance - the agent to be used for the conversation - audio_adapter: RealtimeObserver - adapter for streaming the audio from the client - function_observer: FunctionObserver - observer for handling function calls + Args: + agent (RealtimeAgent): The agent that the client is associated with. + audio_adapter (RealtimeObserver): The audio adapter for the client. + function_observer (FunctionObserver): The function observer for the client. + """ self._agent = agent - self._observers = [] - self._openai_ws = None # todo factor out to OpenAIClient + self._observers: list["RealtimeObserver"] = [] + self._openai_ws: Optional[ClientConnection] = None # todo factor out to OpenAIClient self.register(audio_adapter) self.register(function_observer) # LLM config llm_config = self._agent.llm_config - config = llm_config["config_list"][0] + config: dict[str, Any] = llm_config["config_list"][0] # type: ignore[index] - self.model = config["model"] - self.temperature = llm_config["temperature"] - self.api_key = config["api_key"] + self.model: str = config["model"] + self.temperature: float = llm_config["temperature"] # type: ignore[index] + self.api_key: str = config["api_key"] # create a task group to manage the tasks self.tg: Optional[TaskGroup] = None - def register(self, observer): + @property + def openai_ws(self) -> ClientConnection: + """Get the OpenAI WebSocket connection.""" + if self._openai_ws is None: + raise RuntimeError("OpenAI WebSocket is not initialized") + return self._openai_ws + + def register(self, observer: "RealtimeObserver") -> None: """Register an observer to the client.""" observer.register_client(self) self._observers.append(observer) - async def notify_observers(self, message): - """Notify all observers of a message from the OpenAI Realtime API.""" + async def notify_observers(self, message: dict[str, Any]) -> None: + """Notify all 
observers of a message from the OpenAI Realtime API. + + Args: + message (dict[str, Any]): The message from the OpenAI Realtime API. + + """ for observer in self._observers: await observer.update(message) - async def function_result(self, call_id, result): - """Send the result of a function call to the OpenAI Realtime API.""" + async def function_result(self, call_id: str, result: str) -> None: + """Send the result of a function call to the OpenAI Realtime API. + + Args: + call_id (str): The ID of the function call. + result (str): The result of the function call. + """ result_item = { "type": "conversation.item.create", "item": { @@ -73,11 +93,23 @@ async def function_result(self, call_id, result): "output": result, }, } + if self._openai_ws is None: + raise RuntimeError("OpenAI WebSocket is not initialized") + await self._openai_ws.send(json.dumps(result_item)) await self._openai_ws.send(json.dumps({"type": "response.create"})) - async def send_text(self, *, role: str, text: str): - """Send a text message to the OpenAI Realtime API.""" + async def send_text(self, *, role: str, text: str) -> None: + """Send a text message to the OpenAI Realtime API. + + Args: + role (str): The role of the message. + text (str): The text of the message. 
+ """ + + if self._openai_ws is None: + raise RuntimeError("OpenAI WebSocket is not initialized") + await self._openai_ws.send(json.dumps({"type": "response.cancel"})) text_item = { "type": "conversation.item.create", @@ -87,7 +119,7 @@ async def send_text(self, *, role: str, text: str): await self._openai_ws.send(json.dumps({"type": "response.create"})) # todo override in specific clients - async def initialize_session(self): + async def initialize_session(self) -> None: """Control initial session with OpenAI.""" session_update = { # todo: move to config @@ -100,15 +132,25 @@ async def initialize_session(self): await self.session_update(session_update) # todo override in specific clients - async def session_update(self, session_options): - """Send a session update to the OpenAI Realtime API.""" + async def session_update(self, session_options: dict[str, Any]) -> None: + """Send a session update to the OpenAI Realtime API. + + Args: + session_options (dict[str, Any]): The session options to update. 
+ """ + if self._openai_ws is None: + raise RuntimeError("OpenAI WebSocket is not initialized") + update = {"type": "session.update", "session": session_options} logger.info("Sending session update:", json.dumps(update)) await self._openai_ws.send(json.dumps(update)) logger.info("Sending session update finished") - async def _read_from_client(self): + async def _read_from_client(self) -> None: """Read messages from the OpenAI Realtime API.""" + if self._openai_ws is None: + raise RuntimeError("OpenAI WebSocket is not initialized") + try: async for openai_message in self._openai_ws: response = json.loads(openai_message) @@ -116,9 +158,9 @@ async def _read_from_client(self): except Exception as e: logger.warning(f"Error in _read_from_client: {e}") - async def run(self): + async def run(self) -> None: """Run the client.""" - async with websockets.connect( + async with connect( f"wss://api.openai.com/v1/realtime?model={self.model}", additional_headers={ "Authorization": f"Bearer {self.api_key}", @@ -127,17 +169,24 @@ async def run(self): ) as openai_ws: self._openai_ws = openai_ws await self.initialize_session() - # await asyncio.gather(self._read_from_client(), *[observer.run() for observer in self._observers]) async with create_task_group() as tg: self.tg = tg self.tg.soonify(self._read_from_client)() for observer in self._observers: self.tg.soonify(observer.run)() + + initial_agent = self._agent._initial_agent + agents = self._agent._agents + user_agent = self._agent + + if not (initial_agent and agents): + raise RuntimeError("Swarm not registered.") + if self._agent._start_swarm_chat: self.tg.soonify(asyncify(initiate_swarm_chat))( - initial_agent=self._agent._initial_agent, - agents=self._agent._agents, - user_agent=self._agent, + initial_agent=initial_agent, + agents=agents, + user_agent=user_agent, # type: ignore[arg-type] messages="Find out what the user wants.", after_work=AfterWorkOption.REVERT_TO_USER, ) diff --git 
a/autogen/agentchat/realtime_agent/function_observer.py b/autogen/agentchat/realtime_agent/function_observer.py index 14c70bca62..9e4c8d2649 100644 --- a/autogen/agentchat/realtime_agent/function_observer.py +++ b/autogen/agentchat/realtime_agent/function_observer.py @@ -8,38 +8,52 @@ import asyncio import json import logging +from typing import TYPE_CHECKING, Any from asyncer import asyncify from pydantic import BaseModel from .realtime_observer import RealtimeObserver +if TYPE_CHECKING: + from .realtime_agent import RealtimeAgent + logger = logging.getLogger(__name__) class FunctionObserver(RealtimeObserver): """Observer for handling function calls from the OpenAI Realtime API.""" - def __init__(self, agent): + def __init__(self, agent: "RealtimeAgent") -> None: """Observer for handling function calls from the OpenAI Realtime API. Args: - agent: Agent instance - the agent to be used for the conversation + agent (RealtimeAgent): The realtime agent attached to the observer. """ super().__init__() self._agent = agent - async def update(self, response): - """Handle function call events from the OpenAI Realtime API.""" + async def update(self, response: dict[str, Any]) -> None: + """Handle function call events from the OpenAI Realtime API. + + Args: + response (dict[str, Any]): The response from the OpenAI Realtime API. + """ if response.get("type") == "response.function_call_arguments.done": logger.info(f"Received event: {response['type']}", response) await self.call_function( call_id=response["call_id"], name=response["name"], kwargs=json.loads(response["arguments"]) ) - async def call_function(self, call_id, name, kwargs): - """Call a function registered with the agent.""" + async def call_function(self, call_id: str, name: str, kwargs: dict[str, Any]) -> None: + """Call a function registered with the agent. + + Args: + call_id (str): The ID of the function call. + name (str): The name of the function to call. 
+ kwargs (dict[str, Any]): The arguments to pass to the function. + """ + if name in self._agent.realtime_functions: _, func = self._agent.realtime_functions[name] func = func if asyncio.iscoroutinefunction(func) else asyncify(func) @@ -54,19 +68,19 @@ async def call_function(self, call_id, name, kwargs): elif not isinstance(result, str): result = json.dumps(result) - await self._client.function_result(call_id, result) + await self.client.function_result(call_id, result) - async def run(self): + async def run(self) -> None: """Run the observer. Initialize the session with the OpenAI Realtime API. """ await self.initialize_session() - async def initialize_session(self): + async def initialize_session(self) -> None: """Add registered tools to OpenAI with a session update.""" session_update = { "tools": [schema for schema, _ in self._agent.realtime_functions.values()], "tool_choice": "auto", } - await self._client.session_update(session_update) + await self.client.session_update(session_update) diff --git a/autogen/agentchat/realtime_agent/realtime_agent.py b/autogen/agentchat/realtime_agent/realtime_agent.py index aadbc1f283..b4456715bb 100644 --- a/autogen/agentchat/realtime_agent/realtime_agent.py +++ b/autogen/agentchat/realtime_agent/realtime_agent.py @@ -52,8 +52,8 @@ def __init__( *, name: str, audio_adapter: RealtimeObserver, - system_message: Optional[Union[str, list]] = "You are a helpful AI Assistant.", - llm_config: Optional[Union[dict, Literal[False]]] = None, + system_message: Optional[Union[str, list[str]]] = "You are a helpful AI Assistant.", + llm_config: Optional[Union[dict[str, Any], Literal[False]]] = None, voice: str = "alloy", ): """(Experimental) Agent for interacting with the Realtime Clients.
@@ -83,10 +83,10 @@ def __init__( silent=None, context_variables=None, ) - self.llm_config = llm_config + self.llm_config = llm_config # type: ignore[assignment] self._client = OpenAIRealtimeClient(self, audio_adapter, FunctionObserver(self)) self.voice = voice - self.realtime_functions = {} + self.realtime_functions: dict[str, tuple[dict[str, Any], Callable[..., Any]]] = {} self._oai_system_message = [{"content": system_message, "role": "system"}] # todo still needed? self.register_reply( @@ -96,8 +96,8 @@ def __init__( self._answer_event: anyio.Event = anyio.Event() self._answer: str = "" self._start_swarm_chat = False - self._initial_agent = None - self._agents = None + self._initial_agent: Optional[SwarmAgent] = None + self._agents: Optional[list[SwarmAgent]] = None def register_swarm( self, @@ -133,7 +133,7 @@ def register_swarm( self.set_answer ) - async def run(self): + async def run(self) -> None: """Run the agent.""" await self._client.run() @@ -143,11 +143,12 @@ def register_realtime_function( description: str, name: Optional[str] = None, ) -> Callable[[F], F]: - def _decorator(func: F, name=name) -> F: + def _decorator(func: F, name: Optional[str] = name) -> F: """Decorator for registering a function to be used by an agent. Args: - func: the function to be registered. + func (callable[..., Any]): the function to be registered. + name (str): the name of the function. Returns: The function to be registered, with the _description attribute set to the function description. @@ -183,7 +184,7 @@ async def get_answer(self) -> str: await self._answer_event.wait() return self._answer - async def ask_question(self, question: str, question_timeout: int) -> str: + async def ask_question(self, question: str, question_timeout: int) -> None: """ Send a question for the user to the agent and wait for the answer. If the answer is not received within the timeout, the question is repeated. 
@@ -196,7 +197,7 @@ async def ask_question(self, question: str, question_timeout: int) -> str: self.reset_answer() await self._client.send_text(role=QUESTION_ROLE, text=question) - async def _check_event_set(timeout: int = question_timeout) -> None: + async def _check_event_set(timeout: int = question_timeout) -> bool: for _ in range(timeout): if self._answer_event.is_set(): return True @@ -208,7 +209,7 @@ async def _check_event_set(timeout: int = question_timeout) -> None: def check_termination_and_human_reply( self, - messages: Optional[list[dict]] = None, + messages: Optional[list[dict[str, Any]]] = None, sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> tuple[bool, Union[str, None]]: @@ -225,7 +226,10 @@ def check_termination_and_human_reply( the config for the agent """ - async def get_input(): + if not messages: + return False, None + + async def get_input() -> None: async with create_task_group() as tg: tg.soonify(self.ask_question)( QUESTION_MESSAGE.format(messages[-1]["content"]), @@ -234,4 +238,4 @@ async def get_input(): syncify(get_input)() - return True, {"role": "user", "content": self._answer} + return True, {"role": "user", "content": self._answer} # type: ignore[return-value] diff --git a/autogen/agentchat/realtime_agent/realtime_observer.py b/autogen/agentchat/realtime_agent/realtime_observer.py index 80d59de95c..6061efb230 100644 --- a/autogen/agentchat/realtime_agent/realtime_observer.py +++ b/autogen/agentchat/realtime_agent/realtime_observer.py @@ -6,24 +6,36 @@ # SPDX-License-Identifier: MIT from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from .client import OpenAIRealtimeClient class RealtimeObserver(ABC): """Observer for the OpenAI Realtime API.""" - def __init__(self): - self._client = None + def __init__(self) -> None: + self._client: Optional["OpenAIRealtimeClient"] = None + + @property + def client(self) -> "OpenAIRealtimeClient": + """Get the client associated 
with the observer.""" + if self._client is None: + raise ValueError("Observer client is not registered.") + + return self._client - def register_client(self, client): + def register_client(self, client: "OpenAIRealtimeClient") -> None: """Register a client with the observer.""" self._client = client @abstractmethod - async def run(self, openai_ws): + async def run(self) -> None: """Run the observer.""" - pass + ... @abstractmethod - async def update(self, message): + async def update(self, message: dict[str, Any]) -> None: """Update the observer with a message from the OpenAI Realtime API.""" - pass + ... diff --git a/autogen/agentchat/realtime_agent/twilio_observer.py b/autogen/agentchat/realtime_agent/twilio_observer.py index 4d6c793898..7dff6d4bdd 100644 --- a/autogen/agentchat/realtime_agent/twilio_observer.py +++ b/autogen/agentchat/realtime_agent/twilio_observer.py @@ -8,11 +8,13 @@ import base64 import json import logging - -from fastapi import WebSocketDisconnect +from typing import TYPE_CHECKING, Any, Optional from .realtime_observer import RealtimeObserver +if TYPE_CHECKING: + from fastapi.websockets import WebSocket + LOG_EVENT_TYPES = [ "error", "response.content.done", @@ -31,7 +33,7 @@ class TwilioAudioAdapter(RealtimeObserver): """Adapter for streaming audio from Twilio to OpenAI Realtime API and vice versa.""" - def __init__(self, websocket): + def __init__(self, websocket: "WebSocket"): """Adapter for streaming audio from Twilio to OpenAI Realtime API and vice versa. 
Args: @@ -45,10 +47,10 @@ def __init__(self, websocket): self.stream_sid = None self.latest_media_timestamp = 0 self.last_assistant_item = None - self.mark_queue = [] - self.response_start_timestamp_twilio = None + self.mark_queue: list[str] = [] + self.response_start_timestamp_twilio: Optional[int] = None - async def update(self, response): + async def update(self, response: dict[str, Any]) -> None: """Receive events from the OpenAI Realtime API, send audio back to Twilio.""" if response["type"] in LOG_EVENT_TYPES: logger.info(f"Received event: {response['type']}", response) @@ -76,7 +78,7 @@ async def update(self, response): logger.info(f"Interrupting response with id: {self.last_assistant_item}") await self.handle_speech_started_event() - async def handle_speech_started_event(self): + async def handle_speech_started_event(self) -> None: """Handle interruption when the caller's speech starts.""" logger.info("Handling speech started event.") if self.mark_queue and self.response_start_timestamp_twilio is not None: @@ -104,19 +106,19 @@ async def handle_speech_started_event(self): self.last_assistant_item = None self.response_start_timestamp_twilio = None - async def send_mark(self): + async def send_mark(self) -> None: """Send a mark of audio interruption to the Twilio websocket.""" if self.stream_sid: mark_event = {"event": "mark", "streamSid": self.stream_sid, "mark": {"name": "responsePart"}} await self.websocket.send_json(mark_event) self.mark_queue.append("responsePart") - async def run(self): + async def run(self) -> None: """Run the adapter. Start reading messages from the Twilio websocket and send audio to OpenAI. 
""" - openai_ws = self._client._openai_ws + openai_ws = self.client.openai_ws await self.initialize_session() async for message in self.websocket.iter_text(): @@ -135,10 +137,10 @@ async def run(self): if self.mark_queue: self.mark_queue.pop(0) - async def initialize_session(self): + async def initialize_session(self) -> None: """Control initial session with OpenAI.""" session_update = { "input_audio_format": "g711_ulaw", "output_audio_format": "g711_ulaw", } - await self._client.session_update(session_update) + await self.client.session_update(session_update) diff --git a/autogen/agentchat/realtime_agent/websocket_observer.py b/autogen/agentchat/realtime_agent/websocket_observer.py index 9509e2b314..dd0b67a87d 100644 --- a/autogen/agentchat/realtime_agent/websocket_observer.py +++ b/autogen/agentchat/realtime_agent/websocket_observer.py @@ -7,8 +7,10 @@ import base64 import json +from typing import TYPE_CHECKING, Any, Optional -from fastapi import WebSocketDisconnect +if TYPE_CHECKING: + from fastapi.websockets import WebSocket from .realtime_observer import RealtimeObserver @@ -26,7 +28,7 @@ class WebsocketAudioAdapter(RealtimeObserver): - def __init__(self, websocket): + def __init__(self, websocket: "WebSocket"): super().__init__() self.websocket = websocket @@ -34,10 +36,10 @@ def __init__(self, websocket): self.stream_sid = None self.latest_media_timestamp = 0 self.last_assistant_item = None - self.mark_queue = [] - self.response_start_timestamp_socket = None + self.mark_queue: list[str] = [] + self.response_start_timestamp_socket: Optional[int] = None - async def update(self, response): + async def update(self, response: dict[str, Any]) -> None: """Receive events from the OpenAI Realtime API, send audio back to websocket.""" if response["type"] in LOG_EVENT_TYPES: print(f"Received event: {response['type']}", response) @@ -65,7 +67,7 @@ async def update(self, response): print(f"Interrupting response with id: {self.last_assistant_item}") await 
self.handle_speech_started_event() - async def handle_speech_started_event(self): + async def handle_speech_started_event(self) -> None: """Handle interruption when the caller's speech starts.""" print("Handling speech started event.") if self.mark_queue and self.response_start_timestamp_socket is not None: @@ -93,14 +95,14 @@ async def handle_speech_started_event(self): self.last_assistant_item = None self.response_start_timestamp_socket = None - async def send_mark(self): + async def send_mark(self) -> None: if self.stream_sid: mark_event = {"event": "mark", "streamSid": self.stream_sid, "mark": {"name": "responsePart"}} await self.websocket.send_json(mark_event) self.mark_queue.append("responsePart") - async def run(self): - openai_ws = self._client._openai_ws + async def run(self) -> None: + openai_ws = self.client.openai_ws await self.initialize_session() async for message in self.websocket.iter_text(): @@ -119,7 +121,7 @@ async def run(self): if self.mark_queue: self.mark_queue.pop(0) - async def initialize_session(self): + async def initialize_session(self) -> None: """Control initial session with OpenAI.""" session_update = {"input_audio_format": "pcm16", "output_audio_format": "pcm16"} # g711_ulaw # "g711_ulaw", - await self._client.session_update(session_update) + await self.client.session_update(session_update) diff --git a/notebook/tools_interoperability.ipynb b/notebook/tools_interoperability.ipynb index 28a7ddde97..0018d99de3 100644 --- a/notebook/tools_interoperability.ipynb +++ b/notebook/tools_interoperability.ipynb @@ -38,7 +38,8 @@ "### Imports\n", "\n", "Import necessary modules and tools.\n", - "- `WikipediaQueryRun` and `WikipediaAPIWrapper`: Tools for querying Wikipedia.\n", + "\n", + "- [WikipediaQueryRun](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.wikipedia.tool.WikipediaQueryRun.html) and 
[WikipediaAPIWrapper](https://python.langchain.com/api_reference/community/utilities/langchain_community.utilities.wikipedia.WikipediaAPIWrapper.html): Tools for querying Wikipedia.\n", "- `AssistantAgent` and `UserProxyAgent`: Agents that facilitate communication in the AG2 framework.\n", "- `Interoperability`: This module acts as a bridge, making it easier to integrate LangChain tools with AG2’s architecture." ] @@ -76,6 +77,7 @@ "### Agent Configuration\n", "\n", "Configure the agents for the interaction.\n", + "\n", "- `config_list` defines the LLM configurations, including the model and API key.\n", "- `UserProxyAgent` simulates user inputs without requiring actual human interaction (set to `NEVER`).\n", "- `AssistantAgent` represents the AI agent, configured with the LLM settings." @@ -106,9 +108,9 @@ "### Tool Integration\n", "\n", "- Initialize and register the LangChain tool with AG2.\n", - "- `WikipediaAPIWrapper`: Configured to fetch the top 1 result from Wikipedia with a maximum of 1000 characters per document.\n", - "- `WikipediaQueryRun`: A LangChain tool that executes Wikipedia queries.\n", - "- `LangchainInteroperability`: Converts the LangChain tool into a format compatible with the AG2 framework.\n", + "- [WikipediaAPIWrapper](https://python.langchain.com/api_reference/community/utilities/langchain_community.utilities.wikipedia.WikipediaAPIWrapper.html): Configured to fetch the top 1 result from Wikipedia with a maximum of 1000 characters per document.\n", + "- [WikipediaQueryRun](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.wikipedia.tool.WikipediaQueryRun.html): A LangChain tool that executes Wikipedia queries.\n", + "- `Interoperability`: Converts the LangChain tool into a format compatible with the AG2 framework.\n", "- `ag2_tool.register_for_execution(user_proxy)`: Registers the tool for use by the user_proxy agent.\n", "- `ag2_tool.register_for_llm(chatbot)`: Registers the tool for integration with the chatbot 
agent.\n" ] @@ -215,7 +217,8 @@ "### Imports\n", "\n", "Import necessary modules and tools.\n", - "- `ScrapeWebsiteTool` are the CrewAI tools for web scraping\n", + "\n", + "- [ScrapeWebsiteTool](https://docs.crewai.com/tools/scrapewebsitetool) is the CrewAI tool for web scraping\n", "- `AssistantAgent` and `UserProxyAgent` are core AG2 classes.\n", "- `Interoperability`: This module acts as a bridge, making it easier to integrate CrewAI tools with AG2’s architecture." ] @@ -241,6 +244,7 @@ "### Agent Configuration\n", "\n", "Configure the agents for the interaction.\n", + "\n", "- `config_list` defines the LLM configurations, including the model and API key.\n", "- `UserProxyAgent` simulates user inputs without requiring actual human interaction (set to `NEVER`).\n", "- `AssistantAgent` represents the AI agent, configured with the LLM settings." @@ -271,7 +275,8 @@ "### Tool Integration\n", "\n", "Initialize and register the CrewAI tool with AG2.\n", - "- `crewai_tool` is an instance of the `ScrapeWebsiteTool` from CrewAI.\n", + "\n", + "- `crewai_tool` is an instance of the [ScrapeWebsiteTool](https://docs.crewai.com/tools/scrapewebsitetool) from CrewAI.\n", "- `Interoperability` converts the CrewAI tool to make it usable in AG2.\n", "- `register_for_execution` and `register_for_llm` allow the tool to work with the UserProxyAgent and AssistantAgent."
] @@ -413,12 +418,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Imports\n", + "### Imports\n", "\n", "Import necessary modules and tools.\n", - "- `BaseModel`: Used to define data structures for tool inputs and outputs.\n", - "- `RunContext`: Provides context during the execution of tools.\n", - "- `PydanticAITool`: Represents a tool in the PydanticAI framework.\n", + "\n", + "- [BaseModel](https://docs.pydantic.dev/latest/api/base_model/): Used to define data structures for tool inputs and outputs.\n", + "- [RunContext](https://ai.pydantic.dev/api/tools/#pydantic_ai.tools.RunContext): Provides context during the execution of tools.\n", + "- [PydanticAITool](https://ai.pydantic.dev/api/tools/#pydantic_ai.tools.Tool): Represents a tool in the PydanticAI framework.\n", "- `AssistantAgent` and `UserProxyAgent`: Agents that facilitate communication in the AG2 framework.\n", "- `Interoperability`: This module acts as a bridge, making it easier to integrate PydanticAI tools with AG2’s architecture." ] @@ -447,6 +453,7 @@ "### Agent Configuration\n", "\n", "Configure the agents for the interaction.\n", + "\n", "- `config_list` defines the LLM configurations, including the model and API key.\n", "- `UserProxyAgent` simulates user inputs without requiring actual human interaction (set to `NEVER`).\n", "- `AssistantAgent` represents the AI agent, configured with the LLM settings." 
@@ -478,8 +485,8 @@ "\n", "Integrate the PydanticAI tool with AG2.\n", "\n", - "- Define a `Player` model using `BaseModel` to structure the input data.\n", - "- Use `RunContext` to securely inject dependencies (like the `Player` instance) into the tool function without exposing them to the LLM.\n", + "- Define a `Player` model using [BaseModel](https://docs.pydantic.dev/latest/api/base_model/) to structure the input data.\n", + "- Use [RunContext](https://ai.pydantic.dev/api/tools/#pydantic_ai.tools.RunContext) to securely inject dependencies (like the `Player` instance) into the tool function without exposing them to the LLM.\n", "- Implement `get_player` to define the tool's functionality, accessing `ctx.deps` for injected data.\n", "- Convert the tool to an AG2-compatible format with `Interoperability` and register it for execution and LLM communication.\n", "- Convert the PydanticAI tool into an AG2-compatible format using `convert_tool`.\n", @@ -525,7 +532,7 @@ "\n", "- Use the `initiate_chat` method to send a message from the `user_proxy` to the `chatbot`.\n", "- In this example, the user requests the chatbot to retrieve player information, providing \"goal keeper\" as additional context.\n", - "- The `Player` instance is securely injected into the tool using `RunContext`, ensuring the chatbot can retrieve and use this data during the interaction." + "- The `Player` instance is securely injected into the tool using [RunContext](https://ai.pydantic.dev/api/tools/#pydantic_ai.tools.RunContext), ensuring the chatbot can retrieve and use this data during the interaction." 
] }, { @@ -595,6 +602,15 @@ } ], "metadata": { + "front_matter": { + "description": "Cross-Framework LLM Tool Integration with AG2", + "tags": [ + "tools", + "langchain", + "crewai", + "pydanticai" + ] + }, "kernelspec": { "display_name": "Python 3", "language": "python", diff --git a/pyproject.toml b/pyproject.toml index 8f1db523ec..20f110b2e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,11 +62,13 @@ files = [ "autogen/io", "autogen/tools", "autogen/interop", + "autogen/agentchat/realtime_agent", "test/test_pydantic.py", "test/test_function_utils.py", "test/io", "test/tools", "test/interop", + "test/agentchat/realtime_agent", ] exclude = [ "autogen/math_utils\\.py", diff --git a/setup.py b/setup.py index e8eda4d66f..b382c66a41 100644 --- a/setup.py +++ b/setup.py @@ -82,6 +82,7 @@ "llama-index-core==0.12.5", ] +# used for agentchat_realtime_swarm notebook and realtime agent twilio demo twilio = ["fastapi>=0.115.0,<1", "uvicorn>=0.30.6,<1", "twilio>=9.3.2"] interop_crewai = ["crewai[tools]>=0.86,<1; python_version>='3.10' and python_version<'3.13'"] @@ -89,7 +90,7 @@ interop_pydantic_ai = ["pydantic-ai==0.0.13"] interop = interop_crewai + interop_langchain + interop_pydantic_ai -types = ["mypy==1.9.0"] + test + jupyter_executor + interop +types = ["mypy==1.9.0"] + test + jupyter_executor + interop + ["fastapi>=0.115.0,<1"] if current_os in ["Windows", "Darwin"]: retrieve_chat_pgvector.extend(["psycopg[binary]>=3.1.18"]) diff --git a/test/agentchat/realtime_agent/__init__.py b/test/agentchat/realtime_agent/__init__.py new file mode 100644 index 0000000000..87ec7612a0 --- /dev/null +++ b/test/agentchat/realtime_agent/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai +# +# SPDX-License-Identifier: Apache-2.0 +# +# Portions derived from https://github.com/microsoft/autogen are under the MIT License. 
+# SPDX-License-Identifier: MIT diff --git a/test/agentchat/realtime_agent/test_submodule.py b/test/agentchat/realtime_agent/test_submodule.py new file mode 100644 index 0000000000..eff9f04964 --- /dev/null +++ b/test/agentchat/realtime_agent/test_submodule.py @@ -0,0 +1,15 @@ +# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai +# +# SPDX-License-Identifier: Apache-2.0 +# +# Portions derived from https://github.com/microsoft/autogen are under the MIT License. +# SPDX-License-Identifier: MIT + + +def test_import() -> None: + from autogen.agentchat.realtime_agent import ( + FunctionObserver, + RealtimeAgent, + TwilioAudioAdapter, + WebsocketAudioAdapter, + ) diff --git a/website/blog/2024-12-20-Tools-interoperability/index.mdx b/website/blog/2024-12-20-Tools-interoperability/index.mdx index 3f6ae0962d..c57d6869f4 100644 --- a/website/blog/2024-12-20-Tools-interoperability/index.mdx +++ b/website/blog/2024-12-20-Tools-interoperability/index.mdx @@ -5,31 +5,44 @@ authors: tags: [LLM, tools, langchain, crewai, pydanticai] --- +