From 4167f20c98ded73fb944e7fcacc656bd83ea9d0a Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Thu, 18 Apr 2024 09:53:11 -0400
Subject: [PATCH 1/4] x

---
 langchain_benchmarks/tool_usage/__init__.py   |   6 +
 .../tool_usage/agents/__init__.py             |  14 -
 .../tool_usage/agents/anthropic_tool_user.py  | 271 ------------------
 .../tool_usage/agents/openai_assistant.py     |  77 -----
 .../tool_usage/agents/openai_functions.py     | 166 -----------
 .../tool_usage/agents/runnable_agent.py       |   4 +-
 .../tool_usage/agents/tool_using_agent.py     |   3 +-
 7 files changed, 10 insertions(+), 531 deletions(-)
 delete mode 100644 langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/openai_assistant.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/openai_functions.py

diff --git a/langchain_benchmarks/tool_usage/__init__.py b/langchain_benchmarks/tool_usage/__init__.py
index aa22c995..83da61d1 100644
--- a/langchain_benchmarks/tool_usage/__init__.py
+++ b/langchain_benchmarks/tool_usage/__init__.py
@@ -1,9 +1,15 @@
 """Package for helping to evaluate agent runs."""
+from langchain_benchmarks.tool_usage.agents import (
+    CustomRunnableAgentFactory,
+    StandardAgentFactory,
+)
 from langchain_benchmarks.tool_usage.agents import apply_agent_executor_adapter
 from langchain_benchmarks.tool_usage.evaluators import get_eval_config
 
 # Please keep this list sorted!
 __all__ = [
     "apply_agent_executor_adapter",
+    "CustomRunnableAgentFactory",
     "get_eval_config",
+    "StandardAgentFactory",
 ]

diff --git a/langchain_benchmarks/tool_usage/agents/__init__.py b/langchain_benchmarks/tool_usage/agents/__init__.py
index c59133fe..7692514c 100644
--- a/langchain_benchmarks/tool_usage/agents/__init__.py
+++ b/langchain_benchmarks/tool_usage/agents/__init__.py
@@ -1,25 +1,11 @@
 from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
-from langchain_benchmarks.tool_usage.agents.anthropic_tool_user import (
-    AnthropicToolUserFactory,
-)
-from langchain_benchmarks.tool_usage.agents.experimental.factory import (
-    CustomAgentFactory,
-)
-from langchain_benchmarks.tool_usage.agents.openai_assistant import (
-    OpenAIAssistantFactory,
-)
-from langchain_benchmarks.tool_usage.agents.openai_functions import OpenAIAgentFactory
 from langchain_benchmarks.tool_usage.agents.runnable_agent import (
     CustomRunnableAgentFactory,
 )
 from langchain_benchmarks.tool_usage.agents.tool_using_agent import StandardAgentFactory
 
 __all__ = [
-    "OpenAIAgentFactory",
-    "OpenAIAssistantFactory",
     "apply_agent_executor_adapter",
-    "CustomAgentFactory",
-    "AnthropicToolUserFactory",
     "CustomRunnableAgentFactory",
     "StandardAgentFactory",
 ]

diff --git a/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py b/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py
deleted file mode 100644
index 53773a53..00000000
--- a/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py
+++ /dev/null
@@ -1,271 +0,0 @@
-"""Wrapper around the anthropic tool user SDK.
-
-The anthropic tool user SDK is an alpha release so this code will likely be
-changed or deleted in the future. It's here simply to make it easier to benchmark
-the performance of the existing tool user SDK, to compare it with the performance
-of other implementations.
-""" - -from importlib.util import find_spec -from typing import Any, Dict, List, Optional, Sequence - -from langchain.tools import StructuredTool -from langchain_core.callbacks.manager import trace_as_chain_group -from langchain_core.runnables import Runnable, RunnableConfig, RunnableLambda - -from langchain_benchmarks import rate_limiting -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter - - -def convert_langchain_tool_to_tool_user_tool(lc_tool: StructuredTool) -> Any: - """Convert a langchain tool to a tool user tool.""" - from tool_use_package.tools.base_tool import BaseTool - - class DynamicTool(BaseTool): - def use_tool(self, **kwargs): - return lc_tool(kwargs) - - schema = lc_tool.args_schema.schema() - - properties = schema["properties"] - parameters = [] - # Is this needed or is string OK? - type_adapter = { - "string": "str", # str or string? - "integer": "int", - "number": "float", - "boolean": "bool", - } - for key, value in properties.items(): - parameters.append( - { - "name": key, - "type": type_adapter.get(value["type"], value["type"]), - "description": value.get("description", ""), - } - ) - - return DynamicTool(lc_tool.name, lc_tool.description, parameters) - - -def _handle_tool_inputs( - tool_inputs: List[Dict[str, Any]], - tools: Sequence[StructuredTool], - config: Optional[RunnableConfig] = None, -) -> Dict[str, Any]: - """Handle tool inputs.""" - tool_by_name = {tool.name: tool for tool in tools} - tool_error: Optional[str] = None - tool_outputs = [] - for tool_input in tool_inputs: - tool_name = tool_input["tool_name"] - tool_arguments = tool_input["tool_arguments"] - tool = tool_by_name[tool_name] - try: - tool_result = tool.invoke(tool_arguments, config=config) - except Exception as e: # Break on first error - tool_error = str(e) - tool_outputs = None - break - tool_outputs.append( - { - "tool_name": tool_name, - "tool_result": tool_result, - } - ) - return { - "role": "tool_outputs", - "tool_outputs": tool_outputs, - "tool_error": tool_error, - } - - -def run_anthropic_agent_simple( - tools: Sequence[StructuredTool], - user_message: str, - *, - max_iterations: int = 30, - config: Optional[RunnableConfig] = None, - **kwargs, -) -> List[dict]: - """Make an anthropic agent.""" - from tool_use_package.tool_user import ToolUser - - verbose = kwargs.pop("verbose", False) - - tool_user = ToolUser( - [convert_langchain_tool_to_tool_user_tool(tool) for tool in tools], **kwargs - ) - messages = [ - { - "role": "human", - "content": user_message, - "tool_error": None, - "tool_outputs": [], - "tool_inputs": [], - } - ] - with trace_as_chain_group( - "Anthropic Agent Run", - inputs={"user_message": user_message}, - callback_manager=config.get("callbacks", None) if config else None, - ) as group_manager: - for num_iteration in range(max_iterations): - with trace_as_chain_group( - f"Anthropic Agent Iteration {num_iteration}", - inputs={"messages": messages}, - callback_manager=group_manager.parent_run_manager.get_child(), - ) as iteration_manager: - last_message = tool_user.use_tools( - messages, execution_mode="manual", verbose=verbose - ) - new_messages = [last_message] - - if last_message["role"] == "tool_inputs": - tool_inputs = last_message["tool_inputs"] - new_message = _handle_tool_inputs( - tool_inputs, - tools, - config={ - "callbacks": iteration_manager.parent_run_manager.get_child(), - }, - ) - new_messages.append(new_message) - - 
iteration_manager.on_chain_end(outputs=new_messages) - messages.extend(new_messages) - - # Finally break if the last message is from the assistant - if last_message["role"] == "assistant": - break - else: - raise ValueError("Max iterations reached") - group_manager.on_chain_end(outputs=messages) - return messages - - -def convert_messages_to_finalized_output( - messages: List[Dict[str, Any]], -) -> Dict[str, Any]: - """Convert the history of messages into the expected output for eval. - - This matches the agent executor output which has the following structure: - - { - "output": "The output of the agent", - "intermediate_steps": [ - ( - AgentAction( - tool="add_x_y", - tool_input={"x": 2.0, "y": 5.0}, - log="Invoking tool `add_x_y` with `{'x': 2.0, 'y': 5.0}`", - ), - 9.0, - ) - ], - "state": Any, # Optional key for tasks that involve manipulation of an env. - } - """ - if not messages: - raise ValueError("Expected at least one message") - - last_message = messages[-1] - - if last_message["role"] != "assistant": - raise ValueError( - f"Expected the last message to be from the assistant. " - f"Instead got {last_message}." - ) - - actual_steps = [] - - for message in messages: - if "role" not in message: - raise ValueError(f"Expected role in message {message}") - role = message["role"] - - if role == "tool_inputs": - # Get the name of the tool used - for tool_input in message["tool_inputs"]: - actual_steps.append(tool_input["tool_name"]) - - return { - "output": last_message["content"], - "actual_steps": actual_steps, - } - - -def create_agent(tools: Sequence[StructuredTool]) -> RunnableLambda: - """Create an agent.""" - - def run_agent( - input: dict, config: Optional[RunnableConfig] = None, **kwargs - ) -> dict: - """Run the agent.""" - messages = run_anthropic_agent_simple( - tools, input["input"], config=config, **kwargs - ) - return convert_messages_to_finalized_output(messages) - - return RunnableLambda(run_agent) - - -class AnthropicToolUserFactory: - def __init__( - self, - task: ToolUsageTask, - *, - rate_limiter: Optional[rate_limiting.RateLimiter] = None, - ) -> None: - """Create an OpenAI agent factory for the given task. - - - Args: - task: The task to create an agent factory for. - rate_limiter: The rate limiter to use - """ - self.task = task - self.rate_limiter = rate_limiter - if not find_spec("tool_use_package"): - raise ImportError( - 'Could not import "tool_use_package". Please ' - "follow instructions here to install " - "https://github.com/anthropics/anthropic-tools/tree/main" - ) - - def __call__(self, **kwargs: Any) -> Runnable: - env = self.task.create_environment() - - def _add_task_instructions( - input: dict, config: Optional[RunnableConfig] = None, **kwargs - ) -> dict: - """Add task instructions to the question.""" - if not isinstance(input, dict) or "question" not in input: - raise ValueError( - f"Expected input to be a dict with key `question`. " - f"Found {type(input)}." - ) - - input = input.copy() - input["question"] = ( - f"{self.task.instructions}\nWrite down your answer, " - f"but do not explain it. 
Input: `{input['question']}`" - ) - return input - - agent = create_agent(env.tools) # type: ignore - # Returns `state` in the output if the environment has a state reader - # makes sure that `output` is always in the output - - if kwargs: - agent = agent.bind(**kwargs) - - runnable = _add_task_instructions | apply_agent_executor_adapter( - agent, state_reader=env.read_state - ) - - if self.rate_limiter: # Add a rate limiter - runnable = rate_limiting.with_rate_limit(runnable, self.rate_limiter) - - return runnable diff --git a/langchain_benchmarks/tool_usage/agents/openai_assistant.py b/langchain_benchmarks/tool_usage/agents/openai_assistant.py deleted file mode 100644 index 239846c6..00000000 --- a/langchain_benchmarks/tool_usage/agents/openai_assistant.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Code for creating an assistant factory for evaluating tool usage tasks. - -See: https://platform.openai.com/docs/assistants/how-it-works/creating-assistants -""" -from typing import Optional - -from langchain.agents import AgentExecutor -from langchain.agents.openai_assistant.base import OpenAIAssistantRunnable -from langchain.schema.runnable import Runnable - -from langchain_benchmarks import rate_limiting -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter - - -class OpenAIAssistantFactory: - def __init__( - self, - task: ToolUsageTask, - *, - model: str, - rate_limiter: Optional[rate_limiting.RateLimiter] = None, - num_retries: int = 0, - ) -> None: - """Create an OpenAI agent factory for the given task. - - Args: - task: The task to create an agent factory for. - model: The model to use -- this must be an open AI model. - rate_limiter: The rate limiter to use - num_retries: The number of times to retry the assistant if it fails - """ - if not isinstance(model, str): - raise ValueError(f"Expected str for model, got {type(model)}") - self.task = task - tools = task.create_environment().tools - # Stateless, so we only need to create it once - self.agent = OpenAIAssistantRunnable.create_assistant( - name=f"{task.name} assistant", - instructions=self.task.instructions, - tools=tools, - model=model, - as_agent=True, - ) - self.rate_limiter = rate_limiter - self.num_retries = num_retries - - def __call__(self) -> Runnable: - env = self.task.create_environment() - - agent = self.agent - if self.rate_limiter is not None: - # Rate limited model - agent = rate_limiting.with_rate_limit(agent, self.rate_limiter) - - def _map_key(x: dict): - # Assistant expects the 'content' key explicitly - return { - "content": x["input"], - **{k: v for k, v in x.items() if k != "input"}, - } - - agent = _map_key | self.agent - if self.num_retries > 0: - agent = agent.with_retry( - stop_after_attempt=self.num_retries + 1, - ) - runnable = AgentExecutor( - agent=agent, - tools=env.tools, - handle_parsing_errors=True, - return_intermediate_steps=True, - ) - - # Returns `state` in the output if the environment has a state reader - # makes sure that `output` is always in the output - return apply_agent_executor_adapter(runnable, state_reader=env.read_state) diff --git a/langchain_benchmarks/tool_usage/agents/openai_functions.py b/langchain_benchmarks/tool_usage/agents/openai_functions.py deleted file mode 100644 index 8537dfb0..00000000 --- a/langchain_benchmarks/tool_usage/agents/openai_functions.py +++ /dev/null @@ -1,166 +0,0 @@ -"""Code for creating an agent factory for evaluating tool usage tasks.""" -from typing import Any, 
Callable, Dict, List, Optional, Sequence, Type, Union - -from langchain.agents import AgentExecutor -from langchain.agents.format_scratchpad.openai_tools import ( - format_to_openai_tool_messages, -) -from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder -from langchain.schema.runnable import Runnable -from langchain.tools.render import format_tool_to_openai_tool -from langchain_core.language_models import BaseChatModel, BaseLanguageModel -from langchain_core.language_models.base import LanguageModelInput -from langchain_core.messages import BaseMessage -from langchain_core.pydantic_v1 import BaseModel - -from langchain_benchmarks import model_registry, rate_limiting -from langchain_benchmarks.model_registration import RegisteredModel -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter - -# PUBLIC API - - -def _bind_tools( - llm: BaseChatModel, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], - tool_choice: Optional[str] = None, - json_mode: bool = False, - **kwargs: Any, -) -> Runnable[LanguageModelInput, BaseMessage]: - """Bind tools (and other objects) to this chat model. - - Args: - tools: A list of tool definitions to bind to this chat model. - Can be a dictionary, pydantic model, or callable. Pydantic - models and callables will be automatically converted to - their schema dictionary representation. - tool_choice: Which tool to require the model to call. - Must be the name of the single provided tool or - "auto" to automatically determine which tool to call - (if any). - json_mode: Whether to set JSON mode for the tool call. - This guarantees the model will respond in valid JSON - (unless truncated). - kwargs: Any additional parameters to pass to the - :class:`~langchain.runnable.Runnable` constructor. - - """ - formatted_tools: List[Dict[str, Union[str, dict]]] = [ - format_tool_to_openai_tool(tool) for tool in tools - ] - if tool_choice is not None: - if not formatted_tools: - raise ValueError( - "When specifying `tool_choice`, you must provide at least one " "tool." - ) - tool_names = [tool["function"]["name"] for tool in formatted_tools] - if not any(tool_name == tool_choice for tool_name in tool_names): - raise ValueError( - f"Tool choice {tool_choice} was specified, but the only " - f"provided tools were {tool_names}." - ) - tool_choice_ = {"type": "function", "function": {"name": tool_choice}} - kwargs = {**kwargs, "tool_choice": tool_choice_} - if json_mode: - kwargs = {**kwargs, "response_format": {"type": "json_object"}} - return llm.bind( - tools=formatted_tools, - **kwargs, - ) - - -class OpenAIAgentFactory: - def __init__( - self, - task: ToolUsageTask, - *, - model: Union[ - str, RegisteredModel, BaseLanguageModel, BaseChatModel - ] = "gpt-3.5-turbo-16k", - rate_limiter: Optional[rate_limiting.RateLimiter] = None, - num_retries: int = 0, - ) -> None: - """Create an OpenAI agent factory for the given task. - - Args: - task: The task to create an agent factory for. - model: The model to use -- this must be an open AI model. 
- rate_limiter: The rate limiter to use - """ - self.task = task - self.model = model - self.rate_limiter = rate_limiter - self.num_retries = num_retries - - def _create_model(self) -> Union[BaseChatModel, BaseLanguageModel]: - if isinstance(self.model, RegisteredModel): - return self.model.get_model( - model_params={"temperature": 0, "model_kwargs": {"seed": 0}} - ) - elif isinstance(self.model, (BaseChatModel, BaseLanguageModel)): - return self.model - elif isinstance(self.model, str): - if self.model in model_registry: - registered_model = model_registry.get_model(self.model) - model_instance = registered_model.get_model( - model_params={"temperature": 0, "model_kwargs": {"seed": 0}} - ) - return model_instance - else: - raise ValueError(f"Unknown model: {self.model}") - else: - raise TypeError(f"Expected str or RegisteredModel, got {type(self.model)}") - - def create(self) -> Runnable: - """Agent Executor""" - # For backwards compatibility - return self() - - def __call__(self) -> Runnable: - model = self._create_model() - env = self.task.create_environment() - - model = _bind_tools(model, env.tools) - - if self.rate_limiter is not None: - # Rate limited model - model = rate_limiting.with_rate_limit(model, self.rate_limiter) - - prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - self.task.instructions, - ), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ] - ) - - runnable_agent = ( - { - "input": lambda x: x["input"], - "agent_scratchpad": lambda x: format_to_openai_tool_messages( - x["intermediate_steps"] - ), - } - | prompt - | model - | OpenAIToolsAgentOutputParser() - ) - if self.num_retries > 0: - runnable_agent = runnable_agent.with_retry( - stop_after_attempt=self.num_retries + 1, - ) - runnable = AgentExecutor( - agent=runnable_agent, - tools=env.tools, - handle_parsing_errors=True, - return_intermediate_steps=True, - ) - - # Returns `state` in the output if the environment has a state reader - # makes sure that `output` is always in the output - return apply_agent_executor_adapter(runnable, state_reader=env.read_state) diff --git a/langchain_benchmarks/tool_usage/agents/runnable_agent.py b/langchain_benchmarks/tool_usage/agents/runnable_agent.py index e0eadc28..8b130f55 100644 --- a/langchain_benchmarks/tool_usage/agents/runnable_agent.py +++ b/langchain_benchmarks/tool_usage/agents/runnable_agent.py @@ -46,4 +46,6 @@ def __call__(self) -> Runnable: return_intermediate_steps=True, ) - return apply_agent_executor_adapter(executor, state_reader=env.read_state) + return apply_agent_executor_adapter( + executor, state_reader=env.read_state + ).with_config({"run_name": "Agent", "metadata": {"task": self.task.name}}) diff --git a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py index 767f8aba..6b9283ec 100644 --- a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py +++ b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py @@ -5,6 +5,7 @@ from typing import Optional from langchain.agents import AgentExecutor +from langchain.agents import create_tool_calling_agent from langchain_core.language_models import BaseChatModel from langchain_core.prompts import ChatPromptTemplate from langchain_core.runnables import Runnable @@ -55,8 +56,6 @@ def __init__( def __call__(self) -> Runnable: """Call the factory to create Runnable agent.""" - # Temporarily import here until new langchain is released with create_tools_agent - from langchain.agents import 
create_tool_calling_agent
 
         env = self.task.create_environment()

From c746018336a1ef98143bc5d7c76551d274c9f5b1 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Thu, 18 Apr 2024 09:53:19 -0400
Subject: [PATCH 2/4] x

---
 langchain_benchmarks/tool_usage/__init__.py                | 2 +-
 langchain_benchmarks/tool_usage/agents/tool_using_agent.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/langchain_benchmarks/tool_usage/__init__.py b/langchain_benchmarks/tool_usage/__init__.py
index 83da61d1..a0ee4aae 100644
--- a/langchain_benchmarks/tool_usage/__init__.py
+++ b/langchain_benchmarks/tool_usage/__init__.py
@@ -2,8 +2,8 @@
 from langchain_benchmarks.tool_usage.agents import (
     CustomRunnableAgentFactory,
     StandardAgentFactory,
+    apply_agent_executor_adapter,
 )
-from langchain_benchmarks.tool_usage.agents import apply_agent_executor_adapter
 from langchain_benchmarks.tool_usage.evaluators import get_eval_config
 
 # Please keep this list sorted!

diff --git a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
index 6b9283ec..78672e4a 100644
--- a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
+++ b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
@@ -4,8 +4,7 @@
 """
 from typing import Optional
 
-from langchain.agents import AgentExecutor
-from langchain.agents import create_tool_calling_agent
+from langchain.agents import AgentExecutor, create_tool_calling_agent
 from langchain_core.language_models import BaseChatModel
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import Runnable

From 71fd2018325dd5e8ae591021df08c1e13f32d48d Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Thu, 18 Apr 2024 09:54:01 -0400
Subject: [PATCH 3/4] x

---
 .../agents/experimental/__init__.py           |   0
 .../tool_usage/agents/experimental/agent.py   | 133 ----------
 .../tool_usage/agents/experimental/encoder.py | 240 ------------------
 .../tool_usage/agents/experimental/factory.py |  93 -------
 .../tool_usage/agents/experimental/parser.py  | 122 ---------
 .../tool_usage/agents/experimental/prompts.py |  42 ---
 .../agents/experimental/tool_utils.py         |  57 -----
 tests/unit_tests/agents/__init__.py           |   0
 .../agents/encoding_and_decoding/__init__.py  |   0
 .../encoding_and_decoding/test_decoding.py    |  54 ----
 .../test_typescript_encoding.py               |  25 --
 .../test_xml_encoding.py                      |  90 -------
 tests/unit_tests/agents/test_tool_utils.py    |  59 -----
 13 files changed, 915 deletions(-)
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/__init__.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/agent.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/encoder.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/factory.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/parser.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/prompts.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py
 delete mode 100644 tests/unit_tests/agents/__init__.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/__init__.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/test_decoding.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py
 delete mode 100644 tests/unit_tests/agents/test_tool_utils.py

diff --git
a/langchain_benchmarks/tool_usage/agents/experimental/__init__.py b/langchain_benchmarks/tool_usage/agents/experimental/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/langchain_benchmarks/tool_usage/agents/experimental/agent.py b/langchain_benchmarks/tool_usage/agents/experimental/agent.py deleted file mode 100644 index 87ada85e..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/agent.py +++ /dev/null @@ -1,133 +0,0 @@ -from typing import List, Literal, Optional, Sequence, Tuple, Union - -from langchain.agents import AgentOutputParser -from langchain.prompts.chat import ChatPromptTemplate -from langchain.schema.runnable import Runnable -from langchain.tools import StructuredTool -from langchain_core.agents import AgentAction, AgentFinish -from langchain_core.language_models import BaseChatModel, BaseLanguageModel -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage -from langchain_core.prompts import MessagesPlaceholder -from typing_extensions import NotRequired, TypedDict - -from langchain_benchmarks import RateLimiter -from langchain_benchmarks.rate_limiting import with_rate_limit -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - AstPrinter, - FunctionResult, - TypeScriptEncoder, - XMLEncoder, -) -from langchain_benchmarks.tool_usage.agents.experimental.prompts import ( - _AGENT_INSTRUCTIONS_BLOB_STYLE, -) -from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import ( - convert_tool_to_function_definition, -) - - -def format_steps_for_chat( - intermediate_steps: List[Tuple[AgentAction, str]], - ast_printer: AstPrinter, -) -> List[BaseMessage]: - """Format the steps.""" - messages = [] - for action, observation in intermediate_steps: - # Action messages contains the tool invocation request from the LLM - # Now add the result of the tool invocation. 
- - if action.tool == "_Exception": - messages.append( - AIMessage( - content=action.log, - ) - ) - messages.append( - # Tool input is the error message for the exception - HumanMessage(content=action.tool_input) - ) - else: - messages.extend(action.messages) - function_result: FunctionResult = { - "name": action.tool, - "error": None, - "result": observation, - } - messages.append( - HumanMessage( - content=ast_printer.visit_function_result(function_result), - ) - ) - - return messages - - -# PUBLIC API - - -class AgentInput(TypedDict): - """The input to the agent.""" - - input: str - """The input to the agent.""" - intermediate_steps: List[Tuple[AgentAction, str]] - """The intermediate steps taken by the agent.""" - examples: NotRequired[List[BaseMessage]] - """A list of messages that can be used to form example traces.""" - - -def create_agent( - model: Union[BaseChatModel, BaseLanguageModel], - tools: Sequence[StructuredTool], - parser: AgentOutputParser, - *, - ast_printer: Union[AstPrinter, Literal["xml"]] = "xml", - rate_limiter: Optional[RateLimiter] = None, -) -> Runnable[AgentInput, Union[AgentAction, AgentFinish]]: - """Create an agent for a chat model.""" - if isinstance(ast_printer, str): - if ast_printer == "xml": - ast_printer_ = XMLEncoder() - elif ast_printer == "typescript": - ast_printer_ = TypeScriptEncoder() - else: - raise ValueError(f"Unknown ast printer: {ast_printer}") - elif isinstance(ast_printer, AstPrinter): - ast_printer_ = ast_printer - else: - raise TypeError( - f"Expected AstPrinter or str, got {type(ast_printer)} for `ast_printer`" - ) - - function_definitions = [convert_tool_to_function_definition(tool) for tool in tools] - tool_description = ast_printer_.visit_function_definitions(function_definitions) - - template = ChatPromptTemplate.from_messages( - [ - ("system", _AGENT_INSTRUCTIONS_BLOB_STYLE), - MessagesPlaceholder("examples"), # Can use to add example traces - ("human", "{input}"), - MessagesPlaceholder("history"), - ] - ).partial(tool_description=tool_description) - - # For the time being, hard-coding the fact that we're using a tag. - model = model.bind(stop=[""]) - - if rate_limiter: - # Apply a rate limiter if it was provided - model = with_rate_limit(model, rate_limiter) - - agent = ( - { - "input": lambda x: x["input"], - "history": lambda x: format_steps_for_chat( - x["intermediate_steps"], ast_printer_ - ), - "examples": lambda x: x.get("examples", []), - } - | template - | model - | parser - ) - return agent diff --git a/langchain_benchmarks/tool_usage/agents/experimental/encoder.py b/langchain_benchmarks/tool_usage/agents/experimental/encoder.py deleted file mode 100644 index c6799609..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/encoder.py +++ /dev/null @@ -1,240 +0,0 @@ -"""Prototyping code for rendering function definitions, invocations, and results. - -Types are simplified for now to `str`. - -We should actually support something like pydantic or jsonschema for the types, so -we can expand them recursively for nested types. 
-""" -import abc -from typing import Any, List, Optional - -from typing_extensions import NotRequired, TypedDict - - -class Parameter(TypedDict): - """Representation for a parameter.""" - - name: str - type: str - description: str - - -class Arguments(TypedDict): - """Arguments are passed to a function during function invocation.""" - - name: Optional[str] - value: Any - - -class ReturnValue(TypedDict): - """Representation for a return value of a function call.""" - - type: str - description: NotRequired[str] - - -class FunctionDefinition(TypedDict): - """Representation for a function.""" - - name: str - description: str # Function description - parameters: List[Parameter] - return_value: ReturnValue - - -class FunctionInvocation(TypedDict): - """Representation for a function invocation.""" - - id: NotRequired[str] - name: str - arguments: List[Arguments] - - -class FunctionResult(TypedDict): - """Representation for a function result.""" - - id: NotRequired[str] - name: str - result: Optional[str] - error: Optional[str] - - -class Visitor(abc.ABC): - @abc.abstractmethod - def visit_function_definition(self, function_definition: FunctionDefinition) -> str: - """Render a function.""" - - @abc.abstractmethod - def visit_function_definitions( - self, function_definitions: List[FunctionDefinition] - ) -> str: - """Render a function.""" - - @abc.abstractmethod - def visit_function_invocation(self, function_invocation: FunctionInvocation) -> str: - """Render a function invocation.""" - - @abc.abstractmethod - def visit_function_result(self, function_result: FunctionResult) -> str: - """Render a function result.""" - - -class AstPrinter(Visitor): - """Print the AST.""" - - -class XMLEncoder(AstPrinter): - def visit_function_definition(self, function_definition: FunctionDefinition) -> str: - """Render a function.""" - parameters_lines = [] - - for parameter in function_definition["parameters"]: - parameters_lines.extend( - [ - "", - f"{parameter['name']}", - f"{parameter['type']}", - f"{parameter['description']}", - "", - ] - ) - lines = [ - "", - f"{function_definition['name']}", - "", - f"{function_definition['description']}", - "", - "", - *parameters_lines, - "", - "", - f"{function_definition['return_value']['type']}", - ] - if function_definition["return_value"].get("description"): - lines.append( - f"{function_definition['return_value']['description']}" - f"" - ) - - lines.extend(["", ""]) - return "\n".join(lines) - - def visit_function_definitions( - self, function_definitions: List[FunctionDefinition] - ) -> str: - """Render a function.""" - strs = [ - self.visit_function_definition(function_definition) - for function_definition in function_definitions - ] - return "\n" + "\n".join(strs) + "\n" - - def visit_function_invocation(self, invocation: FunctionInvocation) -> str: - """Render a function invocation.""" - arguments_as_strings = [ - "\n" - f"{argument['name']}\n" - f"{argument['value']}\n" - "\n" - for argument in invocation["arguments"] - ] - lines = [""] - - if invocation.get("id"): - lines.append(f"{invocation['id']}") - - lines.extend( - [ - f"{invocation['name']}\n" - "\n" - f"{''.join(arguments_as_strings)}" # Already includes trailing newline - "\n" - "" - ] - ) - return "\n".join(lines) - - def visit_function_result(self, function_result: FunctionResult) -> str: - """Render a function result.""" - lines = [ - "", - ] - - if function_result.get("id"): - lines.append(f"{function_result['id']}") - - lines.append(f"{function_result['name']}") - - if function_result["error"]: - 
lines.extend( - [ - f"{function_result['error']}", - ] - ) - else: - lines.append( - f"{function_result['result']}", - ) - - lines.append("") - - return "\n".join(lines) - - -class TypeScriptEncoder(AstPrinter): - def visit_function_definition(self, function_definition: FunctionDefinition) -> str: - """Render a function.""" - parameters_as_strings = [ - f"{parameter['name']}: {parameter['type']}" - for parameter in function_definition["parameters"] - ] - # Let's use JSdoc style comments - # First the function description - lines = [ - f"// {function_definition['description']}", - # Then the parameter descriptions - *[ - f"// @param {parameter['name']} {parameter['description']}" - for parameter in function_definition["parameters"] - ], - # Then the return value description - f"// @returns {function_definition['return_value']['description']}", - # Then the function definition - f"function {function_definition['name']}(" - f"{', '.join(parameters_as_strings)}): " - f"{function_definition['return_value']['type']};", - ] - - # finally join - function = "\n".join(lines) - return function - - def visit_function_definitions( - self, function_definitions: List[FunctionDefinition] - ) -> str: - """Render a function.""" - strs = [ - self.visit_function_definition(function_definition) - for function_definition in function_definitions - ] - return "\n\n".join(strs) - - def visit_function_invocation(self, invocation: FunctionInvocation) -> str: - """Render a function invocation.""" - arguments_as_strings = [ - f"{argument['name']}: {argument['value']}" - for argument in invocation["arguments"] - ] - lines = [f"{invocation['name']}(" f"{', '.join(arguments_as_strings)});"] - return "\n".join(lines) - - def visit_function_result(self, function_result: FunctionResult) -> str: - """Render a function result.""" - lines = [] - if function_result["error"]: - lines.append(f"ERROR: {function_result['error']}") - else: - lines.append(f"> {function_result['result']}") - if function_result.get("id"): - lines.append(f"// ID: {function_result['id']}") - return "\n".join(lines) diff --git a/langchain_benchmarks/tool_usage/agents/experimental/factory.py b/langchain_benchmarks/tool_usage/agents/experimental/factory.py deleted file mode 100644 index d158acd3..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/factory.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Factory for creating agents for the tool usage task.""" -from typing import Optional - -from langchain.agents import AgentExecutor -from langchain_core.runnables import Runnable, RunnableConfig - -from langchain_benchmarks import RateLimiter, model_registry -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter -from langchain_benchmarks.tool_usage.agents.experimental.agent import create_agent -from langchain_benchmarks.tool_usage.agents.experimental.parser import ( - GenericAgentParser, -) - - -class CustomAgentFactory: - """A factory for creating tool using agents. - - A factory for agents that do not leverage any special JSON mode for - function usage; instead all function invocation behavior is implemented solely - through prompt engineering and parsing. - """ - - def __init__( - self, - task: ToolUsageTask, - model: str, - *, - rate_limiter: Optional[RateLimiter] = None, - num_retries: int = 0, - ) -> None: - """Create an agent factory for the given tool usage task. 
- - Args: - task: The task to create an agent factory for - model: model name (check model_registry) - rate_limiter: The rate limiter to use if provided - num_retries: The number of times to retry the agent if it fails - """ - if model not in model_registry: - raise ValueError(f"Unknown model: {model}") - self.task = task - self.model = model - self.rate_limiter = rate_limiter - self.num_retries = num_retries - - def __call__(self) -> Runnable: - if isinstance(self.model, str): - registered_model = model_registry.get_model(self.model) - if registered_model is None: - raise ValueError(f"Unknown model: {self.model}") - model = registered_model.get_model(model_params={"temperature": 0}) - else: - model = self.model - - def _add_task_instructions( - input: dict, config: Optional[RunnableConfig] = None, **kwargs - ) -> dict: - """Add task instructions to the question.""" - if not isinstance(input, dict): - raise ValueError( - f"Expected input to be a dict with key `question`. " - f"Found {type(input)}." - ) - input = input.copy() - input["question"] = ( - f"{self.task.instructions}\nWrite down your answer, " - f"but do not explain it. Input: `{input['question']}`" - ) - return input - - env = self.task.create_environment() - - agent = create_agent( - model, - env.tools, - GenericAgentParser(wrapping_xml_tag="tool", require_closing_xml_tag=False), - rate_limiter=self.rate_limiter, - ) - if self.num_retries > 0: - agent = agent.with_retry( - stop_after_attempt=self.num_retries + 1, - ) - executor = AgentExecutor( - agent=agent, - tools=env.tools, - handle_parsing_errors=True, - return_intermediate_steps=True, - ) - - return _add_task_instructions | apply_agent_executor_adapter( - executor, state_reader=env.read_state - ) diff --git a/langchain_benchmarks/tool_usage/agents/experimental/parser.py b/langchain_benchmarks/tool_usage/agents/experimental/parser.py deleted file mode 100644 index 002ddf02..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/parser.py +++ /dev/null @@ -1,122 +0,0 @@ -import ast -import re -from typing import Dict, Optional, Union - -from langchain.agents import AgentOutputParser -from langchain.pydantic_v1 import BaseModel, Field -from langchain_core.agents import AgentAction, AgentActionMessageLog, AgentFinish -from langchain_core.exceptions import OutputParserException -from langchain_core.messages import AIMessage - - -class _ToolInvocationRequest(BaseModel): - """Light-weight pydantic model for validating the raw tool invocation request. - - The purpose of this model, is to make sure that whatever as parsed from - the raw llm output has `tool_name` and potential `arguments` fields, and - nothing else. - """ - - tool_name: str - # OK parameterless tools which do not take arguments - arguments: Optional[Dict] = Field(default_factory=dict) - - -class GenericAgentParser(AgentOutputParser): - """A generalized parser that makes it easier to parameterize different parsing.""" - - wrapping_xml_tag: str - """The tag that wraps the function invocation request. - - For example, if "tool", then the function invocation request should be wrapped - in .... - """ - require_closing_xml_tag: bool = False - """Whether we should require a closing tag for the wrapping_xml_tag. 
- - For example, if True, then the function invocation request should be wrapped - """ - - def parse(self, text: str) -> Union[AgentFinish, AgentAction]: - """Parse the output of the agent.""" - open_tag = f"<{self.wrapping_xml_tag}>" - close_tag = f"" - if open_tag in text: - # This is a hack to make sure that is always present - # in the output if . may be a stop sequence for the - # language model, so depending on implementation - # the stop sequence may be cut off. - # There might be a better way to do this, but this works and - # is simple. - if not self.require_closing_xml_tag: - text += close_tag - - pattern = rf"{open_tag}(?P.*?){close_tag}" - match = re.search(pattern, text, re.DOTALL) - if match: - content = match.group("invocation").strip() - return parse_invocation(content, self.wrapping_xml_tag) - - return AgentFinish( - log=text, - return_values={ - "output": text, - }, - ) - - -def parse_invocation(text: str, tag: str) -> AgentAction: - """Parse the content of the function invocation. - - Args: - text: The text to parse. - tag: The tag that wraps the function invocation request. - - Returns: - An AgentAction that corresponds to the function invocation. - - Raises: - OutputParserException: If the parsing fails. - - This exception is meant to be caught by the agent executor and - handled appropriately to provide feedback to the LLM. - """ - ai_content = f"<{tag}>{text}\n" - - try: - result = ast.literal_eval(text) - except BaseException as e: - # Convert this to something controllable by the user. - err_msg = ( - f"ERROR: Please use the format " - f'<{tag}>{{"tool_name": $TOOL_NAME, "arguments": $ARGUMENTS}}\n' - ) - - raise OutputParserException( - error=e, - llm_output=ai_content, - observation=err_msg, - send_to_llm=True, - ) - - try: - request = _ToolInvocationRequest.validate(result) - except Exception as e: # Using broad exception since it's not just ValidationError - # Can also raise DictError if result is not a dict. - err_msg = ( - f"ERROR: Please use the format " - f'<{tag}>{{"tool_name": $TOOL_NAME, "arguments": $ARGUMENTS}}\n' - ) - raise OutputParserException( - error=e, - llm_output=ai_content, - send_to_llm=True, - observation=err_msg, - ) - - return AgentActionMessageLog( - message_log=[AIMessage(content=ai_content)], - tool=request.tool_name, - tool_input=request.arguments, - log=f"\nInvoking {request.tool_name}: {request.arguments}\n\t", - ) diff --git a/langchain_benchmarks/tool_usage/agents/experimental/prompts.py b/langchain_benchmarks/tool_usage/agents/experimental/prompts.py deleted file mode 100644 index 9abc051e..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/prompts.py +++ /dev/null @@ -1,42 +0,0 @@ -AGENT_INSTRUCTIONS_XML_FORMAT = """\ -In this environment you have access to a set of tools you can use to answer the user's question. - -You may call them like this: - - -$TOOL_NAME - -<$PARAMETER_NAME>$PARAMETER_VALUE -... - - - - -Here are the tools available: - -{tool_description} -""" # noqa: E501 - -_AGENT_INSTRUCTIONS_BLOB_STYLE = """\ -In this environment you have access to a set of tools you can use to answer the user's question. - -Here are the tools available: - -{tool_description} - -You may call one tool at a time using a format that includes and tag. - -Inside the tag the content is a python dictionary that uses python literals (e.g., numbers, strings, lists, dictionaries, etc.) to specify the tool invocation. - -It must match the schema of the function as described in the tool description. 
-"arguments" is a dictionary of the arguments to the function. - - -{{ - "tool_name": $TOOL_NAME, - "arguments": $ARGUMENTS -}} - - -If you do not know the answer use more tools. You can only take a single action at a time.\ -""" # noqa: E501 diff --git a/langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py b/langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py deleted file mode 100644 index 04fed82b..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Utilities to extract information from langchain tools for use in prompts.""" -import inspect -from textwrap import dedent -from typing import List - -from langchain.tools.base import StructuredTool - -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - FunctionDefinition, - Parameter, -) - -# PUBLIC API - - -def get_parameters_from_tool(tool: StructuredTool) -> List[Parameter]: - """Convert a langchain tool to a tool user tool.""" - schema = tool.args_schema.schema() - - properties = schema["properties"] - parameters = [] - # Is this needed or is string OK? - type_adapter = { - "string": "str", # str or string? - "integer": "int", - "number": "float", - "boolean": "bool", - } - for key, value in properties.items(): - parameters.append( - { - "name": key, - "type": type_adapter.get(value["type"], value["type"]), - "description": value.get("description", ""), - } - ) - - return parameters - - -# -def convert_tool_to_function_definition(tool: StructuredTool) -> FunctionDefinition: - """Convert a langchain tool to a tool user tool.""" - # Here we re-inspect the underlying function to get the doc-string - # since StructuredTool modifies it, but we want the raw one for maximum - # flexibility. - description = inspect.getdoc(tool.func) - - parameters = get_parameters_from_tool(tool) - return { - "name": tool.name, - "description": dedent(description), - "parameters": parameters, - "return_value": { - "type": "Any", - }, - } diff --git a/tests/unit_tests/agents/__init__.py b/tests/unit_tests/agents/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit_tests/agents/encoding_and_decoding/__init__.py b/tests/unit_tests/agents/encoding_and_decoding/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit_tests/agents/encoding_and_decoding/test_decoding.py b/tests/unit_tests/agents/encoding_and_decoding/test_decoding.py deleted file mode 100644 index 5ed5da7a..00000000 --- a/tests/unit_tests/agents/encoding_and_decoding/test_decoding.py +++ /dev/null @@ -1,54 +0,0 @@ -import pytest -from langchain_core.agents import AgentActionMessageLog, AgentFinish -from langchain_core.exceptions import OutputParserException -from langchain_core.messages import AIMessage - -from langchain_benchmarks.tool_usage.agents.experimental.parser import ( - GenericAgentParser, -) - - -def test_parser() -> None: - """Test parser.""" - parser = GenericAgentParser(require_closing_tag=False, wrapping_xml_tag="tool") - - # If tag not found then it's an agent finish - assert isinstance(parser.invoke("goodbye"), AgentFinish) - - with pytest.raises(OutputParserException): - # Invocation content is missing tool name and arguments - parser.invoke("'hello'") - - with pytest.raises(OutputParserException): - parser.invoke("hello") - - # Full invocation - text = ( - '{\n "tool_name": "type_letter",\n ' - '"arguments": {\n ' - '"letter": "h"\n }\n}\n' - ) - - assert parser.invoke(text) == AgentActionMessageLog( - 
tool="type_letter", - tool_input={"letter": "h"}, - log="\nInvoking type_letter: {'letter': 'h'}\n\t", - message_log=[AIMessage(content=text)], - ) - - # Test more cases - parsed = parser.invoke('{"tool_name": "hello"}') - assert parsed.tool == "hello" - # Assumes that it's a structured tool by default! - assert parsed.tool_input == {} - - with pytest.raises(OutputParserException): - # Arguments need to be a dict - parser.invoke('{"tool_name": "hello", "arguments": [1, 2]}') - - parsed = parser.invoke( - '{"tool_name": "hello", "arguments": {"a": "b"}}' - ) - assert parsed.tool == "hello" - # Assumes that it's a structured tool by default! - assert parsed.tool_input == {"a": "b"} diff --git a/tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py b/tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py deleted file mode 100644 index 39175919..00000000 --- a/tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Test typescript encoding.""" -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - FunctionDefinition, - TypeScriptEncoder, -) - - -def test_function_definition() -> None: - """Test encoding a function definition.""" - function_definition = FunctionDefinition( - name="test_function", - description="A test function", - parameters=[ - {"name": "test_parameter", "type": "str", "description": "A test parameter"} - ], - return_value={"type": "str", "description": "A test return value"}, - ) - encoder = TypeScriptEncoder() - xml = encoder.visit_function_definition(function_definition) - assert xml == ( - "// A test function\n" - "// @param test_parameter A test parameter\n" - "// @returns A test return value\n" - "function test_function(test_parameter: str): str;" - ) diff --git a/tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py b/tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py deleted file mode 100644 index d41b63be..00000000 --- a/tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Test XML encoding and decoding of function definitions, invocation, and results.""" -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - FunctionDefinition, - FunctionInvocation, - FunctionResult, - XMLEncoder, -) - - -def test_function_definition_encoding() -> None: - """Test encoding a function definition.""" - function_definition = FunctionDefinition( - name="test_function", - description="A test function", - parameters=[ - {"name": "test_parameter", "type": "str", "description": "A test parameter"} - ], - return_value={"type": "str", "description": "A test return value"}, - ) - encoder = XMLEncoder() - xml = encoder.visit_function_definition(function_definition) - assert xml == ( - "\n" - "test_function\n" - "\n" - "A test function\n" - "\n" - "\n" - "\n" - "test_parameter\n" - "str\n" - "A test parameter\n" - "\n" - "\n" - "\n" - "str\n" - "A test return value\n" - "\n" - "" - ) - - -def test_function_result_encoding() -> None: - """Test encoding a function result.""" - encoder = XMLEncoder() - function_result = FunctionResult( - name="test_function", - result="test_result", - error=None, - ) - xml = encoder.visit_function_result(function_result) - assert xml == ( - "\n" - "test_function\n" - "test_result\n" - "" - ) - - function_result = FunctionResult( - name="test_function", - error="error", - ) - xml = encoder.visit_function_result(function_result) - assert xml == ( - "\n" - 
"test_function\n" - "error\n" - "" - ) - - -def test_function_invocation() -> None: - """Test function invocation.""" - function_invocation = FunctionInvocation( - name="test_function", - arguments=[{"name": "test_argument", "value": "test_value"}], - ) - encoder = XMLEncoder() - xml = encoder.visit_function_invocation(function_invocation) - assert xml == ( - "\n" - "test_function\n" - "\n" - "\n" - "test_argument\n" - "test_value\n" - "\n" - "\n" - "" - ) diff --git a/tests/unit_tests/agents/test_tool_utils.py b/tests/unit_tests/agents/test_tool_utils.py deleted file mode 100644 index 9e4bb95f..00000000 --- a/tests/unit_tests/agents/test_tool_utils.py +++ /dev/null @@ -1,59 +0,0 @@ -import pytest -from langchain.tools import tool - -from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import ( - convert_tool_to_function_definition, -) - - -@tool -def get_hello() -> str: - """Get hello.""" - return "hello" - - -@tool -def repeat(x: str) -> str: - """Repeat x. - - Args: - x: The string to repeat. - - Returns: - The repeated string. - """ - return x - - -def test_parameterless_function() -> None: - """Test foo.""" - function_definition = convert_tool_to_function_definition(get_hello) - assert function_definition == { - "name": "get_hello", - "description": "Get hello.", - "parameters": [], - "return_value": { - "type": "Any", - }, - } - - -@pytest.mark.skip("Need to fix handling of leading whitespace") -def test_function_with_parameters() -> None: - import textwrap - - doc = textwrap.dedent(repeat.func.__doc__) - assert convert_tool_to_function_definition(repeat) == { - "name": "repeat", - "description": doc, - "parameters": [ - { - "name": "x", - "type": "str", - "description": "", # Need to fix this - } - ], - "return_value": { - "type": "Any", - }, - } From 1dad65a375578dfee85d3fad9ce59aae0cf59bfe Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Thu, 18 Apr 2024 11:13:57 -0400 Subject: [PATCH 4/4] x --- langchain_benchmarks/tool_usage/agents/base.py | 11 +++++++++++ .../tool_usage/agents/runnable_agent.py | 3 ++- .../tool_usage/agents/tool_using_agent.py | 3 ++- tests/unit_tests/tool_usage/test_public_api.py | 8 +++++++- 4 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 langchain_benchmarks/tool_usage/agents/base.py diff --git a/langchain_benchmarks/tool_usage/agents/base.py b/langchain_benchmarks/tool_usage/agents/base.py new file mode 100644 index 00000000..aafdba8d --- /dev/null +++ b/langchain_benchmarks/tool_usage/agents/base.py @@ -0,0 +1,11 @@ +import abc + +from langchain_core.runnables import Runnable + + +class AgentFactory(abc.ABC): + """Abstract class for agent factory""" + + @abc.abstractmethod + def __call__(self) -> Runnable: + """Create a new agent""" diff --git a/langchain_benchmarks/tool_usage/agents/runnable_agent.py b/langchain_benchmarks/tool_usage/agents/runnable_agent.py index 8b130f55..b6f76b7b 100644 --- a/langchain_benchmarks/tool_usage/agents/runnable_agent.py +++ b/langchain_benchmarks/tool_usage/agents/runnable_agent.py @@ -10,9 +10,10 @@ from langchain_benchmarks.schema import ToolUsageTask from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter +from langchain_benchmarks.tool_usage.agents.base import AgentFactory -class CustomRunnableAgentFactory: +class CustomRunnableAgentFactory(AgentFactory): """A factory for creating tool using agents. 

     A factory for agents that do not leverage any special JSON mode for

diff --git a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
index 78672e4a..93653255 100644
--- a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
+++ b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
@@ -12,9 +12,10 @@
 from langchain_benchmarks.rate_limiting import RateLimiter, with_rate_limit
 from langchain_benchmarks.schema import ToolUsageTask
 from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
+from langchain_benchmarks.tool_usage.agents.base import AgentFactory
 
 
-class StandardAgentFactory:
+class StandardAgentFactory(AgentFactory):
     """A standard agent factory.
 
     Use this factory with chat models that support the standard LangChain tool

diff --git a/tests/unit_tests/tool_usage/test_public_api.py b/tests/unit_tests/tool_usage/test_public_api.py
index 1f422366..00110722 100644
--- a/tests/unit_tests/tool_usage/test_public_api.py
+++ b/tests/unit_tests/tool_usage/test_public_api.py
@@ -6,5 +6,11 @@ def test_public_api() -> None:
     # This test will also fail if __all__ is not sorted.
     # Please keep it sorted!
     assert __all__ == sorted(
-        ["apply_agent_executor_adapter", "get_eval_config"], key=str.lower
+        [
+            "apply_agent_executor_adapter",
+            "get_eval_config",
+            "CustomRunnableAgentFactory",
+            "StandardAgentFactory",
+        ],
+        key=str.lower,
     )
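
Reviewer notes on the end state of this series.

After these four patches, the supported path for benchmarking a tool-calling chat model is StandardAgentFactory, which wraps langchain's create_tool_calling_agent. The model-specific factories could be deleted because create_tool_calling_agent delegates tool binding to each chat model's own bind_tools implementation, so one factory covers OpenAI, Anthropic, and other providers. A minimal usage sketch follows; it assumes "Tool Usage - Typewriter (1 tool)" as a registered task name and the (task, model, prompt) constructor implied by the context lines above, so verify both against your checkout.

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

from langchain_benchmarks import registry
from langchain_benchmarks.tool_usage import StandardAgentFactory

# Any ToolUsageTask from the registry should work here.
task = registry["Tool Usage - Typewriter (1 tool)"]
model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# create_tool_calling_agent requires an agent_scratchpad placeholder;
# {instructions} and {question} are assumed to be filled in from the task.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "{instructions}"),
        ("human", "{question}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
)

factory = StandardAgentFactory(task, model, prompt)  # assumed signature
agent = factory()  # a fresh Runnable wired to a new environment per call
print(agent.invoke({"question": "abc"}))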
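The AgentFactory base class added in PATCH 4/4 pins down the one contract the harness relies on: a factory is called with no arguments and returns a fresh Runnable. A sketch of a custom implementation is below; MyAgentFactory and its constructor arguments are illustrative only, while the AgentExecutor and adapter wiring mirrors what runnable_agent.py does in this series.

from langchain.agents import AgentExecutor
from langchain_core.runnables import Runnable

from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
from langchain_benchmarks.tool_usage.agents.base import AgentFactory


class MyAgentFactory(AgentFactory):  # hypothetical subclass
    def __init__(self, task: ToolUsageTask, agent: Runnable) -> None:
        self.task = task
        # Any Runnable that emits AgentAction / AgentFinish values works.
        self.agent = agent

    def __call__(self) -> Runnable:
        # A fresh environment per call keeps benchmark runs independent.
        env = self.task.create_environment()
        executor = AgentExecutor(
            agent=self.agent,
            tools=env.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )
        # The adapter normalizes inputs/outputs and attaches environment
        # state to the output when a state reader is available.
        return apply_agent_executor_adapter(executor, state_reader=env.read_state)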
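For context, a factory is typically consumed by a benchmark run rather than invoked directly. The sketch below assumes the LangSmith client's run_on_dataset API as it existed around this series, that get_eval_config() works with its default arguments, and that the task's dataset has already been cloned; it reuses the task and factory names from the first sketch.

from langsmith.client import Client

from langchain_benchmarks.tool_usage import get_eval_config

client = Client()
client.run_on_dataset(
    dataset_name=task.name,
    llm_or_chain_factory=factory,  # the factory itself is called once per example
    evaluation=get_eval_config(),
    verbose=True,
)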