From 4167f20c98ded73fb944e7fcacc656bd83ea9d0a Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Thu, 18 Apr 2024 09:53:11 -0400
Subject: [PATCH 1/4] x

---
 langchain_benchmarks/tool_usage/__init__.py   |   6 +
 .../tool_usage/agents/__init__.py             |  14 -
 .../tool_usage/agents/anthropic_tool_user.py  | 271 ------------------
 .../tool_usage/agents/openai_assistant.py     |  77 -----
 .../tool_usage/agents/openai_functions.py     | 166 -----------
 .../tool_usage/agents/runnable_agent.py       |   4 +-
 .../tool_usage/agents/tool_using_agent.py     |   3 +-
 7 files changed, 10 insertions(+), 531 deletions(-)
 delete mode 100644 langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/openai_assistant.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/openai_functions.py

diff --git a/langchain_benchmarks/tool_usage/__init__.py b/langchain_benchmarks/tool_usage/__init__.py
index aa22c995..83da61d1 100644
--- a/langchain_benchmarks/tool_usage/__init__.py
+++ b/langchain_benchmarks/tool_usage/__init__.py
@@ -1,9 +1,15 @@
 """Package for helping to evaluate agent runs."""
+from langchain_benchmarks.tool_usage.agents import (
+    CustomRunnableAgentFactory,
+    StandardAgentFactory,
+)
 from langchain_benchmarks.tool_usage.agents import apply_agent_executor_adapter
 from langchain_benchmarks.tool_usage.evaluators import get_eval_config
 
 # Please keep this list sorted!
 __all__ = [
     "apply_agent_executor_adapter",
+    "CustomRunnableAgentFactory",
     "get_eval_config",
+    "StandardAgentFactory",
 ]

diff --git a/langchain_benchmarks/tool_usage/agents/__init__.py b/langchain_benchmarks/tool_usage/agents/__init__.py
index c59133fe..7692514c 100644
--- a/langchain_benchmarks/tool_usage/agents/__init__.py
+++ b/langchain_benchmarks/tool_usage/agents/__init__.py
@@ -1,25 +1,11 @@
 from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
-from langchain_benchmarks.tool_usage.agents.anthropic_tool_user import (
-    AnthropicToolUserFactory,
-)
-from langchain_benchmarks.tool_usage.agents.experimental.factory import (
-    CustomAgentFactory,
-)
-from langchain_benchmarks.tool_usage.agents.openai_assistant import (
-    OpenAIAssistantFactory,
-)
-from langchain_benchmarks.tool_usage.agents.openai_functions import OpenAIAgentFactory
 from langchain_benchmarks.tool_usage.agents.runnable_agent import (
     CustomRunnableAgentFactory,
 )
 from langchain_benchmarks.tool_usage.agents.tool_using_agent import StandardAgentFactory
 
 __all__ = [
-    "OpenAIAgentFactory",
-    "OpenAIAssistantFactory",
     "apply_agent_executor_adapter",
-    "CustomAgentFactory",
-    "AnthropicToolUserFactory",
     "CustomRunnableAgentFactory",
     "StandardAgentFactory",
 ]

diff --git a/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py b/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py
deleted file mode 100644
index 53773a53..00000000
--- a/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py
+++ /dev/null
@@ -1,271 +0,0 @@
-"""Wrapper around the anthropic tool user SDK.
-
-The anthropic tool user SDK is an alpha release so this code will likely be
-changed or deleted in the future. It's here simply to make it easier to benchmark
-the performance of the existing tool user SDK, to compare it with the performance
-of other implementations.
-""" - -from importlib.util import find_spec -from typing import Any, Dict, List, Optional, Sequence - -from langchain.tools import StructuredTool -from langchain_core.callbacks.manager import trace_as_chain_group -from langchain_core.runnables import Runnable, RunnableConfig, RunnableLambda - -from langchain_benchmarks import rate_limiting -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter - - -def convert_langchain_tool_to_tool_user_tool(lc_tool: StructuredTool) -> Any: - """Convert a langchain tool to a tool user tool.""" - from tool_use_package.tools.base_tool import BaseTool - - class DynamicTool(BaseTool): - def use_tool(self, **kwargs): - return lc_tool(kwargs) - - schema = lc_tool.args_schema.schema() - - properties = schema["properties"] - parameters = [] - # Is this needed or is string OK? - type_adapter = { - "string": "str", # str or string? - "integer": "int", - "number": "float", - "boolean": "bool", - } - for key, value in properties.items(): - parameters.append( - { - "name": key, - "type": type_adapter.get(value["type"], value["type"]), - "description": value.get("description", ""), - } - ) - - return DynamicTool(lc_tool.name, lc_tool.description, parameters) - - -def _handle_tool_inputs( - tool_inputs: List[Dict[str, Any]], - tools: Sequence[StructuredTool], - config: Optional[RunnableConfig] = None, -) -> Dict[str, Any]: - """Handle tool inputs.""" - tool_by_name = {tool.name: tool for tool in tools} - tool_error: Optional[str] = None - tool_outputs = [] - for tool_input in tool_inputs: - tool_name = tool_input["tool_name"] - tool_arguments = tool_input["tool_arguments"] - tool = tool_by_name[tool_name] - try: - tool_result = tool.invoke(tool_arguments, config=config) - except Exception as e: # Break on first error - tool_error = str(e) - tool_outputs = None - break - tool_outputs.append( - { - "tool_name": tool_name, - "tool_result": tool_result, - } - ) - return { - "role": "tool_outputs", - "tool_outputs": tool_outputs, - "tool_error": tool_error, - } - - -def run_anthropic_agent_simple( - tools: Sequence[StructuredTool], - user_message: str, - *, - max_iterations: int = 30, - config: Optional[RunnableConfig] = None, - **kwargs, -) -> List[dict]: - """Make an anthropic agent.""" - from tool_use_package.tool_user import ToolUser - - verbose = kwargs.pop("verbose", False) - - tool_user = ToolUser( - [convert_langchain_tool_to_tool_user_tool(tool) for tool in tools], **kwargs - ) - messages = [ - { - "role": "human", - "content": user_message, - "tool_error": None, - "tool_outputs": [], - "tool_inputs": [], - } - ] - with trace_as_chain_group( - "Anthropic Agent Run", - inputs={"user_message": user_message}, - callback_manager=config.get("callbacks", None) if config else None, - ) as group_manager: - for num_iteration in range(max_iterations): - with trace_as_chain_group( - f"Anthropic Agent Iteration {num_iteration}", - inputs={"messages": messages}, - callback_manager=group_manager.parent_run_manager.get_child(), - ) as iteration_manager: - last_message = tool_user.use_tools( - messages, execution_mode="manual", verbose=verbose - ) - new_messages = [last_message] - - if last_message["role"] == "tool_inputs": - tool_inputs = last_message["tool_inputs"] - new_message = _handle_tool_inputs( - tool_inputs, - tools, - config={ - "callbacks": iteration_manager.parent_run_manager.get_child(), - }, - ) - new_messages.append(new_message) - - 
iteration_manager.on_chain_end(outputs=new_messages) - messages.extend(new_messages) - - # Finally break if the last message is from the assistant - if last_message["role"] == "assistant": - break - else: - raise ValueError("Max iterations reached") - group_manager.on_chain_end(outputs=messages) - return messages - - -def convert_messages_to_finalized_output( - messages: List[Dict[str, Any]], -) -> Dict[str, Any]: - """Convert the history of messages into the expected output for eval. - - This matches the agent executor output which has the following structure: - - { - "output": "The output of the agent", - "intermediate_steps": [ - ( - AgentAction( - tool="add_x_y", - tool_input={"x": 2.0, "y": 5.0}, - log="Invoking tool `add_x_y` with `{'x': 2.0, 'y': 5.0}`", - ), - 9.0, - ) - ], - "state": Any, # Optional key for tasks that involve manipulation of an env. - } - """ - if not messages: - raise ValueError("Expected at least one message") - - last_message = messages[-1] - - if last_message["role"] != "assistant": - raise ValueError( - f"Expected the last message to be from the assistant. " - f"Instead got {last_message}." - ) - - actual_steps = [] - - for message in messages: - if "role" not in message: - raise ValueError(f"Expected role in message {message}") - role = message["role"] - - if role == "tool_inputs": - # Get the name of the tool used - for tool_input in message["tool_inputs"]: - actual_steps.append(tool_input["tool_name"]) - - return { - "output": last_message["content"], - "actual_steps": actual_steps, - } - - -def create_agent(tools: Sequence[StructuredTool]) -> RunnableLambda: - """Create an agent.""" - - def run_agent( - input: dict, config: Optional[RunnableConfig] = None, **kwargs - ) -> dict: - """Run the agent.""" - messages = run_anthropic_agent_simple( - tools, input["input"], config=config, **kwargs - ) - return convert_messages_to_finalized_output(messages) - - return RunnableLambda(run_agent) - - -class AnthropicToolUserFactory: - def __init__( - self, - task: ToolUsageTask, - *, - rate_limiter: Optional[rate_limiting.RateLimiter] = None, - ) -> None: - """Create an OpenAI agent factory for the given task. - - - Args: - task: The task to create an agent factory for. - rate_limiter: The rate limiter to use - """ - self.task = task - self.rate_limiter = rate_limiter - if not find_spec("tool_use_package"): - raise ImportError( - 'Could not import "tool_use_package". Please ' - "follow instructions here to install " - "https://github.com/anthropics/anthropic-tools/tree/main" - ) - - def __call__(self, **kwargs: Any) -> Runnable: - env = self.task.create_environment() - - def _add_task_instructions( - input: dict, config: Optional[RunnableConfig] = None, **kwargs - ) -> dict: - """Add task instructions to the question.""" - if not isinstance(input, dict) or "question" not in input: - raise ValueError( - f"Expected input to be a dict with key `question`. " - f"Found {type(input)}." - ) - - input = input.copy() - input["question"] = ( - f"{self.task.instructions}\nWrite down your answer, " - f"but do not explain it. 
Input: `{input['question']}`" - ) - return input - - agent = create_agent(env.tools) # type: ignore - # Returns `state` in the output if the environment has a state reader - # makes sure that `output` is always in the output - - if kwargs: - agent = agent.bind(**kwargs) - - runnable = _add_task_instructions | apply_agent_executor_adapter( - agent, state_reader=env.read_state - ) - - if self.rate_limiter: # Add a rate limiter - runnable = rate_limiting.with_rate_limit(runnable, self.rate_limiter) - - return runnable diff --git a/langchain_benchmarks/tool_usage/agents/openai_assistant.py b/langchain_benchmarks/tool_usage/agents/openai_assistant.py deleted file mode 100644 index 239846c6..00000000 --- a/langchain_benchmarks/tool_usage/agents/openai_assistant.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Code for creating an assistant factory for evaluating tool usage tasks. - -See: https://platform.openai.com/docs/assistants/how-it-works/creating-assistants -""" -from typing import Optional - -from langchain.agents import AgentExecutor -from langchain.agents.openai_assistant.base import OpenAIAssistantRunnable -from langchain.schema.runnable import Runnable - -from langchain_benchmarks import rate_limiting -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter - - -class OpenAIAssistantFactory: - def __init__( - self, - task: ToolUsageTask, - *, - model: str, - rate_limiter: Optional[rate_limiting.RateLimiter] = None, - num_retries: int = 0, - ) -> None: - """Create an OpenAI agent factory for the given task. - - Args: - task: The task to create an agent factory for. - model: The model to use -- this must be an open AI model. - rate_limiter: The rate limiter to use - num_retries: The number of times to retry the assistant if it fails - """ - if not isinstance(model, str): - raise ValueError(f"Expected str for model, got {type(model)}") - self.task = task - tools = task.create_environment().tools - # Stateless, so we only need to create it once - self.agent = OpenAIAssistantRunnable.create_assistant( - name=f"{task.name} assistant", - instructions=self.task.instructions, - tools=tools, - model=model, - as_agent=True, - ) - self.rate_limiter = rate_limiter - self.num_retries = num_retries - - def __call__(self) -> Runnable: - env = self.task.create_environment() - - agent = self.agent - if self.rate_limiter is not None: - # Rate limited model - agent = rate_limiting.with_rate_limit(agent, self.rate_limiter) - - def _map_key(x: dict): - # Assistant expects the 'content' key explicitly - return { - "content": x["input"], - **{k: v for k, v in x.items() if k != "input"}, - } - - agent = _map_key | self.agent - if self.num_retries > 0: - agent = agent.with_retry( - stop_after_attempt=self.num_retries + 1, - ) - runnable = AgentExecutor( - agent=agent, - tools=env.tools, - handle_parsing_errors=True, - return_intermediate_steps=True, - ) - - # Returns `state` in the output if the environment has a state reader - # makes sure that `output` is always in the output - return apply_agent_executor_adapter(runnable, state_reader=env.read_state) diff --git a/langchain_benchmarks/tool_usage/agents/openai_functions.py b/langchain_benchmarks/tool_usage/agents/openai_functions.py deleted file mode 100644 index 8537dfb0..00000000 --- a/langchain_benchmarks/tool_usage/agents/openai_functions.py +++ /dev/null @@ -1,166 +0,0 @@ -"""Code for creating an agent factory for evaluating tool usage tasks.""" -from typing import Any, 
Callable, Dict, List, Optional, Sequence, Type, Union - -from langchain.agents import AgentExecutor -from langchain.agents.format_scratchpad.openai_tools import ( - format_to_openai_tool_messages, -) -from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder -from langchain.schema.runnable import Runnable -from langchain.tools.render import format_tool_to_openai_tool -from langchain_core.language_models import BaseChatModel, BaseLanguageModel -from langchain_core.language_models.base import LanguageModelInput -from langchain_core.messages import BaseMessage -from langchain_core.pydantic_v1 import BaseModel - -from langchain_benchmarks import model_registry, rate_limiting -from langchain_benchmarks.model_registration import RegisteredModel -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter - -# PUBLIC API - - -def _bind_tools( - llm: BaseChatModel, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], - tool_choice: Optional[str] = None, - json_mode: bool = False, - **kwargs: Any, -) -> Runnable[LanguageModelInput, BaseMessage]: - """Bind tools (and other objects) to this chat model. - - Args: - tools: A list of tool definitions to bind to this chat model. - Can be a dictionary, pydantic model, or callable. Pydantic - models and callables will be automatically converted to - their schema dictionary representation. - tool_choice: Which tool to require the model to call. - Must be the name of the single provided tool or - "auto" to automatically determine which tool to call - (if any). - json_mode: Whether to set JSON mode for the tool call. - This guarantees the model will respond in valid JSON - (unless truncated). - kwargs: Any additional parameters to pass to the - :class:`~langchain.runnable.Runnable` constructor. - - """ - formatted_tools: List[Dict[str, Union[str, dict]]] = [ - format_tool_to_openai_tool(tool) for tool in tools - ] - if tool_choice is not None: - if not formatted_tools: - raise ValueError( - "When specifying `tool_choice`, you must provide at least one " "tool." - ) - tool_names = [tool["function"]["name"] for tool in formatted_tools] - if not any(tool_name == tool_choice for tool_name in tool_names): - raise ValueError( - f"Tool choice {tool_choice} was specified, but the only " - f"provided tools were {tool_names}." - ) - tool_choice_ = {"type": "function", "function": {"name": tool_choice}} - kwargs = {**kwargs, "tool_choice": tool_choice_} - if json_mode: - kwargs = {**kwargs, "response_format": {"type": "json_object"}} - return llm.bind( - tools=formatted_tools, - **kwargs, - ) - - -class OpenAIAgentFactory: - def __init__( - self, - task: ToolUsageTask, - *, - model: Union[ - str, RegisteredModel, BaseLanguageModel, BaseChatModel - ] = "gpt-3.5-turbo-16k", - rate_limiter: Optional[rate_limiting.RateLimiter] = None, - num_retries: int = 0, - ) -> None: - """Create an OpenAI agent factory for the given task. - - Args: - task: The task to create an agent factory for. - model: The model to use -- this must be an open AI model. 
- rate_limiter: The rate limiter to use - """ - self.task = task - self.model = model - self.rate_limiter = rate_limiter - self.num_retries = num_retries - - def _create_model(self) -> Union[BaseChatModel, BaseLanguageModel]: - if isinstance(self.model, RegisteredModel): - return self.model.get_model( - model_params={"temperature": 0, "model_kwargs": {"seed": 0}} - ) - elif isinstance(self.model, (BaseChatModel, BaseLanguageModel)): - return self.model - elif isinstance(self.model, str): - if self.model in model_registry: - registered_model = model_registry.get_model(self.model) - model_instance = registered_model.get_model( - model_params={"temperature": 0, "model_kwargs": {"seed": 0}} - ) - return model_instance - else: - raise ValueError(f"Unknown model: {self.model}") - else: - raise TypeError(f"Expected str or RegisteredModel, got {type(self.model)}") - - def create(self) -> Runnable: - """Agent Executor""" - # For backwards compatibility - return self() - - def __call__(self) -> Runnable: - model = self._create_model() - env = self.task.create_environment() - - model = _bind_tools(model, env.tools) - - if self.rate_limiter is not None: - # Rate limited model - model = rate_limiting.with_rate_limit(model, self.rate_limiter) - - prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - self.task.instructions, - ), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ] - ) - - runnable_agent = ( - { - "input": lambda x: x["input"], - "agent_scratchpad": lambda x: format_to_openai_tool_messages( - x["intermediate_steps"] - ), - } - | prompt - | model - | OpenAIToolsAgentOutputParser() - ) - if self.num_retries > 0: - runnable_agent = runnable_agent.with_retry( - stop_after_attempt=self.num_retries + 1, - ) - runnable = AgentExecutor( - agent=runnable_agent, - tools=env.tools, - handle_parsing_errors=True, - return_intermediate_steps=True, - ) - - # Returns `state` in the output if the environment has a state reader - # makes sure that `output` is always in the output - return apply_agent_executor_adapter(runnable, state_reader=env.read_state) diff --git a/langchain_benchmarks/tool_usage/agents/runnable_agent.py b/langchain_benchmarks/tool_usage/agents/runnable_agent.py index e0eadc28..8b130f55 100644 --- a/langchain_benchmarks/tool_usage/agents/runnable_agent.py +++ b/langchain_benchmarks/tool_usage/agents/runnable_agent.py @@ -46,4 +46,6 @@ def __call__(self) -> Runnable: return_intermediate_steps=True, ) - return apply_agent_executor_adapter(executor, state_reader=env.read_state) + return apply_agent_executor_adapter( + executor, state_reader=env.read_state + ).with_config({"run_name": "Agent", "metadata": {"task": self.task.name}}) diff --git a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py index 767f8aba..6b9283ec 100644 --- a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py +++ b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py @@ -5,6 +5,7 @@ from typing import Optional from langchain.agents import AgentExecutor +from langchain.agents import create_tool_calling_agent from langchain_core.language_models import BaseChatModel from langchain_core.prompts import ChatPromptTemplate from langchain_core.runnables import Runnable @@ -55,8 +56,6 @@ def __init__( def __call__(self) -> Runnable: """Call the factory to create Runnable agent.""" - # Temporarily import here until new langchain is released with create_tools_agent - from langchain.agents import 
create_tool_calling_agent
 
         env = self.task.create_environment()

From c746018336a1ef98143bc5d7c76551d274c9f5b1 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Thu, 18 Apr 2024 09:53:19 -0400
Subject: [PATCH 2/4] x

---
 langchain_benchmarks/tool_usage/__init__.py                | 2 +-
 langchain_benchmarks/tool_usage/agents/tool_using_agent.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/langchain_benchmarks/tool_usage/__init__.py b/langchain_benchmarks/tool_usage/__init__.py
index 83da61d1..a0ee4aae 100644
--- a/langchain_benchmarks/tool_usage/__init__.py
+++ b/langchain_benchmarks/tool_usage/__init__.py
@@ -2,8 +2,8 @@
 from langchain_benchmarks.tool_usage.agents import (
     CustomRunnableAgentFactory,
     StandardAgentFactory,
+    apply_agent_executor_adapter,
 )
-from langchain_benchmarks.tool_usage.agents import apply_agent_executor_adapter
 from langchain_benchmarks.tool_usage.evaluators import get_eval_config
 
 # Please keep this list sorted!

diff --git a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
index 6b9283ec..78672e4a 100644
--- a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
+++ b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
@@ -4,8 +4,7 @@
 """
 from typing import Optional
 
-from langchain.agents import AgentExecutor
-from langchain.agents import create_tool_calling_agent
+from langchain.agents import AgentExecutor, create_tool_calling_agent
 from langchain_core.language_models import BaseChatModel
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import Runnable

From 71fd2018325dd5e8ae591021df08c1e13f32d48d Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Thu, 18 Apr 2024 09:54:01 -0400
Subject: [PATCH 3/4] x

---
 .../agents/experimental/__init__.py           |   0
 .../tool_usage/agents/experimental/agent.py   | 133 ----------
 .../tool_usage/agents/experimental/encoder.py | 240 ------------------
 .../tool_usage/agents/experimental/factory.py |  93 -------
 .../tool_usage/agents/experimental/parser.py  | 122 ---------
 .../tool_usage/agents/experimental/prompts.py |  42 ---
 .../agents/experimental/tool_utils.py         |  57 -----
 tests/unit_tests/agents/__init__.py           |   0
 .../agents/encoding_and_decoding/__init__.py  |   0
 .../encoding_and_decoding/test_decoding.py    |  54 ----
 .../test_typescript_encoding.py               |  25 --
 .../test_xml_encoding.py                      |  90 -------
 tests/unit_tests/agents/test_tool_utils.py    |  59 -----
 13 files changed, 915 deletions(-)
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/__init__.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/agent.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/encoder.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/factory.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/parser.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/prompts.py
 delete mode 100644 langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py
 delete mode 100644 tests/unit_tests/agents/__init__.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/__init__.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/test_decoding.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py
 delete mode 100644 tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py
 delete mode 100644 tests/unit_tests/agents/test_tool_utils.py

diff --git
a/langchain_benchmarks/tool_usage/agents/experimental/__init__.py b/langchain_benchmarks/tool_usage/agents/experimental/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/langchain_benchmarks/tool_usage/agents/experimental/agent.py b/langchain_benchmarks/tool_usage/agents/experimental/agent.py deleted file mode 100644 index 87ada85e..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/agent.py +++ /dev/null @@ -1,133 +0,0 @@ -from typing import List, Literal, Optional, Sequence, Tuple, Union - -from langchain.agents import AgentOutputParser -from langchain.prompts.chat import ChatPromptTemplate -from langchain.schema.runnable import Runnable -from langchain.tools import StructuredTool -from langchain_core.agents import AgentAction, AgentFinish -from langchain_core.language_models import BaseChatModel, BaseLanguageModel -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage -from langchain_core.prompts import MessagesPlaceholder -from typing_extensions import NotRequired, TypedDict - -from langchain_benchmarks import RateLimiter -from langchain_benchmarks.rate_limiting import with_rate_limit -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - AstPrinter, - FunctionResult, - TypeScriptEncoder, - XMLEncoder, -) -from langchain_benchmarks.tool_usage.agents.experimental.prompts import ( - _AGENT_INSTRUCTIONS_BLOB_STYLE, -) -from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import ( - convert_tool_to_function_definition, -) - - -def format_steps_for_chat( - intermediate_steps: List[Tuple[AgentAction, str]], - ast_printer: AstPrinter, -) -> List[BaseMessage]: - """Format the steps.""" - messages = [] - for action, observation in intermediate_steps: - # Action messages contains the tool invocation request from the LLM - # Now add the result of the tool invocation. 
- - if action.tool == "_Exception": - messages.append( - AIMessage( - content=action.log, - ) - ) - messages.append( - # Tool input is the error message for the exception - HumanMessage(content=action.tool_input) - ) - else: - messages.extend(action.messages) - function_result: FunctionResult = { - "name": action.tool, - "error": None, - "result": observation, - } - messages.append( - HumanMessage( - content=ast_printer.visit_function_result(function_result), - ) - ) - - return messages - - -# PUBLIC API - - -class AgentInput(TypedDict): - """The input to the agent.""" - - input: str - """The input to the agent.""" - intermediate_steps: List[Tuple[AgentAction, str]] - """The intermediate steps taken by the agent.""" - examples: NotRequired[List[BaseMessage]] - """A list of messages that can be used to form example traces.""" - - -def create_agent( - model: Union[BaseChatModel, BaseLanguageModel], - tools: Sequence[StructuredTool], - parser: AgentOutputParser, - *, - ast_printer: Union[AstPrinter, Literal["xml"]] = "xml", - rate_limiter: Optional[RateLimiter] = None, -) -> Runnable[AgentInput, Union[AgentAction, AgentFinish]]: - """Create an agent for a chat model.""" - if isinstance(ast_printer, str): - if ast_printer == "xml": - ast_printer_ = XMLEncoder() - elif ast_printer == "typescript": - ast_printer_ = TypeScriptEncoder() - else: - raise ValueError(f"Unknown ast printer: {ast_printer}") - elif isinstance(ast_printer, AstPrinter): - ast_printer_ = ast_printer - else: - raise TypeError( - f"Expected AstPrinter or str, got {type(ast_printer)} for `ast_printer`" - ) - - function_definitions = [convert_tool_to_function_definition(tool) for tool in tools] - tool_description = ast_printer_.visit_function_definitions(function_definitions) - - template = ChatPromptTemplate.from_messages( - [ - ("system", _AGENT_INSTRUCTIONS_BLOB_STYLE), - MessagesPlaceholder("examples"), # Can use to add example traces - ("human", "{input}"), - MessagesPlaceholder("history"), - ] - ).partial(tool_description=tool_description) - - # For the time being, hard-coding the fact that we're using a tag. - model = model.bind(stop=[""]) - - if rate_limiter: - # Apply a rate limiter if it was provided - model = with_rate_limit(model, rate_limiter) - - agent = ( - { - "input": lambda x: x["input"], - "history": lambda x: format_steps_for_chat( - x["intermediate_steps"], ast_printer_ - ), - "examples": lambda x: x.get("examples", []), - } - | template - | model - | parser - ) - return agent diff --git a/langchain_benchmarks/tool_usage/agents/experimental/encoder.py b/langchain_benchmarks/tool_usage/agents/experimental/encoder.py deleted file mode 100644 index c6799609..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/encoder.py +++ /dev/null @@ -1,240 +0,0 @@ -"""Prototyping code for rendering function definitions, invocations, and results. - -Types are simplified for now to `str`. - -We should actually support something like pydantic or jsonschema for the types, so -we can expand them recursively for nested types. 
-""" -import abc -from typing import Any, List, Optional - -from typing_extensions import NotRequired, TypedDict - - -class Parameter(TypedDict): - """Representation for a parameter.""" - - name: str - type: str - description: str - - -class Arguments(TypedDict): - """Arguments are passed to a function during function invocation.""" - - name: Optional[str] - value: Any - - -class ReturnValue(TypedDict): - """Representation for a return value of a function call.""" - - type: str - description: NotRequired[str] - - -class FunctionDefinition(TypedDict): - """Representation for a function.""" - - name: str - description: str # Function description - parameters: List[Parameter] - return_value: ReturnValue - - -class FunctionInvocation(TypedDict): - """Representation for a function invocation.""" - - id: NotRequired[str] - name: str - arguments: List[Arguments] - - -class FunctionResult(TypedDict): - """Representation for a function result.""" - - id: NotRequired[str] - name: str - result: Optional[str] - error: Optional[str] - - -class Visitor(abc.ABC): - @abc.abstractmethod - def visit_function_definition(self, function_definition: FunctionDefinition) -> str: - """Render a function.""" - - @abc.abstractmethod - def visit_function_definitions( - self, function_definitions: List[FunctionDefinition] - ) -> str: - """Render a function.""" - - @abc.abstractmethod - def visit_function_invocation(self, function_invocation: FunctionInvocation) -> str: - """Render a function invocation.""" - - @abc.abstractmethod - def visit_function_result(self, function_result: FunctionResult) -> str: - """Render a function result.""" - - -class AstPrinter(Visitor): - """Print the AST.""" - - -class XMLEncoder(AstPrinter): - def visit_function_definition(self, function_definition: FunctionDefinition) -> str: - """Render a function.""" - parameters_lines = [] - - for parameter in function_definition["parameters"]: - parameters_lines.extend( - [ - "", - f"{parameter['name']}", - f"{parameter['type']}", - f"{parameter['description']}", - "", - ] - ) - lines = [ - "", - f"{function_definition['name']}", - "", - f"{function_definition['description']}", - "", - "", - *parameters_lines, - "", - "", - f"{function_definition['return_value']['type']}", - ] - if function_definition["return_value"].get("description"): - lines.append( - f"{function_definition['return_value']['description']}" - f"" - ) - - lines.extend(["", ""]) - return "\n".join(lines) - - def visit_function_definitions( - self, function_definitions: List[FunctionDefinition] - ) -> str: - """Render a function.""" - strs = [ - self.visit_function_definition(function_definition) - for function_definition in function_definitions - ] - return "\n" + "\n".join(strs) + "\n" - - def visit_function_invocation(self, invocation: FunctionInvocation) -> str: - """Render a function invocation.""" - arguments_as_strings = [ - "\n" - f"{argument['name']}\n" - f"{argument['value']}\n" - "\n" - for argument in invocation["arguments"] - ] - lines = [""] - - if invocation.get("id"): - lines.append(f"{invocation['id']}") - - lines.extend( - [ - f"{invocation['name']}\n" - "\n" - f"{''.join(arguments_as_strings)}" # Already includes trailing newline - "\n" - "" - ] - ) - return "\n".join(lines) - - def visit_function_result(self, function_result: FunctionResult) -> str: - """Render a function result.""" - lines = [ - "", - ] - - if function_result.get("id"): - lines.append(f"{function_result['id']}") - - lines.append(f"{function_result['name']}") - - if function_result["error"]: - 
lines.extend( - [ - f"{function_result['error']}", - ] - ) - else: - lines.append( - f"{function_result['result']}", - ) - - lines.append("") - - return "\n".join(lines) - - -class TypeScriptEncoder(AstPrinter): - def visit_function_definition(self, function_definition: FunctionDefinition) -> str: - """Render a function.""" - parameters_as_strings = [ - f"{parameter['name']}: {parameter['type']}" - for parameter in function_definition["parameters"] - ] - # Let's use JSdoc style comments - # First the function description - lines = [ - f"// {function_definition['description']}", - # Then the parameter descriptions - *[ - f"// @param {parameter['name']} {parameter['description']}" - for parameter in function_definition["parameters"] - ], - # Then the return value description - f"// @returns {function_definition['return_value']['description']}", - # Then the function definition - f"function {function_definition['name']}(" - f"{', '.join(parameters_as_strings)}): " - f"{function_definition['return_value']['type']};", - ] - - # finally join - function = "\n".join(lines) - return function - - def visit_function_definitions( - self, function_definitions: List[FunctionDefinition] - ) -> str: - """Render a function.""" - strs = [ - self.visit_function_definition(function_definition) - for function_definition in function_definitions - ] - return "\n\n".join(strs) - - def visit_function_invocation(self, invocation: FunctionInvocation) -> str: - """Render a function invocation.""" - arguments_as_strings = [ - f"{argument['name']}: {argument['value']}" - for argument in invocation["arguments"] - ] - lines = [f"{invocation['name']}(" f"{', '.join(arguments_as_strings)});"] - return "\n".join(lines) - - def visit_function_result(self, function_result: FunctionResult) -> str: - """Render a function result.""" - lines = [] - if function_result["error"]: - lines.append(f"ERROR: {function_result['error']}") - else: - lines.append(f"> {function_result['result']}") - if function_result.get("id"): - lines.append(f"// ID: {function_result['id']}") - return "\n".join(lines) diff --git a/langchain_benchmarks/tool_usage/agents/experimental/factory.py b/langchain_benchmarks/tool_usage/agents/experimental/factory.py deleted file mode 100644 index d158acd3..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/factory.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Factory for creating agents for the tool usage task.""" -from typing import Optional - -from langchain.agents import AgentExecutor -from langchain_core.runnables import Runnable, RunnableConfig - -from langchain_benchmarks import RateLimiter, model_registry -from langchain_benchmarks.schema import ToolUsageTask -from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter -from langchain_benchmarks.tool_usage.agents.experimental.agent import create_agent -from langchain_benchmarks.tool_usage.agents.experimental.parser import ( - GenericAgentParser, -) - - -class CustomAgentFactory: - """A factory for creating tool using agents. - - A factory for agents that do not leverage any special JSON mode for - function usage; instead all function invocation behavior is implemented solely - through prompt engineering and parsing. - """ - - def __init__( - self, - task: ToolUsageTask, - model: str, - *, - rate_limiter: Optional[RateLimiter] = None, - num_retries: int = 0, - ) -> None: - """Create an agent factory for the given tool usage task. 
- - Args: - task: The task to create an agent factory for - model: model name (check model_registry) - rate_limiter: The rate limiter to use if provided - num_retries: The number of times to retry the agent if it fails - """ - if model not in model_registry: - raise ValueError(f"Unknown model: {model}") - self.task = task - self.model = model - self.rate_limiter = rate_limiter - self.num_retries = num_retries - - def __call__(self) -> Runnable: - if isinstance(self.model, str): - registered_model = model_registry.get_model(self.model) - if registered_model is None: - raise ValueError(f"Unknown model: {self.model}") - model = registered_model.get_model(model_params={"temperature": 0}) - else: - model = self.model - - def _add_task_instructions( - input: dict, config: Optional[RunnableConfig] = None, **kwargs - ) -> dict: - """Add task instructions to the question.""" - if not isinstance(input, dict): - raise ValueError( - f"Expected input to be a dict with key `question`. " - f"Found {type(input)}." - ) - input = input.copy() - input["question"] = ( - f"{self.task.instructions}\nWrite down your answer, " - f"but do not explain it. Input: `{input['question']}`" - ) - return input - - env = self.task.create_environment() - - agent = create_agent( - model, - env.tools, - GenericAgentParser(wrapping_xml_tag="tool", require_closing_xml_tag=False), - rate_limiter=self.rate_limiter, - ) - if self.num_retries > 0: - agent = agent.with_retry( - stop_after_attempt=self.num_retries + 1, - ) - executor = AgentExecutor( - agent=agent, - tools=env.tools, - handle_parsing_errors=True, - return_intermediate_steps=True, - ) - - return _add_task_instructions | apply_agent_executor_adapter( - executor, state_reader=env.read_state - ) diff --git a/langchain_benchmarks/tool_usage/agents/experimental/parser.py b/langchain_benchmarks/tool_usage/agents/experimental/parser.py deleted file mode 100644 index 002ddf02..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/parser.py +++ /dev/null @@ -1,122 +0,0 @@ -import ast -import re -from typing import Dict, Optional, Union - -from langchain.agents import AgentOutputParser -from langchain.pydantic_v1 import BaseModel, Field -from langchain_core.agents import AgentAction, AgentActionMessageLog, AgentFinish -from langchain_core.exceptions import OutputParserException -from langchain_core.messages import AIMessage - - -class _ToolInvocationRequest(BaseModel): - """Light-weight pydantic model for validating the raw tool invocation request. - - The purpose of this model, is to make sure that whatever as parsed from - the raw llm output has `tool_name` and potential `arguments` fields, and - nothing else. - """ - - tool_name: str - # OK parameterless tools which do not take arguments - arguments: Optional[Dict] = Field(default_factory=dict) - - -class GenericAgentParser(AgentOutputParser): - """A generalized parser that makes it easier to parameterize different parsing.""" - - wrapping_xml_tag: str - """The tag that wraps the function invocation request. - - For example, if "tool", then the function invocation request should be wrapped - in .... - """ - require_closing_xml_tag: bool = False - """Whether we should require a closing tag for the wrapping_xml_tag. 
- - For example, if True, then the function invocation request should be wrapped - """ - - def parse(self, text: str) -> Union[AgentFinish, AgentAction]: - """Parse the output of the agent.""" - open_tag = f"<{self.wrapping_xml_tag}>" - close_tag = f"" - if open_tag in text: - # This is a hack to make sure that is always present - # in the output if . may be a stop sequence for the - # language model, so depending on implementation - # the stop sequence may be cut off. - # There might be a better way to do this, but this works and - # is simple. - if not self.require_closing_xml_tag: - text += close_tag - - pattern = rf"{open_tag}(?P.*?){close_tag}" - match = re.search(pattern, text, re.DOTALL) - if match: - content = match.group("invocation").strip() - return parse_invocation(content, self.wrapping_xml_tag) - - return AgentFinish( - log=text, - return_values={ - "output": text, - }, - ) - - -def parse_invocation(text: str, tag: str) -> AgentAction: - """Parse the content of the function invocation. - - Args: - text: The text to parse. - tag: The tag that wraps the function invocation request. - - Returns: - An AgentAction that corresponds to the function invocation. - - Raises: - OutputParserException: If the parsing fails. - - This exception is meant to be caught by the agent executor and - handled appropriately to provide feedback to the LLM. - """ - ai_content = f"<{tag}>{text}\n" - - try: - result = ast.literal_eval(text) - except BaseException as e: - # Convert this to something controllable by the user. - err_msg = ( - f"ERROR: Please use the format " - f'<{tag}>{{"tool_name": $TOOL_NAME, "arguments": $ARGUMENTS}}\n' - ) - - raise OutputParserException( - error=e, - llm_output=ai_content, - observation=err_msg, - send_to_llm=True, - ) - - try: - request = _ToolInvocationRequest.validate(result) - except Exception as e: # Using broad exception since it's not just ValidationError - # Can also raise DictError if result is not a dict. - err_msg = ( - f"ERROR: Please use the format " - f'<{tag}>{{"tool_name": $TOOL_NAME, "arguments": $ARGUMENTS}}\n' - ) - raise OutputParserException( - error=e, - llm_output=ai_content, - send_to_llm=True, - observation=err_msg, - ) - - return AgentActionMessageLog( - message_log=[AIMessage(content=ai_content)], - tool=request.tool_name, - tool_input=request.arguments, - log=f"\nInvoking {request.tool_name}: {request.arguments}\n\t", - ) diff --git a/langchain_benchmarks/tool_usage/agents/experimental/prompts.py b/langchain_benchmarks/tool_usage/agents/experimental/prompts.py deleted file mode 100644 index 9abc051e..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/prompts.py +++ /dev/null @@ -1,42 +0,0 @@ -AGENT_INSTRUCTIONS_XML_FORMAT = """\ -In this environment you have access to a set of tools you can use to answer the user's question. - -You may call them like this: - - -$TOOL_NAME - -<$PARAMETER_NAME>$PARAMETER_VALUE -... - - - - -Here are the tools available: - -{tool_description} -""" # noqa: E501 - -_AGENT_INSTRUCTIONS_BLOB_STYLE = """\ -In this environment you have access to a set of tools you can use to answer the user's question. - -Here are the tools available: - -{tool_description} - -You may call one tool at a time using a format that includes and tag. - -Inside the tag the content is a python dictionary that uses python literals (e.g., numbers, strings, lists, dictionaries, etc.) to specify the tool invocation. - -It must match the schema of the function as described in the tool description. 
-"arguments" is a dictionary of the arguments to the function. - - -{{ - "tool_name": $TOOL_NAME, - "arguments": $ARGUMENTS -}} - - -If you do not know the answer use more tools. You can only take a single action at a time.\ -""" # noqa: E501 diff --git a/langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py b/langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py deleted file mode 100644 index 04fed82b..00000000 --- a/langchain_benchmarks/tool_usage/agents/experimental/tool_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Utilities to extract information from langchain tools for use in prompts.""" -import inspect -from textwrap import dedent -from typing import List - -from langchain.tools.base import StructuredTool - -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - FunctionDefinition, - Parameter, -) - -# PUBLIC API - - -def get_parameters_from_tool(tool: StructuredTool) -> List[Parameter]: - """Convert a langchain tool to a tool user tool.""" - schema = tool.args_schema.schema() - - properties = schema["properties"] - parameters = [] - # Is this needed or is string OK? - type_adapter = { - "string": "str", # str or string? - "integer": "int", - "number": "float", - "boolean": "bool", - } - for key, value in properties.items(): - parameters.append( - { - "name": key, - "type": type_adapter.get(value["type"], value["type"]), - "description": value.get("description", ""), - } - ) - - return parameters - - -# -def convert_tool_to_function_definition(tool: StructuredTool) -> FunctionDefinition: - """Convert a langchain tool to a tool user tool.""" - # Here we re-inspect the underlying function to get the doc-string - # since StructuredTool modifies it, but we want the raw one for maximum - # flexibility. - description = inspect.getdoc(tool.func) - - parameters = get_parameters_from_tool(tool) - return { - "name": tool.name, - "description": dedent(description), - "parameters": parameters, - "return_value": { - "type": "Any", - }, - } diff --git a/tests/unit_tests/agents/__init__.py b/tests/unit_tests/agents/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit_tests/agents/encoding_and_decoding/__init__.py b/tests/unit_tests/agents/encoding_and_decoding/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit_tests/agents/encoding_and_decoding/test_decoding.py b/tests/unit_tests/agents/encoding_and_decoding/test_decoding.py deleted file mode 100644 index 5ed5da7a..00000000 --- a/tests/unit_tests/agents/encoding_and_decoding/test_decoding.py +++ /dev/null @@ -1,54 +0,0 @@ -import pytest -from langchain_core.agents import AgentActionMessageLog, AgentFinish -from langchain_core.exceptions import OutputParserException -from langchain_core.messages import AIMessage - -from langchain_benchmarks.tool_usage.agents.experimental.parser import ( - GenericAgentParser, -) - - -def test_parser() -> None: - """Test parser.""" - parser = GenericAgentParser(require_closing_tag=False, wrapping_xml_tag="tool") - - # If tag not found then it's an agent finish - assert isinstance(parser.invoke("goodbye"), AgentFinish) - - with pytest.raises(OutputParserException): - # Invocation content is missing tool name and arguments - parser.invoke("'hello'") - - with pytest.raises(OutputParserException): - parser.invoke("hello") - - # Full invocation - text = ( - '{\n "tool_name": "type_letter",\n ' - '"arguments": {\n ' - '"letter": "h"\n }\n}\n' - ) - - assert parser.invoke(text) == AgentActionMessageLog( - 
tool="type_letter", - tool_input={"letter": "h"}, - log="\nInvoking type_letter: {'letter': 'h'}\n\t", - message_log=[AIMessage(content=text)], - ) - - # Test more cases - parsed = parser.invoke('{"tool_name": "hello"}') - assert parsed.tool == "hello" - # Assumes that it's a structured tool by default! - assert parsed.tool_input == {} - - with pytest.raises(OutputParserException): - # Arguments need to be a dict - parser.invoke('{"tool_name": "hello", "arguments": [1, 2]}') - - parsed = parser.invoke( - '{"tool_name": "hello", "arguments": {"a": "b"}}' - ) - assert parsed.tool == "hello" - # Assumes that it's a structured tool by default! - assert parsed.tool_input == {"a": "b"} diff --git a/tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py b/tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py deleted file mode 100644 index 39175919..00000000 --- a/tests/unit_tests/agents/encoding_and_decoding/test_typescript_encoding.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Test typescript encoding.""" -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - FunctionDefinition, - TypeScriptEncoder, -) - - -def test_function_definition() -> None: - """Test encoding a function definition.""" - function_definition = FunctionDefinition( - name="test_function", - description="A test function", - parameters=[ - {"name": "test_parameter", "type": "str", "description": "A test parameter"} - ], - return_value={"type": "str", "description": "A test return value"}, - ) - encoder = TypeScriptEncoder() - xml = encoder.visit_function_definition(function_definition) - assert xml == ( - "// A test function\n" - "// @param test_parameter A test parameter\n" - "// @returns A test return value\n" - "function test_function(test_parameter: str): str;" - ) diff --git a/tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py b/tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py deleted file mode 100644 index d41b63be..00000000 --- a/tests/unit_tests/agents/encoding_and_decoding/test_xml_encoding.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Test XML encoding and decoding of function definitions, invocation, and results.""" -from langchain_benchmarks.tool_usage.agents.experimental.encoder import ( - FunctionDefinition, - FunctionInvocation, - FunctionResult, - XMLEncoder, -) - - -def test_function_definition_encoding() -> None: - """Test encoding a function definition.""" - function_definition = FunctionDefinition( - name="test_function", - description="A test function", - parameters=[ - {"name": "test_parameter", "type": "str", "description": "A test parameter"} - ], - return_value={"type": "str", "description": "A test return value"}, - ) - encoder = XMLEncoder() - xml = encoder.visit_function_definition(function_definition) - assert xml == ( - "\n" - "test_function\n" - "\n" - "A test function\n" - "\n" - "\n" - "\n" - "test_parameter\n" - "str\n" - "A test parameter\n" - "\n" - "\n" - "\n" - "str\n" - "A test return value\n" - "\n" - "" - ) - - -def test_function_result_encoding() -> None: - """Test encoding a function result.""" - encoder = XMLEncoder() - function_result = FunctionResult( - name="test_function", - result="test_result", - error=None, - ) - xml = encoder.visit_function_result(function_result) - assert xml == ( - "\n" - "test_function\n" - "test_result\n" - "" - ) - - function_result = FunctionResult( - name="test_function", - error="error", - ) - xml = encoder.visit_function_result(function_result) - assert xml == ( - "\n" - 
"test_function\n" - "error\n" - "" - ) - - -def test_function_invocation() -> None: - """Test function invocation.""" - function_invocation = FunctionInvocation( - name="test_function", - arguments=[{"name": "test_argument", "value": "test_value"}], - ) - encoder = XMLEncoder() - xml = encoder.visit_function_invocation(function_invocation) - assert xml == ( - "\n" - "test_function\n" - "\n" - "\n" - "test_argument\n" - "test_value\n" - "\n" - "\n" - "" - ) diff --git a/tests/unit_tests/agents/test_tool_utils.py b/tests/unit_tests/agents/test_tool_utils.py deleted file mode 100644 index 9e4bb95f..00000000 --- a/tests/unit_tests/agents/test_tool_utils.py +++ /dev/null @@ -1,59 +0,0 @@ -import pytest -from langchain.tools import tool - -from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import ( - convert_tool_to_function_definition, -) - - -@tool -def get_hello() -> str: - """Get hello.""" - return "hello" - - -@tool -def repeat(x: str) -> str: - """Repeat x. - - Args: - x: The string to repeat. - - Returns: - The repeated string. - """ - return x - - -def test_parameterless_function() -> None: - """Test foo.""" - function_definition = convert_tool_to_function_definition(get_hello) - assert function_definition == { - "name": "get_hello", - "description": "Get hello.", - "parameters": [], - "return_value": { - "type": "Any", - }, - } - - -@pytest.mark.skip("Need to fix handling of leading whitespace") -def test_function_with_parameters() -> None: - import textwrap - - doc = textwrap.dedent(repeat.func.__doc__) - assert convert_tool_to_function_definition(repeat) == { - "name": "repeat", - "description": doc, - "parameters": [ - { - "name": "x", - "type": "str", - "description": "", # Need to fix this - } - ], - "return_value": { - "type": "Any", - }, - } From 1dad65a375578dfee85d3fad9ce59aae0cf59bfe Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Thu, 18 Apr 2024 11:13:57 -0400 Subject: [PATCH 4/4] x --- langchain_benchmarks/tool_usage/agents/base.py | 11 +++++++++++ .../tool_usage/agents/runnable_agent.py | 3 ++- .../tool_usage/agents/tool_using_agent.py | 3 ++- tests/unit_tests/tool_usage/test_public_api.py | 8 +++++++- 4 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 langchain_benchmarks/tool_usage/agents/base.py diff --git a/langchain_benchmarks/tool_usage/agents/base.py b/langchain_benchmarks/tool_usage/agents/base.py new file mode 100644 index 00000000..aafdba8d --- /dev/null +++ b/langchain_benchmarks/tool_usage/agents/base.py @@ -0,0 +1,11 @@ +import abc + +from langchain_core.runnables import Runnable + + +class AgentFactory(abc.ABC): + """Abstract class for agent factory""" + + @abc.abstractmethod + def __call__(self) -> Runnable: + """Create a new agent""" diff --git a/langchain_benchmarks/tool_usage/agents/runnable_agent.py b/langchain_benchmarks/tool_usage/agents/runnable_agent.py index 8b130f55..b6f76b7b 100644 --- a/langchain_benchmarks/tool_usage/agents/runnable_agent.py +++ b/langchain_benchmarks/tool_usage/agents/runnable_agent.py @@ -10,9 +10,10 @@ from langchain_benchmarks.schema import ToolUsageTask from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter +from langchain_benchmarks.tool_usage.agents.base import AgentFactory -class CustomRunnableAgentFactory: +class CustomRunnableAgentFactory(AgentFactory): """A factory for creating tool using agents. 

     A factory for agents that do not leverage any special JSON mode for

diff --git a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
index 78672e4a..93653255 100644
--- a/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
+++ b/langchain_benchmarks/tool_usage/agents/tool_using_agent.py
@@ -12,9 +12,10 @@
 from langchain_benchmarks.rate_limiting import RateLimiter, with_rate_limit
 from langchain_benchmarks.schema import ToolUsageTask
 from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
+from langchain_benchmarks.tool_usage.agents.base import AgentFactory
 
 
-class StandardAgentFactory:
+class StandardAgentFactory(AgentFactory):
     """A standard agent factory.
 
     Use this factory with chat models that support the standard LangChain tool

diff --git a/tests/unit_tests/tool_usage/test_public_api.py b/tests/unit_tests/tool_usage/test_public_api.py
index 1f422366..00110722 100644
--- a/tests/unit_tests/tool_usage/test_public_api.py
+++ b/tests/unit_tests/tool_usage/test_public_api.py
@@ -6,5 +6,11 @@ def test_public_api() -> None:
     # This test will also fail if __all__ is not sorted.
     # Please keep it sorted!
     assert __all__ == sorted(
-        ["apply_agent_executor_adapter", "get_eval_config"], key=str.lower
+        [
+            "apply_agent_executor_adapter",
+            "get_eval_config",
+            "CustomRunnableAgentFactory",
+            "StandardAgentFactory",
+        ],
+        key=str.lower,
     )
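
Reviewer notes on the end state of this series.

After these four patches, the supported path for benchmarking a tool-calling chat model is StandardAgentFactory, which wraps langchain's create_tool_calling_agent. The model-specific factories could be deleted because create_tool_calling_agent delegates tool binding to each chat model's own bind_tools implementation, so one factory covers OpenAI, Anthropic, and other providers. A minimal usage sketch follows; it assumes "Tool Usage - Typewriter (1 tool)" as a registered task name and the (task, model, prompt) constructor implied by the context lines above, so verify both against your checkout.

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

from langchain_benchmarks import registry
from langchain_benchmarks.tool_usage import StandardAgentFactory

# Any ToolUsageTask from the registry should work here.
task = registry["Tool Usage - Typewriter (1 tool)"]
model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# create_tool_calling_agent requires an agent_scratchpad placeholder;
# {instructions} and {question} are assumed to be filled in from the task.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "{instructions}"),
        ("human", "{question}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
)

factory = StandardAgentFactory(task, model, prompt)  # assumed signature
agent = factory()  # a fresh Runnable wired to a new environment per call
print(agent.invoke({"question": "abc"}))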
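The AgentFactory base class added in PATCH 4/4 pins down the one contract the harness relies on: a factory is called with no arguments and returns a fresh Runnable. A sketch of a custom implementation is below; MyAgentFactory and its constructor arguments are illustrative only, while the AgentExecutor and adapter wiring mirrors what runnable_agent.py does in this series.

from langchain.agents import AgentExecutor
from langchain_core.runnables import Runnable

from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
from langchain_benchmarks.tool_usage.agents.base import AgentFactory


class MyAgentFactory(AgentFactory):  # hypothetical subclass
    def __init__(self, task: ToolUsageTask, agent: Runnable) -> None:
        self.task = task
        # Any Runnable that emits AgentAction / AgentFinish values works.
        self.agent = agent

    def __call__(self) -> Runnable:
        # A fresh environment per call keeps benchmark runs independent.
        env = self.task.create_environment()
        executor = AgentExecutor(
            agent=self.agent,
            tools=env.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )
        # The adapter normalizes inputs/outputs and attaches environment
        # state to the output when a state reader is available.
        return apply_agent_executor_adapter(executor, state_reader=env.read_state)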
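For context, a factory is typically consumed by a benchmark run rather than invoked directly. The sketch below assumes the LangSmith client's run_on_dataset API as it existed around this series, that get_eval_config() works with its default arguments, and that the task's dataset has already been cloned; it reuses the task and factory names from the first sketch.

from langsmith.client import Client

from langchain_benchmarks.tool_usage import get_eval_config

client = Client()
client.run_on_dataset(
    dataset_name=task.name,
    llm_or_chain_factory=factory,  # the factory itself is called once per example
    evaluation=get_eval_config(),
    verbose=True,
)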