add notebook

langchain-ai · Apr 10, 2024 · a60a915 · a60a915
1 parent 20a4aee
commit a60a915
Showing 1 changed file with 168 additions and 0 deletions.
diff --git a/docs/source/notebooks/tool_usage/tool_calling.ipynb b/docs/source/notebooks/tool_usage/tool_calling.ipynb
@@ -0,0 +1,168 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f108a5e4-d4cb-463c-ab87-ef5dabb31ae7",
+   "metadata": {},
+   "source": [
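+    "# Tool calling benchmark\n",
+    "\n",
+    "Benchmarks several chat models on the \"Tool Usage - Relational Data\" task, following the [benchmarking guide](https://langchain-ai.github.io/langchain-benchmarks/notebooks/tool_usage/intro.html#benchmarking)."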
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93433486-5c18-4bbd-aaf2-417464853093",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "\n",
+    "from langsmith.client import Client\n",
+    "\n",
+    "from langchain_benchmarks import (\n",
+    "    __version__,\n",
+    "    clone_public_dataset,\n",
+    "    model_registry,\n",
+    "    registry,\n",
+    ")\n",
+    "from langchain_benchmarks.rate_limiting import RateLimiter\n",
+    "from langchain_benchmarks.tool_usage.agents import StandardAgentFactory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1e197fa-adc9-4c8c-8203-43caa0a787d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only the registry entries whose name is in the benchmark selection.\n",
+    "tasks = [\n",
+    "    candidate\n",
+    "    for candidate in registry.tasks\n",
+    "    if candidate.name in (\"Tool Usage - Relational Data\",)\n",
+    "]\n",
+    "\n",
+    "# Display the first selected task as the cell output.\n",
+    "task = tasks[0]\n",
+    "task"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3838c9f4-ab48-4f25-a6bf-0b13f533f268",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import uuid\n",
+    "\n",
+    "# Random 32-character hex id for this session; it is embedded in the project\n",
+    "# name and metadata of each benchmark run below.\n",
+    "experiment_uuid = uuid.uuid4().hex\n",
+    "print(experiment_uuid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9dcd648-6106-4f45-a2fd-0a5b6ec36dc7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_anthropic import ChatAnthropic\n",
+    "from langchain_cohere import ChatCohere\n",
+    "from langchain_fireworks import ChatFireworks\n",
+    "from langchain_mistralai import ChatMistralAI\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "from langchain_google_vertexai import ChatVertexAI\n",
+    "from langchain_groq import ChatGroq\n",
+    "\n",
+    "\n",
+    "# (label, chat model) pairs to benchmark; every model is created with temperature=0.\n",
+    "# Entries that pass no model name rely on the integration's default model.\n",
+    "tests = [\n",
+    "    (\"claude-3-opus-20240229\", ChatAnthropic(model=\"claude-3-opus-20240229\", temperature=0)),\n",
+    "    # Pin the model so the \"gpt-3.5\" label does not depend on ChatOpenAI's default.\n",
+    "    (\"gpt-3.5\", ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)),\n",
+    "    (\"cohere\", ChatCohere(temperature=0)),\n",
+    "    (\"fireworks\", ChatFireworks(model=\"accounts/fireworks/models/firefunction-v1\", temperature=0)),\n",
+    "    (\"mistral\", ChatMistralAI(model=\"mistral-large-latest\", temperature=0)),\n",
+    "    (\"vertex\", ChatVertexAI(model_name=\"gemini-pro\", temperature=0)),\n",
+    "    (\"groq\", ChatGroq(temperature=0)),\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f33cb36-470d-4316-962f-57feda47a6c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
+    "\n",
+    "# Agent prompt: a system message, the human input, then a messages\n",
+    "# placeholder named \"agent_scratchpad\".\n",
+    "prompt = ChatPromptTemplate.from_messages(\n",
+    "    [\n",
+    "        (\"system\", \"{instructions}\"),\n",
+    "        (\"human\", \"{input}\"),\n",
+    "        MessagesPlaceholder(variable_name=\"agent_scratchpad\"),\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b0b3b8b0-76fd-4f96-aa9c-eeb7da8d139d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client = Client()  # LangSmith client used to run each model on the dataset\n",
+    "today = datetime.date.today().isoformat()\n",
+    "# One limiter shared by every agent factory below (2 requests per second).\n",
+    "rate_limiter = RateLimiter(requests_per_second=2)\n",
+    "\n",
+    "for task in tasks:\n",
+    "    # Only tool usage tasks are benchmarked here; skip any other task type.\n",
+    "    if task.type != \"ToolUsageTask\":\n",
+    "        continue\n",
+    "\n",
+    "    dataset_name = task.name\n",
+    "    clone_public_dataset(task.dataset_id, dataset_name=dataset_name)\n",
+    "\n",
+    "    for model_name, model in tests:\n",
+    "        print()\n",
+    "        print(f\"Benchmarking {task.name} with model: {model_name}\")\n",
+    "        eval_config = task.get_eval_config()\n",
+    "\n",
+    "        # Hand the shared limiter to the factory so the agents it builds are throttled.\n",
+    "        agent_factory = StandardAgentFactory(\n",
+    "            task, model, prompt, rate_limiter=rate_limiter\n",
+    "        )\n",
+    "\n",
+    "        client.run_on_dataset(\n",
+    "            dataset_name=dataset_name,\n",
+    "            llm_or_chain_factory=agent_factory,\n",
+    "            evaluation=eval_config,\n",
+    "            verbose=False,\n",
+    "            # Project name combines model label, task, date and session id.\n",
+    "            project_name=f\"{model_name}-{task.name}-{today}-{experiment_uuid}\",\n",
+    "            tags=[model_name],\n",
+    "            concurrency_level=5,\n",
+    "            project_metadata={\n",
+    "                \"model\": model_name,\n",
+    "                \"id\": experiment_uuid,\n",
+    "                \"task\": task.name,\n",
+    "                \"date\": today,\n",
+    "                \"langchain_benchmarks_version\": __version__,\n",
+    "            },\n",
+    "        )"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}