add notebook for custom chain benchmarking on extraction data #163

Open · wants to merge 6 commits into base: main
2 changes: 1 addition & 1 deletion docs/source/notebooks/extraction/chat_extraction.ipynb
Original file line number Diff line number Diff line change
@@ -3310,7 +3310,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.1"
}
},
"nbformat": 4,
379 changes: 379 additions & 0 deletions docs/source/notebooks/extraction/custom-chain.ipynb
@@ -0,0 +1,379 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7e8fc49a-e8b2-404b-a059-e9f668c460e5",
"metadata": {},
"source": [
"# Custom Chain\n",
"\n",
"This notebook shows how to evaluate a custom chain on ALL evaluation tasks.\n",
"\n",
"We will first define a `create_chain` function that creates a custom chain given a schema to extract. We will then iterate over all benchmark tasks for extraction and run our chain over them.\n"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "758872ec-911b-4b62-99c3-6e6b73fad8e6",
"metadata": {},
"outputs": [],
"source": [
"#!pip install -U langchain-benchmarks langchain langchain-openai rapidfuzz"
]
},
{
"cell_type": "markdown",
"id": "101b0520-2a07-4fab-8cf5-59f81f55359b",
"metadata": {},
"source": [
"## Get the Benchmarks\n",
"\n",
"First, let's load the relevant benchmarks."
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "86912590-a90a-4351-8ab4-89192cdee1e7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<thead>\n",
"<tr><th>Name </th><th>Type </th><th>Dataset ID </th><th>Description </th></tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr><td>Email Extraction</td><td>ExtractionTask</td><td><a href=\"https://smith.langchain.com/public/a1742786-bde5-4f51-a1d8-e148e5251ddb/d\" target=\"_blank\" rel=\"noopener\">a1742786-bde5-4f51-a1d8-e148e5251ddb</a></td><td>A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\n",
"\n",
"Some additional cleanup of the data was done by hand after the initial pass.\n",
"\n",
"See https://github.com/jacoblee93/oss-model-extraction-evals. </td></tr>\n",
"<tr><td>Chat Extraction </td><td>ExtractionTask</td><td><a href=\"https://smith.langchain.com/public/00f4444c-9460-4a82-b87a-f50096f1cfef/d\" target=\"_blank\" rel=\"noopener\">00f4444c-9460-4a82-b87a-f50096f1cfef</a></td><td>A dataset meant to test the ability of an LLM to extract and infer\n",
"structured information from a dialogue. The dialogue is between a user and a support\n",
"engineer. Outputs should be structured as a JSON object and test both the ability\n",
"of the LLM to correctly structure the information and its ability to perform simple \n",
"classification tasks. </td></tr>\n",
"</tbody>\n",
"</table>"
],
"text/plain": [
"Registry(tasks=[ExtractionTask(name='Email Extraction', dataset_id='https://smith.langchain.com/public/a1742786-bde5-4f51-a1d8-e148e5251ddb/d', description='A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\\n\\nSome additional cleanup of the data was done by hand after the initial pass.\\n\\nSee https://github.com/jacoblee93/oss-model-extraction-evals.\\n ', schema=<class 'langchain_benchmarks.extraction.tasks.email_task.Email'>, instructions=ChatPromptTemplate(input_variables=['input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert researcher.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```'))])), ExtractionTask(name='Chat Extraction', dataset_id='https://smith.langchain.com/public/00f4444c-9460-4a82-b87a-f50096f1cfef/d', description='A dataset meant to test the ability of an LLM to extract and infer\\nstructured information from a dialogue. The dialogue is between a user and a support\\nengineer. Outputs should be structured as a JSON object and test both the ability\\nof the LLM to correctly structure the information and its ability to perform simple \\nclassification tasks.', schema=<class 'langchain_benchmarks.extraction.tasks.chat_extraction.schema.GenerateTicket'>, instructions=ChatPromptTemplate(input_variables=['dialogue'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpdesk assistant responsible with extracting information and generating tickets. 
Dialogues are between a user and a support engineer.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['dialogue'], template='Generate a ticket for the following question-response pair:\\n<Dialogue>\\n{dialogue}\\n</Dialogue>'))]))])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_benchmarks import clone_public_dataset, registry\n",
"\n",
"registry.filter(Type=\"ExtractionTask\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "95f7df8b-5b50-409c-b7c4-190e96b3fbe1",
"metadata": {},
"outputs": [],
"source": [
"task = registry[\"Email Extraction\"]"
]
},
{
"cell_type": "markdown",
"id": "e91c9d74-598e-46c9-b50d-3163dc63588e",
"metadata": {},
"source": [
"Each task has instructions (which are a prompt) as well as a schema. You do not need to use the instructions but they may be helpful for quickly bootstrapping a default prompt."
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "77a19239-f700-4e43-97a5-7ab7c14603ad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ChatPromptTemplate(input_variables=['input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert researcher.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```'))])"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"task.instructions"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "937c72b8-af2d-4f12-9314-4bea05297557",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"langchain_benchmarks.extraction.tasks.email_task.Email"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"task.schema"
]
},
{
"cell_type": "markdown",
"id": "cd1cc226-50dc-4b1c-ad78-ecaf8b381c5b",
"metadata": {},
"source": [
"## Define Chain Creation Function\n",
"\n",
"Here is where we put our logic for extracting things. We will make this function take in a prompt and an output schema (although it can really take in anything, you just need to modify the logic where it is called below)."
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "2532e9d6-df61-45a4-9d11-a625747fcd7c",
"metadata": {},
"outputs": [],
"source": [
"from langchain.output_parsers.openai_tools import JsonOutputToolsParser\n",
"from langchain_openai import ChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "361e11de-2070-4549-81a3-d4ec87bd2f40",
"metadata": {},
"outputs": [],
"source": [
"def create_extraction_chain(prompt, schema):\n",
" llm = ChatOpenAI(model=\"gpt-4-turbo-preview\", temperature=0).bind_tools(\n",
" tools=[schema],\n",
" )\n",
"\n",
" output_parser = JsonOutputToolsParser()\n",
" extraction_chain = (\n",
" prompt | llm | output_parser | (lambda x: {\"output\": x[0][\"args\"]})\n",
" )\n",
" return extraction_chain"
]
},
{
"cell_type": "markdown",
"id": "cb2c6bf8-33e3-43d4-a878-c07749c51d51",
"metadata": {},
"source": [
"## Loop over tasks\n",
"\n",
"Here we loop over the tasks with our chains to evaluate"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "a0f4b7d9-fa20-4053-aed5-94ebad97e6f2",
"metadata": {},
"outputs": [],
"source": [
"chains_to_eval = [(\"openai-tools\", create_extraction_chain)]"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "665cabd0-fbf3-4f5a-91e2-6692f671bdb7",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"\n",
"from langsmith.client import Client\n",
"\n",
"from langchain_benchmarks.extraction import get_eval_config\n",
"from langchain_benchmarks.extraction.tasks.chat_extraction import (\n",
" get_eval_config as get_chat_eval_config,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "0b7b763d-6f3e-4b1c-9482-94e08f58ab7b",
"metadata": {},
"outputs": [],
"source": [
"eval_configs = {\n",
" \"Email Extraction\": get_eval_config(),\n",
" \"Chat Extraction\": get_chat_eval_config(),\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "8a15c7ae-2079-46d4-811a-5e6b5afdf860",
"metadata": {},
"outputs": [],
"source": [
"prompts = {\n",
" task.name: task.instructions for task in registry.filter(Type=\"ExtractionTask\")\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "5342ef85-3d2c-4cbc-aabb-c539a635fab8",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"\n",
"_email_template = \"\"\"What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```\"\"\"\n",
"\n",
"\n",
"def email_extraction_formatting(inputs):\n",
" return [HumanMessage(content=_email_template.format(input=inputs[\"input\"]))]\n",
"\n",
"\n",
"_chat_template = \"\"\"Generate a ticket for the following question-response pair:\\n<Dialogue>\\n{dialogue}\\n</Dialogue>'\"\"\"\n",
"_chat_instructions = \"\"\"You are a helpdesk assistant responsible with extracting information and generating tickets. Dialogues are between a user and a support engineer.\"\"\"\n",
"\n",
"\n",
"def format_run(dialogue_input: dict):\n",
" question = dialogue_input[\"question\"]\n",
" answer = dialogue_input[\"answer\"]\n",
" return {\n",
" \"dialogue\": f\"<question>\\n{question}\\n</question>\\n\"\n",
" f\"<assistant-response>\\n{answer}\\n</assistant-response>\"\n",
" }\n",
"\n",
"\n",
"def chat_extraction_formatting(inputs):\n",
" dialogue = format_run(inputs)[\"dialogue\"]\n",
" return [\n",
" SystemMessage(content=_chat_instructions),\n",
" HumanMessage(content=_chat_template.format(dialogue=dialogue)),\n",
" ]\n",
"\n",
"\n",
"prompt_formatting = {\n",
" \"Email Extraction\": email_extraction_formatting,\n",
" \"Chat Extraction\": chat_extraction_formatting,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "0a69f9a5-0d85-4446-bc05-63b2573c1c24",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset Email Extraction already exists. Skipping.\n",
"You can access the dataset at https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/ccbb1190-dc59-45c8-8f5d-7a7a00fa4c4d.\n",
"Dataset Chat Extraction already exists. Skipping.\n",
"You can access the dataset at https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/b8637606-8ac0-4bab-9ad5-29796196cbbc.\n",
"\n",
"Benchmarking Chat Extraction on openai-tools\n",
"View the evaluation results for project 'openai-tools-Chat Extraction-2024-02-20T20:39:20.189708' at:\n",
"https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/b8637606-8ac0-4bab-9ad5-29796196cbbc/compare?selectedSessions=7c1213e1-7dcb-4d2b-b252-04e51e3ed82e\n",
"\n",
"View all tests for Dataset Chat Extraction at:\n",
"https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/b8637606-8ac0-4bab-9ad5-29796196cbbc\n",
"[------------------------------------------------->] 27/27"
]
}
],
"source": [
"import uuid\n",
"\n",
"client = Client() # Launch langsmith client for cloning datasets\n",
"today = datetime.datetime.today().isoformat()\n",
"\n",
"for task in registry.filter(Type=\"ExtractionTask\"):\n",
" dataset_name = task.name\n",
" clone_public_dataset(task.dataset_id, dataset_name=dataset_name)\n",
" dataset = client.read_dataset(dataset_name=dataset_name)\n",
"\n",
" for name, chain_factory in chains_to_eval:\n",
" print()\n",
" print(f\"Benchmarking {task.name} on {name}\")\n",
" eval_config = eval_configs[task.name]\n",
"\n",
" chain = chain_factory(prompt_formatting[task.name], task.schema)\n",
" project_name = f\"{name}-{task.name}-{today}\"\n",
" client.run_on_dataset(\n",
" dataset_name=dataset_name,\n",
" llm_or_chain_factory=chain,\n",
" evaluation=eval_config,\n",
" verbose=False,\n",
" project_name=project_name,\n",
" tags=[name],\n",
" concurrency_level=5,\n",
" project_metadata={\n",
" \"name\": name,\n",
" \"task\": task.name,\n",
" \"date\": today,\n",
" },\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5ee3318-a528-4766-a12a-887863633438",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "58302cdd-9fe9-43a5-8d11-7077dd2c47d8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
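The glue logic in this notebook can be exercised without any model calls. The chain pipes `prompt | llm | output_parser | (lambda x: {"output": x[0]["args"]})`: `JsonOutputToolsParser` emits a list of tool calls shaped like `{"type": ..., "args": ...}`, and the trailing lambda unwraps the first call's arguments into the `{"output": ...}` dict the extraction evaluators expect. A minimal standalone sketch of the two pure-Python pieces (`format_run` is copied from the notebook; `unwrap_first_tool_call` is a name introduced here for the lambda, and the sample parsed tool-call list is illustrative, not real model output):

```python
# Sketch of the non-LLM pieces of the notebook's chain: dialogue formatting
# and unwrapping the parser output into {"output": ...}.


def format_run(dialogue_input: dict) -> dict:
    """Wrap a question/answer pair in the tags the chat-extraction prompt expects."""
    question = dialogue_input["question"]
    answer = dialogue_input["answer"]
    return {
        "dialogue": f"<question>\n{question}\n</question>\n"
        f"<assistant-response>\n{answer}\n</assistant-response>"
    }


def unwrap_first_tool_call(tool_calls: list) -> dict:
    """Equivalent of the chain's `lambda x: {"output": x[0]["args"]}` step."""
    return {"output": tool_calls[0]["args"]}


# Illustrative stand-in for JsonOutputToolsParser output (a list of tool calls):
parsed = [{"type": "GenerateTicket", "args": {"issue_summary": "Login fails"}}]
print(unwrap_first_tool_call(parsed))
# {'output': {'issue_summary': 'Login fails'}}
```

Keeping these steps as plain functions makes them easy to unit-test before wiring them into the LCEL pipeline, where each callable is coerced to a runnable automatically.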
2 changes: 1 addition & 1 deletion docs/source/notebooks/extraction/email.ipynb
@@ -1194,7 +1194,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.1"
}
},
"nbformat": 4,