From e517474bdacd8e255ebba7da817bb94392a73eb6 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 20 Nov 2023 16:31:05 -0500
Subject: [PATCH 1/3] x

---
 langchain_benchmarks/extraction/evaluators.py |  27 +++++
 .../extraction/implementations.py             | 102 ++++++++++++++++++
 .../extraction/tasks/__init__.py              |   0
 .../extraction/{ => tasks}/email_task.py      |  26 -----
 langchain_benchmarks/registration.py          |   2 +-
 .../extraction/test_email_extraction.py       |   1 -
 6 files changed, 130 insertions(+), 28 deletions(-)
 create mode 100644 langchain_benchmarks/extraction/evaluators.py
 create mode 100644 langchain_benchmarks/extraction/implementations.py
 create mode 100644 langchain_benchmarks/extraction/tasks/__init__.py
 rename langchain_benchmarks/extraction/{ => tasks}/email_task.py (52%)

diff --git a/langchain_benchmarks/extraction/evaluators.py b/langchain_benchmarks/extraction/evaluators.py
new file mode 100644
index 00000000..5a6f29a1
--- /dev/null
+++ b/langchain_benchmarks/extraction/evaluators.py
@@ -0,0 +1,27 @@
+from langchain.smith import RunEvalConfig
+from pydantic import BaseModel
+
+
+def get_eval_config(eval_llm: BaseModel) -> RunEvalConfig:
+    """Get the evaluation configuration for the email task."""
+    return RunEvalConfig(
+        evaluators=[
+            RunEvalConfig.LabeledScoreString(
+                criteria={
+                    "accuracy": """
+                        Score 1: The answer is incorrect and unrelated to the question or reference document.
+                        Score 3: The answer is partially correct but has more than one omission or major errors.
+                        Score 5: The answer is mostly correct but has more than one omission or major error.
+                        Score 7: The answer is mostly correct but has at most one omission or major error.
+                        Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
+                        Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
+
+                        If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
+                        If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
+                        """  # noqa
+                },
+                llm=eval_llm,
+                normalize_by=10.0,
+            ),
+        ],
+    )
diff --git a/langchain_benchmarks/extraction/implementations.py b/langchain_benchmarks/extraction/implementations.py
new file mode 100644
index 00000000..b998097a
--- /dev/null
+++ b/langchain_benchmarks/extraction/implementations.py
@@ -0,0 +1,102 @@
+"""Default implementations of LLMs that can be used for extraction."""
+import os
+
+from typing import Optional, List
+from enum import Enum
+
+from langsmith import Client
+from langchain.smith import RunEvalConfig, run_on_dataset
+
+from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
+from langchain_experimental.llms.ollama_functions import OllamaFunctions
+from langchain_experimental.llms.anthropic_functions import AnthropicFunctions
+from langchain.pydantic_v1 import BaseModel, Field
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.chains.openai_functions import convert_to_openai_function
+
+
+class ToneEnum(str, Enum):
+    positive = "positive"
+    negative = "negative"
+
+
+class Email(BaseModel):
+    """Relevant information about an email."""
+
+    sender: Optional[str] = Field(None, description="The sender's name, if available")
+    sender_phone_number: Optional[str] = Field(
+        None, description="The sender's phone number, if available"
+    )
+    sender_address: Optional[str] = Field(
+        None, description="The sender's address, if available"
+    )
+    action_items: List[str] = Field(
+        ..., description="A list of action items requested by the email"
+    )
+    topic: str = Field(
+        ..., description="High level description of what the email is about"
+    )
+    tone: ToneEnum = Field(..., description="The tone of the email.")
+
+
+prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", "You are an expert researcher."),
+        (
+            "human",
+            "What can you tell me about the following email? Make sure to answer in the correct format: {email}",
+        ),
+    ]
+)
+
+openai_functions = [convert_to_openai_function(Email)]
+llm_kwargs = {
+    "functions": openai_functions,
+    "function_call": {"name": openai_functions[0]["name"]},
+}
+
+# Ollama JSON mode has a bug where it infintely generates newlines. This stop sequence hack fixes it
+llm = OllamaFunctions(temperature=0, model="llama2", timeout=300, stop=["\n\n\n\n"])
+# llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
+# llm = AnthropicFunctions(temperature=0, model="claude-2")
+
+# output_parser = get_openai_output_parser([Email])
+output_parser = JsonOutputFunctionsParser()
+extraction_chain = (
+    prompt | llm.bind(**llm_kwargs) | output_parser | (lambda x: {"output": x})
+)
+
+eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
+
+evaluation_config = RunEvalConfig(
+    evaluators=[
+        RunEvalConfig.LabeledScoreString(
+            criteria={
+                "accuracy": """
+Score 1: The answer is incorrect and unrelated to the question or reference document.
+Score 3: The answer is partially correct but has more than one omission or major errors.
+Score 5: The answer is mostly correct but has more than one omission or major error.
+Score 7: The answer is mostly correct but has at most one omission or major error.
+Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
+Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
+
+If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
+If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
+"""  # noqa
+            },
+            llm=eval_llm,
+            normalize_by=10.0,
+        ),
+    ],
+)
+
+client = Client()
+run_on_dataset(
+    dataset_name="Extraction Over Spam Emails",
+    llm_or_chain_factory=extraction_chain,
+    client=client,
+    evaluation=evaluation_config,
+    project_name="llama2-test",
+    concurrency_level=1,
+)
diff --git a/langchain_benchmarks/extraction/tasks/__init__.py b/langchain_benchmarks/extraction/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/langchain_benchmarks/extraction/email_task.py b/langchain_benchmarks/extraction/tasks/email_task.py
similarity index 52%
rename from langchain_benchmarks/extraction/email_task.py
rename to langchain_benchmarks/extraction/tasks/email_task.py
index e03f138d..6550307d 100644
--- a/langchain_benchmarks/extraction/email_task.py
+++ b/langchain_benchmarks/extraction/tasks/email_task.py
@@ -1,7 +1,6 @@
 from enum import Enum
 from typing import Optional, List
 
-from langchain.smith import RunEvalConfig
 from pydantic import BaseModel, Field
 
 from langchain_benchmarks.schema import ExtractionTask
@@ -33,31 +32,6 @@ class Email(BaseModel):
     tone: ToneEnum = Field(..., description="The tone of the email.")
 
 
-def get_eval_config(eval_llm: BaseModel) -> RunEvalConfig:
-    """Get the evaluation configuration for the email task."""
-    return RunEvalConfig(
-        evaluators=[
-            RunEvalConfig.LabeledScoreString(
-                criteria={
-                    "accuracy": """
-                        Score 1: The answer is incorrect and unrelated to the question or reference document.
-                        Score 3: The answer is partially correct but has more than one omission or major errors.
-                        Score 5: The answer is mostly correct but has more than one omission or major error.
-                        Score 7: The answer is mostly correct but has at most one omission or major error.
-                        Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
-                        Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
-
-                        If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
-                        If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
-                        """  # noqa
-                },
-                llm=eval_llm,
-                normalize_by=10.0,
-            ),
-        ],
-    )
-
-
 EMAIL_EXTRACTION_TASK = ExtractionTask(
     name="Email Extraction",
     dataset_id="https://smith.langchain.com/public/36bdfe7d-3cd1-4b36-b957-d12d95810a2b/d",
diff --git a/langchain_benchmarks/registration.py b/langchain_benchmarks/registration.py
index 7ebf8fed..f68d72e1 100644
--- a/langchain_benchmarks/registration.py
+++ b/langchain_benchmarks/registration.py
@@ -1,6 +1,6 @@
 """Registry of environments for ease of access."""
 
-from langchain_benchmarks.extraction import email_task
+from langchain_benchmarks.extraction.tasks import email_task
 from langchain_benchmarks.schema import Registry
 from langchain_benchmarks.tool_usage.tasks import (
     type_writer,
diff --git a/tests/unit_tests/extraction/test_email_extraction.py b/tests/unit_tests/extraction/test_email_extraction.py
index 57bf3530..e9622150 100644
--- a/tests/unit_tests/extraction/test_email_extraction.py
+++ b/tests/unit_tests/extraction/test_email_extraction.py
@@ -1,3 +1,2 @@
 def test_email_extraction() -> None:
     """Try to import the email task."""
-    from langchain_benchmarks.extraction import email_task  # noqa: F401

From 57ed4a78816551254127eeb425dc37f1eb370280 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 20 Nov 2023 17:23:18 -0500
Subject: [PATCH 2/3] x

---
 .../extraction/implementations.py   | 159 +++++++-----------
 .../extraction/tasks/email_task.py  |  16 +-
 langchain_benchmarks/schema.py      |  13 +-
 poetry.lock                         |  10 +-
 .../extraction/test_import_stuff.py |   3 +
 5 files changed, 96 insertions(+), 105 deletions(-)
 create mode 100644 tests/unit_tests/extraction/test_import_stuff.py

diff --git a/langchain_benchmarks/extraction/implementations.py b/langchain_benchmarks/extraction/implementations.py
index b998097a..ede686ad 100644
--- a/langchain_benchmarks/extraction/implementations.py
+++ b/langchain_benchmarks/extraction/implementations.py
@@ -1,102 +1,67 @@
 """Default implementations of LLMs that can be used for extraction."""
-import os
+from typing import Type, Optional, List, Any, Dict
 
-from typing import Optional, List
-from enum import Enum
-
-from langsmith import Client
-from langchain.smith import RunEvalConfig, run_on_dataset
-
-from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
-from langchain_experimental.llms.ollama_functions import OllamaFunctions
-from langchain_experimental.llms.anthropic_functions import AnthropicFunctions
-from langchain.pydantic_v1 import BaseModel, Field
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts import ChatPromptTemplate
 from langchain.chains.openai_functions import convert_to_openai_function
-
-
-class ToneEnum(str, Enum):
-    positive = "positive"
-    negative = "negative"
-
-
-class Email(BaseModel):
-    """Relevant information about an email."""
-
-    sender: Optional[str] = Field(None, description="The sender's name, if available")
-    sender_phone_number: Optional[str] = Field(
-        None, description="The sender's phone number, if available"
-    )
-    sender_address: Optional[str] = Field(
-        None, description="The sender's address, if available"
-    )
-    action_items: List[str] = Field(
-        ..., description="A list of action items requested by the email"
+from langchain.chat_models import ChatOpenAI
+from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
+from langchain.schema.runnable import Runnable
+from langsmith.client import Client
+from pydantic import BaseModel
+
+from langchain_benchmarks.extraction.evaluators import get_eval_config
+from langchain_benchmarks.schema import ExtractionTask
+
+# PUBLIC API
+
+def create_openai_function_based_extractor(
+    llm: Runnable,
+    schema: Type[BaseModel],
+) -> Runnable[dict, dict]:
+    """Create an extraction chain that uses an LLM to extract a schema.
+
+    The underlying functionality is exclusively for LLMs that support
+    extraction using openai functions format.
+
+    Args:
+        llm: The LLM to use for extraction.
+        schema: The schema to extract.
+
+    Returns:
+        An llm that will extract the schema
+    """
+    openai_functions = [convert_to_openai_function(schema)]
+    llm_kwargs = {
+        "functions": openai_functions,
+        "function_call": {"name": openai_functions[0]["name"]},
+    }
+    output_parser = JsonOutputFunctionsParser()
+    extraction_chain = (
+        llm.bind(**llm_kwargs) | output_parser | (lambda x: {"output": x})
     )
-    topic: str = Field(
-        ..., description="High level description of what the email is about"
+    return extraction_chain
+
+
+def run_on_dataset(
+    task: ExtractionTask,
+    llm: Runnable,
+    *,
+    tags: Optional[List[str]] = None,
+    **kwargs: Any,
+) -> Dict[str, Any]:
+    """Run an LLM on a dataset.
+
+    Args:
+        task: The task to run on.
+        llm: The LLM to run.
+        tags: The tags to use for the run.
+        kwargs: Additional arguments to pass to the client.
+    """
+    client = Client()
+    eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
+    return client.run_on_dataset(
+        dataset_name=task.name,
+        llm_or_chain_factory=create_openai_function_based_extractor(llm, task.schema),
+        evaluation=get_eval_config(eval_llm),
+        tags=tags,
+        **kwargs,
     )
-    tone: ToneEnum = Field(..., description="The tone of the email.")
-
-
-prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", "You are an expert researcher."),
-        (
-            "human",
-            "What can you tell me about the following email? Make sure to answer in the correct format: {email}",
-        ),
-    ]
-)
-
-openai_functions = [convert_to_openai_function(Email)]
-llm_kwargs = {
-    "functions": openai_functions,
-    "function_call": {"name": openai_functions[0]["name"]},
-}
-
-# Ollama JSON mode has a bug where it infintely generates newlines. This stop sequence hack fixes it
-llm = OllamaFunctions(temperature=0, model="llama2", timeout=300, stop=["\n\n\n\n"])
-# llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
-# llm = AnthropicFunctions(temperature=0, model="claude-2")
-
-# output_parser = get_openai_output_parser([Email])
-output_parser = JsonOutputFunctionsParser()
-extraction_chain = (
-    prompt | llm.bind(**llm_kwargs) | output_parser | (lambda x: {"output": x})
-)
-
-eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
-
-evaluation_config = RunEvalConfig(
-    evaluators=[
-        RunEvalConfig.LabeledScoreString(
-            criteria={
-                "accuracy": """
-Score 1: The answer is incorrect and unrelated to the question or reference document.
-Score 3: The answer is partially correct but has more than one omission or major errors.
-Score 5: The answer is mostly correct but has more than one omission or major error.
-Score 7: The answer is mostly correct but has at most one omission or major error.
-Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
-Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
-
-If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
-If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
-"""  # noqa
-            },
-            llm=eval_llm,
-            normalize_by=10.0,
-        ),
-    ],
-)
-
-client = Client()
-run_on_dataset(
-    dataset_name="Extraction Over Spam Emails",
-    llm_or_chain_factory=extraction_chain,
-    client=client,
-    evaluation=evaluation_config,
-    project_name="llama2-test",
-    concurrency_level=1,
-)
diff --git a/langchain_benchmarks/extraction/tasks/email_task.py b/langchain_benchmarks/extraction/tasks/email_task.py
index 6550307d..d216a4a2 100644
--- a/langchain_benchmarks/extraction/tasks/email_task.py
+++ b/langchain_benchmarks/extraction/tasks/email_task.py
@@ -1,6 +1,7 @@
 from enum import Enum
 from typing import Optional, List
 
+from langchain.prompts import ChatPromptTemplate
 from pydantic import BaseModel, Field
 
 from langchain_benchmarks.schema import ExtractionTask
@@ -32,10 +33,22 @@ class Email(BaseModel):
     tone: ToneEnum = Field(..., description="The tone of the email.")
 
 
+# This is a default prompt that works for chat models.
+DEFAULT_CHAT_MODEL_PROMPT = ChatPromptTemplate.from_messages(
+    [
+        ("system", "You are an expert researcher."),
+        (
+            "human",
+            "What can you tell me about the following email? Make sure to "
+            "answer in the correct format: {schema}",
+        ),
+    ]
+)
+
 EMAIL_EXTRACTION_TASK = ExtractionTask(
     name="Email Extraction",
     dataset_id="https://smith.langchain.com/public/36bdfe7d-3cd1-4b36-b957-d12d95810a2b/d",
-    model=Email,
+    schema=Email,
     description="""\
 A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, \
 as well as a script for initial extraction and formatting of other emails from \
@@ -45,4 +58,5 @@ class Email(BaseModel):
 
 See https://github.com/jacoblee93/oss-model-extraction-evals.
     """,
+    instructions=DEFAULT_CHAT_MODEL_PROMPT,
 )
diff --git a/langchain_benchmarks/schema.py b/langchain_benchmarks/schema.py
index eac59c47..17370306 100644
--- a/langchain_benchmarks/schema.py
+++ b/langchain_benchmarks/schema.py
@@ -2,6 +2,7 @@
 import dataclasses
 from typing import List, Callable, Any, Optional, Type, Union
 
+from langchain.prompts import ChatPromptTemplate
 from langchain.tools import BaseTool
 from pydantic import BaseModel
 from tabulate import tabulate
@@ -68,8 +69,16 @@ class ToolUsageTask(BaseTask):
 class ExtractionTask(BaseTask):
     """A definition for an extraction task."""
 
-    model: Type[BaseModel]
-    """Get the model for the task."""
+    schema: Type[BaseModel]
+    """Get schema that specifies what should be extracted."""
+
+    # We might want to make this optional / or support more types
+    # and add validation, but let's wait until we have more examples
+    instructions: ChatPromptTemplate
+    """Get the prompt for the task.
+
+    This is the default prompt to use for the task.
+    """
 
 
 @dataclasses.dataclass(frozen=False)
diff --git a/poetry.lock b/poetry.lock
index 0fa0c1ab..464687a6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1593,13 +1593,13 @@ files = [
 
 [[package]]
 name = "langchain"
-version = "0.0.336"
+version = "0.0.339"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = ">=3.8.1,<4.0"
 files = [
-    {file = "langchain-0.0.336-py3-none-any.whl", hash = "sha256:cbc72c170c5eb67509bf44fb833412a3d4ccf4476136447abd4f10468ef7d4c4"},
-    {file = "langchain-0.0.336.tar.gz", hash = "sha256:2cbb992b0a6975948d35616386d088c2920b66023cb94eb4f4b25e097fa1374d"},
+    {file = "langchain-0.0.339-py3-none-any.whl", hash = "sha256:fec250074a6fbb3711a51423d830006d69f34aedb67604df39c642be80852cbb"},
+    {file = "langchain-0.0.339.tar.gz", hash = "sha256:34eb4d7987d979663e361da435479c6f0648a170dae3eb1e9f0f7417f033a2c1"},
 ]
 
 [package.dependencies]
@@ -1617,8 +1617,8 @@ SQLAlchemy = ">=1.4,<3"
 tenacity = ">=8.1.0,<9.0.0"
 
 [package.extras]
-all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
-azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
+all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
+azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
 clarifai = ["clarifai (>=9.1.0)"]
 cli = ["typer (>=0.9.0,<0.10.0)"]
 cohere = ["cohere (>=4,<5)"]
diff --git a/tests/unit_tests/extraction/test_import_stuff.py b/tests/unit_tests/extraction/test_import_stuff.py
new file mode 100644
index 00000000..363340bc
--- /dev/null
+++ b/tests/unit_tests/extraction/test_import_stuff.py
@@ -0,0 +1,3 @@
+def test_import_stuff() -> None:
+    """Test that all imports work."""
+    from langchain_benchmarks.extraction import evaluators, implementations  # noqa: F401

From ca1e29145cdc4e678641660aae492059a5204aa9 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 20 Nov 2023 21:27:58 -0500
Subject: [PATCH 3/3] x

---
 langchain_benchmarks/extraction/implementations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/langchain_benchmarks/extraction/implementations.py b/langchain_benchmarks/extraction/implementations.py
index ede686ad..a1238a6a 100644
--- a/langchain_benchmarks/extraction/implementations.py
+++ b/langchain_benchmarks/extraction/implementations.py
@@ -13,6 +13,7 @@
 
 # PUBLIC API
 
+
 def create_openai_function_based_extractor(
     llm: Runnable,
     schema: Type[BaseModel],
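
A minimal usage sketch of the public helpers this series introduces (create_openai_function_based_extractor and the extraction run_on_dataset wrapper). It is not part of the patch: the choice of ChatOpenAI with gpt-3.5-turbo, the sample email text, and the tags value are assumptions, and the last call requires LangSmith credentials plus a copy of the "Email Extraction" dataset in your workspace.

# Illustrative sketch only; exercises the API added in this patch series.
from langchain.chat_models import ChatOpenAI

from langchain_benchmarks.extraction.implementations import (
    create_openai_function_based_extractor,
    run_on_dataset,
)
from langchain_benchmarks.extraction.tasks.email_task import EMAIL_EXTRACTION_TASK

# Any chat model that supports OpenAI function calling should work; gpt-3.5-turbo is an assumption.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Binds the Email schema as an OpenAI function and parses the function call into a dict.
extractor = create_openai_function_based_extractor(llm, EMAIL_EXTRACTION_TASK.schema)
print(extractor.invoke("Hi team, please send the Q3 report by Friday. -- Jordan, (555) 010-2222"))

# Evaluates the model against the task's LangSmith dataset using the score-string
# evaluator from evaluators.py; requires LANGCHAIN_API_KEY and the dataset to exist.
run_on_dataset(EMAIL_EXTRACTION_TASK, llm, tags=["smoke-test"])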