From e517474bdacd8e255ebba7da817bb94392a73eb6 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 20 Nov 2023 16:31:05 -0500
Subject: [PATCH 1/3] x

---
 langchain_benchmarks/extraction/evaluators.py |  27 +++++
 .../extraction/implementations.py             | 102 ++++++++++++++++++
 .../extraction/tasks/__init__.py              |   0
 .../extraction/{ => tasks}/email_task.py      |  26 -----
 langchain_benchmarks/registration.py          |   2 +-
 .../extraction/test_email_extraction.py       |   1 -
 6 files changed, 130 insertions(+), 28 deletions(-)
 create mode 100644 langchain_benchmarks/extraction/evaluators.py
 create mode 100644 langchain_benchmarks/extraction/implementations.py
 create mode 100644 langchain_benchmarks/extraction/tasks/__init__.py
 rename langchain_benchmarks/extraction/{ => tasks}/email_task.py (52%)

diff --git a/langchain_benchmarks/extraction/evaluators.py b/langchain_benchmarks/extraction/evaluators.py
new file mode 100644
index 00000000..5a6f29a1
--- /dev/null
+++ b/langchain_benchmarks/extraction/evaluators.py
@@ -0,0 +1,27 @@
+from langchain.smith import RunEvalConfig
+from pydantic import BaseModel
+
+
+def get_eval_config(eval_llm: BaseModel) -> RunEvalConfig:
+    """Get the evaluation configuration for the email task."""
+    return RunEvalConfig(
+        evaluators=[
+            RunEvalConfig.LabeledScoreString(
+                criteria={
+                    "accuracy": """
+                        Score 1: The answer is incorrect and unrelated to the question or reference document.
+                        Score 3: The answer is partially correct but has more than one omission or major errors.
+                        Score 5: The answer is mostly correct but has more than one omission or major error.
+                        Score 7: The answer is mostly correct but has at most one omission or major error.
+                        Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
+                        Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
+
+                        If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
+                        If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
+                        """  # noqa
+                },
+                llm=eval_llm,
+                normalize_by=10.0,
+            ),
+        ],
+    )
diff --git a/langchain_benchmarks/extraction/implementations.py b/langchain_benchmarks/extraction/implementations.py
new file mode 100644
index 00000000..b998097a
--- /dev/null
+++ b/langchain_benchmarks/extraction/implementations.py
@@ -0,0 +1,102 @@
+"""Default implementations of LLMs that can be used for extraction."""
+import os
+
+from typing import Optional, List
+from enum import Enum
+
+from langsmith import Client
+from langchain.smith import RunEvalConfig, run_on_dataset
+
+from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
+from langchain_experimental.llms.ollama_functions import OllamaFunctions
+from langchain_experimental.llms.anthropic_functions import AnthropicFunctions
+from langchain.pydantic_v1 import BaseModel, Field
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.chains.openai_functions import convert_to_openai_function
+
+
+class ToneEnum(str, Enum):
+    positive = "positive"
+    negative = "negative"
+
+
+class Email(BaseModel):
+    """Relevant information about an email."""
+
+    sender: Optional[str] = Field(None, description="The sender's name, if available")
+    sender_phone_number: Optional[str] = Field(
+        None, description="The sender's phone number, if available"
+    )
+    sender_address: Optional[str] = Field(
+        None, description="The sender's address, if available"
+    )
+    action_items: List[str] = Field(
+        ..., description="A list of action items requested by the email"
+    )
+    topic: str = Field(
+        ..., description="High level description of what the email is about"
+    )
+    tone: ToneEnum = Field(..., description="The tone of the email.")
+
+
+prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", "You are an expert researcher."),
+        (
+            "human",
+            "What can you tell me about the following email? Make sure to answer in the correct format: {email}",
+        ),
+    ]
+)
+
+openai_functions = [convert_to_openai_function(Email)]
+llm_kwargs = {
+    "functions": openai_functions,
+    "function_call": {"name": openai_functions[0]["name"]},
+}
+
+# Ollama JSON mode has a bug where it infintely generates newlines. This stop sequence hack fixes it
+llm = OllamaFunctions(temperature=0, model="llama2", timeout=300, stop=["\n\n\n\n"])
+# llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
+# llm = AnthropicFunctions(temperature=0, model="claude-2")
+
+# output_parser = get_openai_output_parser([Email])
+output_parser = JsonOutputFunctionsParser()
+extraction_chain = (
+    prompt | llm.bind(**llm_kwargs) | output_parser | (lambda x: {"output": x})
+)
+
+eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
+
+evaluation_config = RunEvalConfig(
+    evaluators=[
+        RunEvalConfig.LabeledScoreString(
+            criteria={
+                "accuracy": """
+Score 1: The answer is incorrect and unrelated to the question or reference document.
+Score 3: The answer is partially correct but has more than one omission or major errors.
+Score 5: The answer is mostly correct but has more than one omission or major error.
+Score 7: The answer is mostly correct but has at most one omission or major error.
+Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
+Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
+
+If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
+If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
+"""  # noqa
+            },
+            llm=eval_llm,
+            normalize_by=10.0,
+        ),
+    ],
+)
+
+client = Client()
+run_on_dataset(
+    dataset_name="Extraction Over Spam Emails",
+    llm_or_chain_factory=extraction_chain,
+    client=client,
+    evaluation=evaluation_config,
+    project_name="llama2-test",
+    concurrency_level=1,
+)
diff --git a/langchain_benchmarks/extraction/tasks/__init__.py b/langchain_benchmarks/extraction/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/langchain_benchmarks/extraction/email_task.py b/langchain_benchmarks/extraction/tasks/email_task.py
similarity index 52%
rename from langchain_benchmarks/extraction/email_task.py
rename to langchain_benchmarks/extraction/tasks/email_task.py
index e03f138d..6550307d 100644
--- a/langchain_benchmarks/extraction/email_task.py
+++ b/langchain_benchmarks/extraction/tasks/email_task.py
@@ -1,7 +1,6 @@
 from enum import Enum
 from typing import Optional, List
 
-from langchain.smith import RunEvalConfig
 from pydantic import BaseModel, Field
 
 from langchain_benchmarks.schema import ExtractionTask
@@ -33,31 +32,6 @@ class Email(BaseModel):
     tone: ToneEnum = Field(..., description="The tone of the email.")
 
 
-def get_eval_config(eval_llm: BaseModel) -> RunEvalConfig:
-    """Get the evaluation configuration for the email task."""
-    return RunEvalConfig(
-        evaluators=[
-            RunEvalConfig.LabeledScoreString(
-                criteria={
-                    "accuracy": """
-                        Score 1: The answer is incorrect and unrelated to the question or reference document.
-                        Score 3: The answer is partially correct but has more than one omission or major errors.
-                        Score 5: The answer is mostly correct but has more than one omission or major error.
-                        Score 7: The answer is mostly correct but has at most one omission or major error.
-                        Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
-                        Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
-
-                        If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
-                        If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
-                        """  # noqa
-                },
-                llm=eval_llm,
-                normalize_by=10.0,
-            ),
-        ],
-    )
-
-
 EMAIL_EXTRACTION_TASK = ExtractionTask(
     name="Email Extraction",
     dataset_id="https://smith.langchain.com/public/36bdfe7d-3cd1-4b36-b957-d12d95810a2b/d",
diff --git a/langchain_benchmarks/registration.py b/langchain_benchmarks/registration.py
index 7ebf8fed..f68d72e1 100644
--- a/langchain_benchmarks/registration.py
+++ b/langchain_benchmarks/registration.py
@@ -1,6 +1,6 @@
 """Registry of environments for ease of access."""
 
-from langchain_benchmarks.extraction import email_task
+from langchain_benchmarks.extraction.tasks import email_task
 from langchain_benchmarks.schema import Registry
 from langchain_benchmarks.tool_usage.tasks import (
     type_writer,
diff --git a/tests/unit_tests/extraction/test_email_extraction.py b/tests/unit_tests/extraction/test_email_extraction.py
index 57bf3530..e9622150 100644
--- a/tests/unit_tests/extraction/test_email_extraction.py
+++ b/tests/unit_tests/extraction/test_email_extraction.py
@@ -1,3 +1,2 @@
 def test_email_extraction() -> None:
     """Try to import the email task."""
-    from langchain_benchmarks.extraction import email_task  # noqa: F401

From 57ed4a78816551254127eeb425dc37f1eb370280 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 20 Nov 2023 17:23:18 -0500
Subject: [PATCH 2/3] x

---
 .../extraction/implementations.py   | 159 +++++++-----------
 .../extraction/tasks/email_task.py  |  16 +-
 langchain_benchmarks/schema.py      |  13 +-
 poetry.lock                         |  10 +-
 .../extraction/test_import_stuff.py |   3 +
 5 files changed, 96 insertions(+), 105 deletions(-)
 create mode 100644 tests/unit_tests/extraction/test_import_stuff.py

diff --git a/langchain_benchmarks/extraction/implementations.py b/langchain_benchmarks/extraction/implementations.py
index b998097a..ede686ad 100644
--- a/langchain_benchmarks/extraction/implementations.py
+++ b/langchain_benchmarks/extraction/implementations.py
@@ -1,102 +1,67 @@
 """Default implementations of LLMs that can be used for extraction."""
-import os
+from typing import Type, Optional, List, Any, Dict
 
-from typing import Optional, List
-from enum import Enum
-
-from langsmith import Client
-from langchain.smith import RunEvalConfig, run_on_dataset
-
-from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
-from langchain_experimental.llms.ollama_functions import OllamaFunctions
-from langchain_experimental.llms.anthropic_functions import AnthropicFunctions
-from langchain.pydantic_v1 import BaseModel, Field
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts import ChatPromptTemplate
 from langchain.chains.openai_functions import convert_to_openai_function
-
-
-class ToneEnum(str, Enum):
-    positive = "positive"
-    negative = "negative"
-
-
-class Email(BaseModel):
-    """Relevant information about an email."""
-
-    sender: Optional[str] = Field(None, description="The sender's name, if available")
-    sender_phone_number: Optional[str] = Field(
-        None, description="The sender's phone number, if available"
-    )
-    sender_address: Optional[str] = Field(
-        None, description="The sender's address, if available"
-    )
-    action_items: List[str] = Field(
-        ..., description="A list of action items requested by the email"
+from langchain.chat_models import ChatOpenAI
+from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
+from langchain.schema.runnable import Runnable
+from langsmith.client import Client
+from pydantic import BaseModel
+
+from langchain_benchmarks.extraction.evaluators import get_eval_config
+from langchain_benchmarks.schema import ExtractionTask
+
+# PUBLIC API
+
+def create_openai_function_based_extractor(
+    llm: Runnable,
+    schema: Type[BaseModel],
+) -> Runnable[dict, dict]:
+    """Create an extraction chain that uses an LLM to extract a schema.
+
+    The underlying functionality is exclusively for LLMs that support
+    extraction using openai functions format.
+
+    Args:
+        llm: The LLM to use for extraction.
+        schema: The schema to extract.
+
+    Returns:
+        An llm that will extract the schema
+    """
+    openai_functions = [convert_to_openai_function(schema)]
+    llm_kwargs = {
+        "functions": openai_functions,
+        "function_call": {"name": openai_functions[0]["name"]},
+    }
+    output_parser = JsonOutputFunctionsParser()
+    extraction_chain = (
+        llm.bind(**llm_kwargs) | output_parser | (lambda x: {"output": x})
     )
-    topic: str = Field(
-        ..., description="High level description of what the email is about"
+    return extraction_chain
+
+
+def run_on_dataset(
+    task: ExtractionTask,
+    llm: Runnable,
+    *,
+    tags: Optional[List[str]] = None,
+    **kwargs: Any,
+) -> Dict[str, Any]:
+    """Run an LLM on a dataset.
+
+    Args:
+        task: The task to run on.
+        llm: The LLM to run.
+        tags: The tags to use for the run.
+        kwargs: Additional arguments to pass to the client.
+    """
+    client = Client()
+    eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
+    return client.run_on_dataset(
+        dataset_name=task.name,
+        llm_or_chain_factory=create_openai_function_based_extractor(llm, task.schema),
+        evaluation=get_eval_config(eval_llm),
+        tags=tags,
+        **kwargs,
     )
-    tone: ToneEnum = Field(..., description="The tone of the email.")
-
-
-prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", "You are an expert researcher."),
-        (
-            "human",
-            "What can you tell me about the following email? Make sure to answer in the correct format: {email}",
-        ),
-    ]
-)
-
-openai_functions = [convert_to_openai_function(Email)]
-llm_kwargs = {
-    "functions": openai_functions,
-    "function_call": {"name": openai_functions[0]["name"]},
-}
-
-# Ollama JSON mode has a bug where it infintely generates newlines. This stop sequence hack fixes it
-llm = OllamaFunctions(temperature=0, model="llama2", timeout=300, stop=["\n\n\n\n"])
-# llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
-# llm = AnthropicFunctions(temperature=0, model="claude-2")
-
-# output_parser = get_openai_output_parser([Email])
-output_parser = JsonOutputFunctionsParser()
-extraction_chain = (
-    prompt | llm.bind(**llm_kwargs) | output_parser | (lambda x: {"output": x})
-)
-
-eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
-
-evaluation_config = RunEvalConfig(
-    evaluators=[
-        RunEvalConfig.LabeledScoreString(
-            criteria={
-                "accuracy": """
-Score 1: The answer is incorrect and unrelated to the question or reference document.
-Score 3: The answer is partially correct but has more than one omission or major errors.
-Score 5: The answer is mostly correct but has more than one omission or major error.
-Score 7: The answer is mostly correct but has at most one omission or major error.
-Score 9: The answer is mostly correct with no omissions and only minor errors, and aligns with the reference document.
-Score 10: The answer is correct, complete, and aligns with the reference document. Extra information is acceptable if it is sensible.
-
-If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
-If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct and not be penalized.
-"""  # noqa
-            },
-            llm=eval_llm,
-            normalize_by=10.0,
-        ),
-    ],
-)
-
-client = Client()
-run_on_dataset(
-    dataset_name="Extraction Over Spam Emails",
-    llm_or_chain_factory=extraction_chain,
-    client=client,
-    evaluation=evaluation_config,
-    project_name="llama2-test",
-    concurrency_level=1,
-)
diff --git a/langchain_benchmarks/extraction/tasks/email_task.py b/langchain_benchmarks/extraction/tasks/email_task.py
index 6550307d..d216a4a2 100644
--- a/langchain_benchmarks/extraction/tasks/email_task.py
+++ b/langchain_benchmarks/extraction/tasks/email_task.py
@@ -1,6 +1,7 @@
 from enum import Enum
 from typing import Optional, List
 
+from langchain.prompts import ChatPromptTemplate
 from pydantic import BaseModel, Field
 
 from langchain_benchmarks.schema import ExtractionTask
@@ -32,10 +33,22 @@ class Email(BaseModel):
     tone: ToneEnum = Field(..., description="The tone of the email.")
 
 
+# This is a default prompt that works for chat models.
+DEFAULT_CHAT_MODEL_PROMPT = ChatPromptTemplate.from_messages(
+    [
+        ("system", "You are an expert researcher."),
+        (
+            "human",
+            "What can you tell me about the following email? Make sure to "
+            "answer in the correct format: {schema}",
+        ),
+    ]
+)
+
 EMAIL_EXTRACTION_TASK = ExtractionTask(
     name="Email Extraction",
     dataset_id="https://smith.langchain.com/public/36bdfe7d-3cd1-4b36-b957-d12d95810a2b/d",
-    model=Email,
+    schema=Email,
     description="""\
 A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, \
 as well as a script for initial extraction and formatting of other emails from \
@@ -45,4 +58,5 @@ class Email(BaseModel):
 
 See https://github.com/jacoblee93/oss-model-extraction-evals.
     """,
+    instructions=DEFAULT_CHAT_MODEL_PROMPT,
 )
diff --git a/langchain_benchmarks/schema.py b/langchain_benchmarks/schema.py
index eac59c47..17370306 100644
--- a/langchain_benchmarks/schema.py
+++ b/langchain_benchmarks/schema.py
@@ -2,6 +2,7 @@
 import dataclasses
 from typing import List, Callable, Any, Optional, Type, Union
 
+from langchain.prompts import ChatPromptTemplate
 from langchain.tools import BaseTool
 from pydantic import BaseModel
 from tabulate import tabulate
@@ -68,8 +69,16 @@ class ToolUsageTask(BaseTask):
 class ExtractionTask(BaseTask):
     """A definition for an extraction task."""
 
-    model: Type[BaseModel]
-    """Get the model for the task."""
+    schema: Type[BaseModel]
+    """Get schema that specifies what should be extracted."""
+
+    # We might want to make this optional / or support more types
+    # and add validation, but let's wait until we have more examples
+    instructions: ChatPromptTemplate
+    """Get the prompt for the task.
+
+    This is the default prompt to use for the task.
+    """
 
 
 @dataclasses.dataclass(frozen=False)
diff --git a/poetry.lock b/poetry.lock
index 0fa0c1ab..464687a6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1593,13 +1593,13 @@ files = [
 
 [[package]]
 name = "langchain"
-version = "0.0.336"
+version = "0.0.339"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = ">=3.8.1,<4.0"
 files = [
-    {file = "langchain-0.0.336-py3-none-any.whl", hash = "sha256:cbc72c170c5eb67509bf44fb833412a3d4ccf4476136447abd4f10468ef7d4c4"},
-    {file = "langchain-0.0.336.tar.gz", hash = "sha256:2cbb992b0a6975948d35616386d088c2920b66023cb94eb4f4b25e097fa1374d"},
+    {file = "langchain-0.0.339-py3-none-any.whl", hash = "sha256:fec250074a6fbb3711a51423d830006d69f34aedb67604df39c642be80852cbb"},
+    {file = "langchain-0.0.339.tar.gz", hash = "sha256:34eb4d7987d979663e361da435479c6f0648a170dae3eb1e9f0f7417f033a2c1"},
 ]
 
 [package.dependencies]
@@ -1617,8 +1617,8 @@ SQLAlchemy = ">=1.4,<3"
 tenacity = ">=8.1.0,<9.0.0"
 
 [package.extras]
-all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
-azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
+all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
+azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
 clarifai = ["clarifai (>=9.1.0)"]
 cli = ["typer (>=0.9.0,<0.10.0)"]
 cohere = ["cohere (>=4,<5)"]
diff --git a/tests/unit_tests/extraction/test_import_stuff.py b/tests/unit_tests/extraction/test_import_stuff.py
new file mode 100644
index 00000000..363340bc
--- /dev/null
+++ b/tests/unit_tests/extraction/test_import_stuff.py
@@ -0,0 +1,3 @@
+def test_import_stuff() -> None:
+    """Test that all imports work."""
+    from langchain_benchmarks.extraction import evaluators, implementations  # noqa: F401

From ca1e29145cdc4e678641660aae492059a5204aa9 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 20 Nov 2023 21:27:58 -0500
Subject: [PATCH 3/3] x

---
 langchain_benchmarks/extraction/implementations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/langchain_benchmarks/extraction/implementations.py b/langchain_benchmarks/extraction/implementations.py
index ede686ad..a1238a6a 100644
--- a/langchain_benchmarks/extraction/implementations.py
+++ b/langchain_benchmarks/extraction/implementations.py
@@ -13,6 +13,7 @@
 
 # PUBLIC API
 
+
 def create_openai_function_based_extractor(
     llm: Runnable,
     schema: Type[BaseModel],
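
A minimal usage sketch of the public helpers this series introduces (create_openai_function_based_extractor and the extraction run_on_dataset wrapper). It is not part of the patch: the choice of ChatOpenAI with gpt-3.5-turbo, the sample email text, and the tags value are assumptions, and the last call requires LangSmith credentials plus a copy of the "Email Extraction" dataset in your workspace.

# Illustrative sketch only; exercises the API added in this patch series.
from langchain.chat_models import ChatOpenAI

from langchain_benchmarks.extraction.implementations import (
    create_openai_function_based_extractor,
    run_on_dataset,
)
from langchain_benchmarks.extraction.tasks.email_task import EMAIL_EXTRACTION_TASK

# Any chat model that supports OpenAI function calling should work; gpt-3.5-turbo is an assumption.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Binds the Email schema as an OpenAI function and parses the function call into a dict.
extractor = create_openai_function_based_extractor(llm, EMAIL_EXTRACTION_TASK.schema)
print(extractor.invoke("Hi team, please send the Q3 report by Friday. -- Jordan, (555) 010-2222"))

# Evaluates the model against the task's LangSmith dataset using the score-string
# evaluator from evaluators.py; requires LANGCHAIN_API_KEY and the dataset to exist.
run_on_dataset(EMAIL_EXTRACTION_TASK, llm, tags=["smoke-test"])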