automating tool benchmarks #188

Closed - wants to merge 15 commits

30 changes: 30 additions & 0 deletions .github/workflows/_benchmarks.yml
@@ -0,0 +1,30 @@
name: Weekly Tool Benchmarks

on:
workflow_dispatch:
schedule:
- cron: '0 0 * * 0' # Runs at midnight (00:00) every Sunday (UTC time)

jobs:
run_tool_benchmarks:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python 3.12 + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: '3.12'
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: .
cache-key: benchmarks-all

- name: Install dependencies
shell: bash
run: |
echo "Running tests, installing dependencies with poetry..."
poetry install --with test,lint,typing,docs

- name: Execute Tool Benchmarks
run: python scripts/tool_benchmarks.py
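
The entry point invoked above, scripts/tool_benchmarks.py, is not part of this diff. As a rough, hypothetical sketch only — assuming the public langchain_benchmarks registry API (registry, clone_public_dataset), an illustrative task name, and LangSmith credentials available at run time — such a driver could look like:

"""Hypothetical outline of a weekly tool-benchmark driver; not the script from this PR."""
from langchain_benchmarks import clone_public_dataset, registry

# Illustrative task selection; the real script may enumerate tasks differently.
TOOL_USAGE_TASKS = ["Multiverse Math"]


def main() -> None:
    for name in TOOL_USAGE_TASKS:
        task = registry[name]
        # Copy the task's public dataset into the current LangSmith workspace.
        clone_public_dataset(task.dataset_id, dataset_name=task.name)
        print(f"Dataset ready for benchmarking: {task.name}")


if __name__ == "__main__":
    main()
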
2 changes: 2 additions & 0 deletions Makefile
@@ -10,6 +10,8 @@ lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=. --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')

lint lint_diff:
# [ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
# [ "$(PYTHON_FILES)" = "" ] || poetry run mypy $(PYTHON_FILES)

2 changes: 1 addition & 1 deletion archived/csv-qa/custom_agent.py
@@ -3,12 +3,12 @@
from langchain.agents.agent_toolkits.conversational_retrieval.tool import (
create_retriever_tool,
)
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain.tools import PythonAstREPLTool
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langsmith import Client
from pydantic import BaseModel, Field

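
The same one-line migration repeats across the files in this PR: the deprecated langchain.chat_models.ChatOpenAI import is swapped for the langchain_openai partner package. A minimal sketch of the new usage (the model name and temperature below are illustrative, not values taken from this PR):

# Requires the partner package: pip install langchain-openai (plus OPENAI_API_KEY set).
from langchain_openai import ChatOpenAI  # replaces: from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)  # illustrative settings
print(llm.invoke("Reply with one word: hello").content)
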
2 changes: 1 addition & 1 deletion archived/csv-qa/pandas_agent_gpt_35.py
@@ -1,8 +1,8 @@
import pandas as pd
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain_openai import ChatOpenAI
from langsmith import Client

if __name__ == "__main__":
2 changes: 1 addition & 1 deletion archived/csv-qa/pandas_agent_gpt_4.py
@@ -1,8 +1,8 @@
import pandas as pd
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain_openai import ChatOpenAI
from langsmith import Client

if __name__ == "__main__":
2 changes: 1 addition & 1 deletion archived/csv-qa/pandas_ai.py
@@ -1,8 +1,8 @@
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain_openai import ChatOpenAI
from langsmith import Client
from pandasai import PandasAI

2 changes: 1 addition & 1 deletion archived/csv-qa/streamlit_app.py
@@ -2,7 +2,7 @@
import streamlit as st
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI

df = pd.read_csv("titanic.csv")

2 changes: 1 addition & 1 deletion archived/extraction/streamlit_app.py
@@ -1,6 +1,6 @@
import streamlit as st
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI
from langsmith import Client

st.set_page_config(page_title="🦜🔗 Text-to-graph extraction")
@@ -3,13 +3,13 @@
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema.messages import AIMessage, HumanMessage
from langchain.tools import tool
from langchain.tools.render import format_tool_to_openai_function
from langchain_docs_retriever.retriever import get_retriever
from langchain_openai import ChatOpenAI

# This is used to tell the model how to best use the retriever.

2 changes: 1 addition & 1 deletion archived/langchain-docs-benchmarking/run_evals.py
@@ -7,9 +7,9 @@

from anthropic_iterative_search.chain import chain as anthropic_agent_chain
from chat_langchain.chain import create_chain
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import Runnable
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain_openai import ChatOpenAI
from langsmith import Client
from oai_assistant.chain import agent_executor as openai_assistant_chain
from openai_functions_agent import agent_executor as openai_functions_agent_chain
2 changes: 1 addition & 1 deletion docs/source/notebooks/extraction/chat_extraction.ipynb
@@ -259,8 +259,8 @@
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-4-1106-preview\", temperature=0).bind_functions(\n",
" functions=[task.schema],\n",
2 changes: 1 addition & 1 deletion docs/source/notebooks/extraction/email.ipynb
@@ -232,8 +232,8 @@
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-16k\", temperature=0).bind_functions(\n",
" functions=[task.schema],\n",
2 changes: 1 addition & 1 deletion docs/source/notebooks/extraction/intro.ipynb
@@ -97,7 +97,7 @@
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"from langchain_benchmarks.extraction import get_eval_config\n",
"\n",
3 changes: 2 additions & 1 deletion docs/source/notebooks/retrieval/comparing_techniques.ipynb
@@ -75,6 +75,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {},
"outputs": [],
"source": [
@@ -728,12 +729,12 @@
"from langchain.agents import AgentExecutor\n",
"from langchain.agents.format_scratchpad import format_to_openai_functions\n",
"from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain.pydantic_v1 import BaseModel, Field\n",
"from langchain.schema.messages import AIMessage, HumanMessage\n",
"from langchain.tools import tool\n",
"from langchain.tools.render import format_tool_to_openai_function\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"# This is used to tell the model how to best use the retriever.\n",
"\n",
@@ -508,8 +508,8 @@
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.schema.messages import HumanMessage\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"\n",
"def image_summarize(img_base64, prompt):\n",
@@ -328,10 +328,10 @@
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"\n",
"def rag_chain(retriever):\n",
@@ -451,11 +451,11 @@
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.document import Document\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable.passthrough import RunnableAssign\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"# Prompt\n",
"prompt = ChatPromptTemplate.from_messages(\n",
@@ -126,7 +126,6 @@
"source": [
"import uuid\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.document_loaders import PyPDFLoader\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.prompts import ChatPromptTemplate\n",
@@ -138,6 +137,7 @@
"from langchain.storage import InMemoryStore\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.vectorstores import Chroma\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"\n",
"def prepare_documents(docs):\n",
2 changes: 1 addition & 1 deletion langchain_benchmarks/extraction/evaluators.py
@@ -1,8 +1,8 @@
from typing import Optional

from langchain.chat_models import ChatOpenAI
from langchain.chat_models.base import BaseChatModel
from langchain.smith import RunEvalConfig
from langchain_openai import ChatOpenAI


def get_eval_config(eval_llm: Optional[BaseChatModel] = None) -> RunEvalConfig:
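
Only the import ordering changes here; get_eval_config still takes an optional grading model, as the signature above shows. A small usage sketch (the explicit GPT-4 grader is an illustrative choice, not something this PR configures):

from langchain_benchmarks.extraction import get_eval_config
from langchain_openai import ChatOpenAI

# With no argument, the helper presumably falls back to its own ChatOpenAI default
# (hence the import in this module).
default_config = get_eval_config()

# Or pin the grading model explicitly -- illustrative, not part of this PR.
strict_config = get_eval_config(eval_llm=ChatOpenAI(model="gpt-4", temperature=0))
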
2 changes: 1 addition & 1 deletion langchain_benchmarks/extraction/implementations.py
@@ -2,10 +2,10 @@
from typing import Any, Dict, List, Optional, Type

from langchain.chains.openai_functions import convert_to_openai_function
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import Runnable
from langchain_openai import ChatOpenAI
from langsmith.client import Client
from pydantic import BaseModel

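
Judging by its imports, implementations.py wires a function-calling extraction chain: a prompt, a function-bound ChatOpenAI, and a JSON function-output parser. A self-contained sketch of that pattern with a stand-in schema — the Person model, prompt text, and model name are assumptions for illustration, not the module's actual code, and exact behavior depends on the installed langchain/pydantic versions:

from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Stand-in extraction schema; the benchmark tasks supply their own."""

    name: str = Field(description="The person's name")
    age: int = Field(description="Age in years")


prompt = ChatPromptTemplate.from_messages(
    [("system", "Extract the requested fields from the user's text."), ("human", "{input}")]
)
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0).bind_functions(
    functions=[Person], function_call="Person"
)
chain = prompt | llm | JsonOutputFunctionsParser()
# e.g. chain.invoke({"input": "Alice is 31."}) -> {"name": "Alice", "age": 31}
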
2 changes: 1 addition & 1 deletion langchain_benchmarks/rag/evaluators.py
@@ -1,8 +1,8 @@
from typing import Optional

from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator
from langchain.smith import RunEvalConfig
from langchain_openai import ChatOpenAI

try:
from langchain.schema.language_model import BaseLanguageModel
@@ -1,9 +1,9 @@
from typing import Optional

from langchain.base_language import BaseLanguageModel
from langchain.chat_models import ChatOpenAI
from langchain.schema.retriever import BaseRetriever
from langchain.schema.runnable import Runnable
from langchain_openai import ChatOpenAI

from langchain_benchmarks.rag.tasks.langchain_docs.architectures.crqa import (
create_response_chain,
2 changes: 1 addition & 1 deletion langchain_benchmarks/rag/utils/indexing.py
@@ -3,7 +3,6 @@
from functools import partial
from typing import Callable, Iterable, List, Optional

from langchain.chat_models import ChatOpenAI
from langchain.indexes import SQLRecordManager, index
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser
from langchain.prompts import ChatPromptTemplate
@@ -18,6 +17,7 @@
from langchain.schema.vectorstore import VectorStore
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain_openai import ChatOpenAI
from tqdm.auto import tqdm

logger = logging.getLogger(__name__)
4 changes: 2 additions & 2 deletions langchain_benchmarks/schema.py
@@ -300,8 +300,8 @@ def _get_default_path(provider: str, type_: ModelType) -> str:
"""Get the default path for a model."""
paths = {
("anthropic", "chat"): "langchain_anthropic.ChatAnthropic",
("anyscale", "chat"): "langchain.chat_models.anyscale.ChatAnyscale",
("anyscale", "llm"): "langchain.llms.anyscale.Anyscale",
("anyscale", "chat"): "langchain_community.chat_models.anyscale.ChatAnyscale",
("anyscale", "llm"): "langchain_community.llms.anyscale.Anyscale",
("fireworks", "chat"): "langchain_fireworks.ChatFireworks",
("fireworks", "llm"): "langchain_fireworks.Fireworks",
("openai", "chat"): "langchain_openai.ChatOpenAI",
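
The default model paths are plain dotted import strings, now pointing at langchain_community for the Anyscale entries. A minimal sketch of how such a path can be resolved at run time — this mirrors the general idea, not necessarily the repository's own loading helper:

from importlib import import_module


def load_from_path(dotted_path: str):
    """Resolve a 'package.module.ClassName' string to the object it names."""
    module_path, _, attr = dotted_path.rpartition(".")
    return getattr(import_module(module_path), attr)


# e.g. the updated Anyscale chat default (requires langchain-community to be installed):
ChatAnyscale = load_from_path("langchain_community.chat_models.anyscale.ChatAnyscale")
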
2 changes: 1 addition & 1 deletion langchain_benchmarks/tool_usage/evaluators.py
@@ -10,11 +10,11 @@

from langchain.callbacks.manager import collect_runs
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import EvaluatorType, StringEvaluator, load_evaluator
from langchain.evaluation.schema import StringEvaluator
from langchain.smith import RunEvalConfig
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
from langchain_openai import ChatOpenAI
from langsmith.evaluation.evaluator import (
EvaluationResult,
EvaluationResults,
4 changes: 2 additions & 2 deletions langchain_benchmarks/tool_usage/tasks/multiverse_math.py
@@ -47,7 +47,7 @@

def multiply(a: float, b: float) -> float:
"""Multiply two numbers; a * b."""
return 1.1 * a * b
return round(1.1 * a * b, 5)


def divide(a: float, b: float) -> float:
@@ -140,7 +140,7 @@ def get_environment() -> ToolUsageEnvironment:
"expected_steps": ["subtract"],
},
{
"question": "What is -5 if evaluated using the negate function?",
"question": "what is the value of the negate function evaluated on the argument -5",
"answer": negate(-5),
"expected_steps": ["negate"],
},
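
The multiply change is purely about output stability: in this task's multiverse math, multiplication carries an extra 1.1 factor, and rounding to five decimal places keeps the stored expected answers free of floating-point noise. A quick worked check in plain Python, mirroring the definition above:

def multiply(a: float, b: float) -> float:
    """Multiverse multiplication: a * b scaled by 1.1, rounded to 5 decimal places."""
    return round(1.1 * a * b, 5)


# Without rounding, 1.1 * 2 * 3 evaluates to 6.6000000000000005;
# rounding pins the expected answer to exactly 6.6.
assert multiply(2, 3) == 6.6

The reworded negate question above changes only the phrasing of the prompt; its recorded answer and expected steps are unchanged.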