Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wfh/add semistructured #38

Merged
merged 19 commits into from
Nov 21, 2023
1 change: 0 additions & 1 deletion csv-qa/pandas_agent_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain.tools import PythonAstREPLTool
from langchain.vectorstores import FAISS
Expand Down
2 changes: 0 additions & 2 deletions csv-qa/pandas_ai.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import pandas as pd
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
Expand Down
1,719 changes: 0 additions & 1,719 deletions docs/source/notebooks/rag.ipynb

This file was deleted.

973 changes: 973 additions & 0 deletions docs/source/notebooks/rag_langchain_docs.ipynb

Large diffs are not rendered by default.

383 changes: 383 additions & 0 deletions docs/source/notebooks/rag_semi_structured.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,4 @@ def mapper(input: dict):

agent_executor = (mapper | agent_executor | (lambda x: x["output"])).with_types(
input_type=ChainInput
)
)
2 changes: 1 addition & 1 deletion langchain-docs-benchmarking/prepare_dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Copy the public dataset to your own langsmith tenant."""
from typing import Optional

from langsmith import Client
from tqdm import tqdm

DATASET_NAME = "LangChain Docs Q&A"
PUBLIC_DATASET_TOKEN = "452ccafc-18e1-4314-885b-edd735f17b9d"
Expand Down
2 changes: 1 addition & 1 deletion langchain_benchmarks/extraction/email_task.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Optional, List
from typing import List, Optional

from langchain.smith import RunEvalConfig
from pydantic import BaseModel, Field
Expand Down
1 change: 1 addition & 0 deletions langchain_benchmarks/rag/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.sql
11 changes: 4 additions & 7 deletions langchain_benchmarks/rag/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
"""RAG environments."""
from langchain_benchmarks.rag.evaluators import RAG_EVALUATION
from langchain_benchmarks.rag.environments.langchain_docs.task import (
LANGCHAIN_DOCS_TASK,
)
from langchain_benchmarks.rag.evaluators import get_eval_config
from langchain_benchmarks.rag.tasks import LANGCHAIN_DOCS_TASK

# Please keep this list sorted!
__all__ = ["LANGCHAIN_DOCS_TASK", "RAG_EVALUATION"]
# Please keep this sorted
__all__ = ["get_eval_config", "LANGCHAIN_DOCS_TASK"]

This file was deleted.

This file was deleted.

Binary file not shown.

This file was deleted.

This file was deleted.

31 changes: 17 additions & 14 deletions langchain_benchmarks/rag/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,21 @@ def evaluate_run(
""" # noqa
}

eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
# Use a longer-context LLM to check documents
faithfulness_eval_llm = ChatOpenAI(
model="gpt-4-1106-preview", temperature=0.0, model_kwargs={"seed": 42}
)

RAG_EVALUATION = RunEvalConfig(
evaluators=[
RunEvalConfig.LabeledScoreString(
criteria=_ACCURACY_CRITERION, llm=eval_llm, normalize_by=10.0
),
RunEvalConfig.EmbeddingDistance(),
],
custom_evaluators=[FaithfulnessEvaluator(llm=faithfulness_eval_llm)],
)
def get_eval_config() -> RunEvalConfig:
    """Build the evaluation configuration used for the RAG tasks.

    Every call constructs new LLM clients and evaluators, so nothing is
    instantiated at import time. Both LLMs are created with
    ``temperature=0.0`` and ``seed=42``.

    Returns:
        A ``RunEvalConfig`` with two built-in evaluators (a labeled score
        string using the accuracy criterion, normalized by 10, and an
        embedding distance) plus one custom faithfulness evaluator.
    """
    accuracy_llm = ChatOpenAI(
        model="gpt-4",
        temperature=0.0,
        model_kwargs={"seed": 42},
    )
    # Checking answers against retrieved documents needs a longer context
    # window, so the faithfulness check gets its own model.
    long_context_llm = ChatOpenAI(
        model="gpt-4-1106-preview",
        temperature=0.0,
        model_kwargs={"seed": 42},
    )

    accuracy_scorer = RunEvalConfig.LabeledScoreString(
        criteria=_ACCURACY_CRITERION,
        llm=accuracy_llm,
        normalize_by=10.0,
    )
    embedding_distance = RunEvalConfig.EmbeddingDistance()
    faithfulness = FaithfulnessEvaluator(llm=long_context_llm)

    return RunEvalConfig(
        evaluators=[accuracy_scorer, embedding_distance],
        custom_evaluators=[faithfulness],
    )
7 changes: 7 additions & 0 deletions langchain_benchmarks/rag/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from langchain_benchmarks.rag.tasks.langchain_docs.task import LANGCHAIN_DOCS_TASK
from langchain_benchmarks.rag.tasks.semi_structured_earnings.task import (
SEMI_STRUCTURED_EARNINGS_TASK,
)

# Please keep this sorted
__all__ = ["LANGCHAIN_DOCS_TASK", "SEMI_STRUCTURED_EARNINGS_TASK"]
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# LangChain Docs Environment
# LangChain Docs Task

This code contains utilities to scrape the LangChain docs (already run) and index them
using common techniques. The docs were scraped using the code in `_ingest_docs.py` and
uploaded to GCS. To better compare retrieval techniques, we hold these constant and pull
from that cache whenever generating different indices.


The content in `indexing` composes some common indexing strategies with default parameters for
benchmarking on the LangChain docs.
8 changes: 8 additions & 0 deletions langchain_benchmarks/rag/tasks/langchain_docs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from langchain_benchmarks.rag.tasks.langchain_docs import architectures, indexing
from langchain_benchmarks.rag.tasks.langchain_docs.task import LANGCHAIN_DOCS_TASK

DATASET_ID = (
"452ccafc-18e1-4314-885b-edd735f17b9d" # ID of public LangChain Docs dataset
)

__all__ = ["architectures", "indexing", "DATASET_ID", "LANGCHAIN_DOCS_TASK"]
Loading