Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wfh/add semistructured #38

Merged
merged 19 commits into from
Nov 21, 2023
1 change: 0 additions & 1 deletion csv-qa/pandas_agent_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain.tools import PythonAstREPLTool
from langchain.vectorstores import FAISS
Expand Down
2 changes: 0 additions & 2 deletions csv-qa/pandas_ai.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import pandas as pd
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
Expand Down
966 changes: 110 additions & 856 deletions docs/source/notebooks/rag.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion langchain-docs-benchmarking/prepare_dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Copy the public dataset to your own langsmith tenant."""
from typing import Optional

from langsmith import Client
from tqdm import tqdm

# Name under which the copied dataset will appear in your LangSmith tenant.
DATASET_NAME = "LangChain Docs Q&A"
# Public share token of the source dataset to clone — presumably a LangSmith
# dataset share ID; verify against the LangSmith sharing docs.
PUBLIC_DATASET_TOKEN = "452ccafc-18e1-4314-885b-edd735f17b9d"
Expand Down
2 changes: 1 addition & 1 deletion langchain_benchmarks/extraction/email_task.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Optional, List
from typing import List, Optional

from langchain.smith import RunEvalConfig
from pydantic import BaseModel, Field
Expand Down
1 change: 1 addition & 0 deletions langchain_benchmarks/rag/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.sql
10 changes: 3 additions & 7 deletions langchain_benchmarks/rag/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
"""RAG environments."""
from langchain_benchmarks.rag.evaluators import RAG_EVALUATION
from langchain_benchmarks.rag.environments.langchain_docs.task import (
LANGCHAIN_DOCS_TASK,
)
from langchain_benchmarks.rag.tasks import LANGCHAIN_DOCS_TASK

# Please keep this list sorted!
__all__ = ["LANGCHAIN_DOCS_TASK", "RAG_EVALUATION"]
# Please keep this sorted
__all__ = ["LANGCHAIN_DOCS_TASK"]
Empty file.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

31 changes: 17 additions & 14 deletions langchain_benchmarks/rag/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,21 @@ def evaluate_run(
""" # noqa
}

# Grader LLM pinned for determinism: temperature 0 and a fixed sampling seed.
eval_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
# Use a longer-context LLM to check documents
faithfulness_eval_llm = ChatOpenAI(
    model="gpt-4-1106-preview", temperature=0.0, model_kwargs={"seed": 42}
)

# Module-level evaluation config combining:
#  - an LLM-graded score against the accuracy criterion (normalize_by=10.0
#    rescales the grade, presumably from a 1-10 scale to [0, 1] — confirm
#    against RunEvalConfig.LabeledScoreString docs),
#  - an embedding-distance comparison to the reference answer,
#  - a custom faithfulness check run with the longer-context model.
RAG_EVALUATION = RunEvalConfig(
    evaluators=[
        RunEvalConfig.LabeledScoreString(
            criteria=_ACCURACY_CRITERION, llm=eval_llm, normalize_by=10.0
        ),
        RunEvalConfig.EmbeddingDistance(),
    ],
    custom_evaluators=[FaithfulnessEvaluator(llm=faithfulness_eval_llm)],
)
def get_eval_config() -> RunEvalConfig:
    """Build and return the evaluation config for this environment.

    Combines an LLM-graded accuracy score, an embedding-distance
    evaluator, and a custom faithfulness check.
    """
    scoring_llm = ChatOpenAI(model="gpt-4", temperature=0.0, model_kwargs={"seed": 42})
    # Use a longer-context LLM to check documents
    doc_check_llm = ChatOpenAI(
        model="gpt-4-1106-preview", temperature=0.0, model_kwargs={"seed": 42}
    )
    accuracy_grader = RunEvalConfig.LabeledScoreString(
        criteria=_ACCURACY_CRITERION, llm=scoring_llm, normalize_by=10.0
    )
    return RunEvalConfig(
        evaluators=[accuracy_grader, RunEvalConfig.EmbeddingDistance()],
        custom_evaluators=[FaithfulnessEvaluator(llm=doc_check_llm)],
    )
5 changes: 5 additions & 0 deletions langchain_benchmarks/rag/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Re-export the LangChain Docs RAG task as the tasks package's public API.
from langchain_benchmarks.rag.tasks.langchain_docs.task import LANGCHAIN_DOCS_TASK


# Please keep this sorted
__all__ = ["LANGCHAIN_DOCS_TASK"]
8 changes: 8 additions & 0 deletions langchain_benchmarks/rag/tasks/langchain_docs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Public surface of the LangChain Docs RAG task package: the retrieval
# architectures, the indexing helpers, the task definition, and the dataset ID.
from langchain_benchmarks.rag.tasks.langchain_docs import architectures, indexing
from langchain_benchmarks.rag.tasks.langchain_docs.task import LANGCHAIN_DOCS_TASK

DATASET_ID = (
    "452ccafc-18e1-4314-885b-edd735f17b9d"  # ID of public LangChain Docs dataset
)

__all__ = ["architectures", "indexing", "DATASET_ID", "LANGCHAIN_DOCS_TASK"]
Loading
Loading