Commit

hub prompts
isahers1 committed Jul 11, 2024
1 parent 611408d commit be68210
Showing 1 changed file with 89 additions and 113 deletions.
202 changes: 89 additions & 113 deletions scripts/tool_benchmarks.py
@@ -1,6 +1,7 @@
 import datetime
 import uuid
 
+from langchain import hub
 from langchain_anthropic import ChatAnthropic
 from langchain_community.vectorstores import FAISS
 from langchain_core.example_selectors import SemanticSimilarityExampleSelector
@@ -12,7 +13,7 @@
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langsmith.client import Client
 
-from langchain_benchmarks import __version__
+from langchain_benchmarks import __version__, registry
 from langchain_benchmarks.rate_limiting import RateLimiter
 from langchain_benchmarks.tool_usage.agents import StandardAgentFactory
 from langchain_benchmarks.tool_usage.tasks.multiverse_math import *
@@ -39,6 +40,7 @@
 
 client = Client()  # Launch langsmith client for cloning datasets
 
+
 experiment_uuid = uuid.uuid4().hex[:4]
 today = datetime.date.today().isoformat()
 for task in registry.tasks:
@@ -77,121 +79,95 @@
         )
         few_shot_messages += converted_messages
 
-    few_shot_messages = [m for m in few_shot_messages if not isinstance(m, SystemMessage)]
+    few_shot_messages = [
+        m for m in few_shot_messages if not isinstance(m, SystemMessage)
+    ]
 
     few_shot_str = ""
     for m in few_shot_messages:
         if isinstance(m.content, list):
             few_shot_str += "AI message: "
             for tool_use in m.content:
                 if "name" in tool_use:
                     few_shot_str += f"Use tool {tool_use['name']}, input: {', '.join(f'{k}:{v}' for k,v in tool_use['input'].items())}"
                 else:
                     few_shot_str += tool_use["text"]
             few_shot_str += "\n"
         else:
             if isinstance(m, HumanMessage):
                 few_shot_str += f"Human message: {m.content}"
             else:
                 few_shot_str += f"AI message: {m.content}"
 
         few_shot_str += "\n"
 
     example_selector = SemanticSimilarityExampleSelector.from_examples(
         examples,
         OpenAIEmbeddings(),
         FAISS,
         k=3,
         input_keys=["question"],
         example_keys=["messages"],
     )
 
     few_shot_prompt = FewShotChatMessagePromptTemplate(
         input_variables=[],
         example_selector=example_selector,
         example_prompt=MessagesPlaceholder("messages"),
     )
 
     prompts = [
         (
-            ChatPromptTemplate.from_messages(
-                [
-                    ("system", "{instructions}"),
-                    ("human", "{question}"),
-                    MessagesPlaceholder("agent_scratchpad"),  # Workspace for the agent
-                ]
-            ),
+            hub.pull("multiverse-math-no-few-shot"),
             "no-few-shot",
         ),
         (
-            ChatPromptTemplate.from_messages(
-                [
-                    (
-                        "system",
-                        "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: ",
-                    ),
-                ]
-                + few_shot_messages
-                + [
-                    ("human", "{question}"),
-                    MessagesPlaceholder("agent_scratchpad"),  # Workspace for the agent
-                ]
-            ),
-            "few-shot-message",
+            hub.pull("multiverse-math-few-shot-messages"),
+            "few-shot-messages",
         ),
         (
-            ChatPromptTemplate.from_messages(
-                [
-                    (
-                        "system",
-                        "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: "
-                        + few_shot_str,
-                    ),
-                    ("human", "{question}"),
-                    MessagesPlaceholder("agent_scratchpad"),  # Workspace for the agent
-                ]
-            ),
+            hub.pull("multiverse-math-few-shot-str"),
             "few-shot-string",
         ),
         (
             ChatPromptTemplate.from_messages(
                 [
                     (
                         "system",
                         "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: ",
                     ),
                     few_shot_prompt,
                     ("human", "{question}"),
                     MessagesPlaceholder("agent_scratchpad"),
                 ]
             ),
             "few-shot-semantic",
         ),
     ]
 
-    for model_name, model in tests[:-1]:
+    for model_name, model in tests[-2:-1]:
         rate_limiter = RateLimiter(requests_per_second=1)
 
         print(f"Benchmarking {task.name} with model: {model_name}")
         eval_config = task.get_eval_config()
 
         for prompt, prompt_name in prompts:
             agent_factory = StandardAgentFactory(
                 task, model, prompt, rate_limiter=rate_limiter
             )
 
             client.run_on_dataset(
                 dataset_name=dataset_name,
                 llm_or_chain_factory=agent_factory,
                 evaluation=eval_config,
                 verbose=False,
                 project_name=f"{model_name}-{task.name}-{prompt_name}-{experiment_uuid}",
                 concurrency_level=5,
                 project_metadata={
                     "model": model_name,
                     "id": experiment_uuid,
                     "task": task.name,
                     "date": today,
                     "langchain_benchmarks_version": __version__,
                 },
             )
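
For context on what the new hub.pull calls return, here is a minimal, illustrative sketch (not part of the commit): it pulls one of the prompts named in the diff and renders it locally to confirm it exposes the instructions, question, and agent_scratchpad variables that StandardAgentFactory fills in. It assumes the langchainhub package is installed and that the prompts are accessible to the LangSmith account whose LANGCHAIN_API_KEY is configured; the prompt name is taken from the diff above.

    # Illustrative sketch only; prompt availability depends on your Hub account.
    from langchain import hub

    prompt = hub.pull("multiverse-math-no-few-shot")  # returns a ChatPromptTemplate
    print(prompt.input_variables)  # expect instructions, question, agent_scratchpad

    # Render the template with dummy values to see the messages an agent would receive.
    messages = prompt.invoke(
        {
            "instructions": "You are solving math problems in an alternate universe.",
            "question": "What is 2 multiplied by 3?",
            "agent_scratchpad": [],  # no intermediate tool calls yet
        }
    ).to_messages()
    for m in messages:
        print(type(m).__name__, ":", m.content)

Hosting the three prompt variants on the Hub rather than inlining them keeps the benchmark script shorter and lets the prompts be versioned and edited without changing this code; only the semantic few-shot prompt stays inline because it wraps the locally built example selector.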
