From be682101d8ecc007710ff9a72fa56c284efd305f Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 10 Jul 2024 22:43:40 -0700 Subject: [PATCH] hub prompts --- scripts/tool_benchmarks.py | 202 ++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 113 deletions(-) diff --git a/scripts/tool_benchmarks.py b/scripts/tool_benchmarks.py index 96f10a7..41e3523 100644 --- a/scripts/tool_benchmarks.py +++ b/scripts/tool_benchmarks.py @@ -1,6 +1,7 @@ import datetime import uuid +from langchain import hub from langchain_anthropic import ChatAnthropic from langchain_community.vectorstores import FAISS from langchain_core.example_selectors import SemanticSimilarityExampleSelector @@ -12,7 +13,7 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings from langsmith.client import Client -from langchain_benchmarks import __version__ +from langchain_benchmarks import __version__, registry from langchain_benchmarks.rate_limiting import RateLimiter from langchain_benchmarks.tool_usage.agents import StandardAgentFactory from langchain_benchmarks.tool_usage.tasks.multiverse_math import * @@ -39,6 +40,7 @@ client = Client() # Launch langsmith client for cloning datasets + experiment_uuid = uuid.uuid4().hex[:4] today = datetime.date.today().isoformat() for task in registry.tasks: @@ -77,121 +79,95 @@ ) few_shot_messages += converted_messages -few_shot_messages = [m for m in few_shot_messages if not isinstance(m, SystemMessage)] + few_shot_messages = [ + m for m in few_shot_messages if not isinstance(m, SystemMessage) + ] -few_shot_str = "" -for m in few_shot_messages: - if isinstance(m.content, list): - few_shot_str += "AI message: " - for tool_use in m.content: - if "name" in tool_use: - few_shot_str += f"Use tool {tool_use['name']}, input: {', '.join(f'{k}:{v}' for k,v in tool_use['input'].items())}" - else: - few_shot_str += tool_use["text"] - few_shot_str += "\n" - else: - if isinstance(m, HumanMessage): - few_shot_str += f"Human message: {m.content}" + few_shot_str = "" + for m in few_shot_messages: + if isinstance(m.content, list): + few_shot_str += "AI message: " + for tool_use in m.content: + if "name" in tool_use: + few_shot_str += f"Use tool {tool_use['name']}, input: {', '.join(f'{k}:{v}' for k,v in tool_use['input'].items())}" + else: + few_shot_str += tool_use["text"] + few_shot_str += "\n" else: - few_shot_str += f"AI message: {m.content}" - - few_shot_str += "\n" - - example_selector = SemanticSimilarityExampleSelector.from_examples( - examples, - OpenAIEmbeddings(), - FAISS, - k=3, - input_keys=["question"], - example_keys=["messages"], - ) - - few_shot_prompt = FewShotChatMessagePromptTemplate( - input_variables=[], - example_selector=example_selector, - example_prompt=MessagesPlaceholder("messages"), - ) - - prompts = [ - ( - ChatPromptTemplate.from_messages( - [ - ("system", "{instructions}"), - ("human", "{question}"), - MessagesPlaceholder("agent_scratchpad"), # Workspace for the agent - ] + if isinstance(m, HumanMessage): + few_shot_str += f"Human message: {m.content}" + else: + few_shot_str += f"AI message: {m.content}" + + few_shot_str += "\n" + + example_selector = SemanticSimilarityExampleSelector.from_examples( + examples, + OpenAIEmbeddings(), + FAISS, + k=3, + input_keys=["question"], + example_keys=["messages"], + ) + + few_shot_prompt = FewShotChatMessagePromptTemplate( + input_variables=[], + example_selector=example_selector, + example_prompt=MessagesPlaceholder("messages"), + ) + + prompts = [ + ( + hub.pull("multiverse-math-no-few-shot"), + "no-few-shot", ), - "no-few-shot", - ), - ( - ChatPromptTemplate.from_messages( - [ - ( - "system", - "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: ", - ), - ] - + few_shot_messages - + [ - ("human", "{question}"), - MessagesPlaceholder("agent_scratchpad"), # Workspace for the agent - ] + ( + hub.pull("multiverse-math-few-shot-messages"), + "few-shot-messages", ), - "few-shot-message", - ), - ( - ChatPromptTemplate.from_messages( - [ - ( - "system", - "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: " - + few_shot_message, - ), - ("human", "{question}"), - MessagesPlaceholder("agent_scratchpad"), # Workspace for the agent - ] + ( + hub.pull("multiverse-math-few-shot-str"), + "few-shot-string", ), - "few-shot-string", - ), - ( - ChatPromptTemplate.from_messages( - [ - ( - "system", - "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: ", - ), - few_shot_prompt, - ("human", "{question}"), - MessagesPlaceholder("agent_scratchpad"), - ] + ( + ChatPromptTemplate.from_messages( + [ + ( + "system", + "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: ", + ), + few_shot_prompt, + ("human", "{question}"), + MessagesPlaceholder("agent_scratchpad"), + ] + ), + "few-shot-semantic", ), - "few-shot-semantic", - ), - ] - - for model_name, model in tests[:-1]: - rate_limiter = RateLimiter(requests_per_second=1) - - print(f"Benchmarking {task.name} with model: {model_name}") - eval_config = task.get_eval_config() - - for prompt, prompt_name in prompts: - agent_factory = StandardAgentFactory( - task, model, prompt, rate_limiter=rate_limiter - ) - - client.run_on_dataset( - dataset_name=dataset_name, - llm_or_chain_factory=agent_factory, - evaluation=eval_config, - verbose=False, - project_name=f"{model_name}-{task.name}-{prompt_name}-{experiment_uuid}", - concurrency_level=5, - project_metadata={ - "model": model_name, - "id": experiment_uuid, - "task": task.name, - "date": today, - "langchain_benchmarks_version": __version__, - }, - ) + ] + + for model_name, model in tests[-2:-1]: + rate_limiter = RateLimiter(requests_per_second=1) + + print(f"Benchmarking {task.name} with model: {model_name}") + eval_config = task.get_eval_config() + + for prompt, prompt_name in prompts: + agent_factory = StandardAgentFactory( + task, model, prompt, rate_limiter=rate_limiter + ) + + client.run_on_dataset( + dataset_name=dataset_name, + llm_or_chain_factory=agent_factory, + evaluation=eval_config, + verbose=False, + project_name=f"{model_name}-{task.name}-{prompt_name}-{experiment_uuid}", + concurrency_level=5, + project_metadata={ + "model": model_name, + "id": experiment_uuid, + "task": task.name, + "date": today, + "langchain_benchmarks_version": __version__, + }, + )