From f4b9e9547500cd6f1082b67d80d098adc1e46708 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev <eyurtsev@gmail.com>
Date: Thu, 18 Apr 2024 09:26:01 -0400
Subject: [PATCH 1/2] Remove MULTIVERSE_MATH_TINY; fold DATASET_TINY into DATASET

---
 .../tool_usage/tasks/multiverse_math.py       | 37 +------------------
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/langchain_benchmarks/tool_usage/tasks/multiverse_math.py b/langchain_benchmarks/tool_usage/tasks/multiverse_math.py
index 671dbe1..f2861bf 100644
--- a/langchain_benchmarks/tool_usage/tasks/multiverse_math.py
+++ b/langchain_benchmarks/tool_usage/tasks/multiverse_math.py
@@ -128,7 +128,7 @@ def get_environment() -> ToolUsageEnvironment:
 
 
 # Source dataset used to create the public dataset in LangSmith
-DATASET_TINY = [
+DATASET = [
     {
         "question": "Add 2 and 3",
         "answer": add(2, 3),
@@ -188,9 +188,6 @@ def get_environment() -> ToolUsageEnvironment:
         "answer": divide(multiply(15, pi()), 180),
         "expected_steps": ["pi", "multiply", "divide"],
     },
-]
-
-DATASET = DATASET_TINY + [
     {
         "question": "evaluate negate(-131,778)",
         "answer": negate(-131_778),
@@ -245,38 +242,6 @@ def get_environment() -> ToolUsageEnvironment:
     },
 ]
 
-MULTIVERSE_MATH_TINY = ToolUsageTask(
-    name="Multiverse Math (Tiny)",
-    dataset_id="https://smith.langchain.com/public/594f9f60-30a0-49bf-b075-f44beabf546a/d",
-    create_environment=get_environment,
-    instructions=(
-        "You are requested to solve math questions in an alternate "
-        "mathematical universe. The operations have been altered to yield "
-        "different results than expected. Do not guess the answer or rely on your "
-        " innate knowledge of math. Use the provided tools to answer the question. "
-        "While associativity and commutativity apply, distributivity does not. Answer "
-        "the question using the fewest possible tools. Only include the numeric "
-        "response without any clarifications."
-    ),
-    description=(
-        """\
-An environment that contains a few basic math operations, but with altered results.
-
-For example, multiplication of 5*3 will be re-interpreted as 5*3*1.1. \
-The basic operations retain some basic properties, such as commutativity, \
-associativity, and distributivity; however, the results are different than expected.
-
-The objective of this task is to evaluate the ability to use the provided tools to \
-solve simple math questions and ignore any innate knowledge about math.
-
-This is a tiny version of the Multiverse Math task, with 10 examples only.
-"""
-    ),
-    eval_params={
-        "output_evaluation": "qa_math_without_question",
-    },
-)
-
 MULTIVERSE_MATH = ToolUsageTask(
     name="Multiverse Math",
     dataset_id="https://smith.langchain.com/public/47ed57bc-e852-4f84-a23e-cce4793864e9/d",

From 28b0054c304a51eb60681b4552abe1edb4c1644c Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev <eyurtsev@gmail.com>
Date: Thu, 18 Apr 2024 09:28:43 -0400
Subject: [PATCH 2/2] Restore MULTIVERSE_MATH_TINY but drop it from the task registry

---
 langchain_benchmarks/registration.py          |  1 -
 .../tool_usage/tasks/multiverse_math.py       | 40 ++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/langchain_benchmarks/registration.py b/langchain_benchmarks/registration.py
index d700208..4b272e2 100644
--- a/langchain_benchmarks/registration.py
+++ b/langchain_benchmarks/registration.py
@@ -25,7 +25,6 @@
         type_writer_26_funcs.TYPE_WRITER_26_FUNCS_TASK,
         relational_data.RELATIONAL_DATA_TASK,
         multiverse_math.MULTIVERSE_MATH,
-        multiverse_math.MULTIVERSE_MATH_TINY,
         email_task.EMAIL_EXTRACTION_TASK,
         chat_extraction.CHAT_EXTRACTION_TASK,
         LANGCHAIN_DOCS_TASK,
diff --git a/langchain_benchmarks/tool_usage/tasks/multiverse_math.py b/langchain_benchmarks/tool_usage/tasks/multiverse_math.py
index f2861bf..0beb8c2 100644
--- a/langchain_benchmarks/tool_usage/tasks/multiverse_math.py
+++ b/langchain_benchmarks/tool_usage/tasks/multiverse_math.py
@@ -128,7 +128,7 @@ def get_environment() -> ToolUsageEnvironment:
 
 
 # Source dataset used to create the public dataset in LangSmith
-DATASET = [
+DATASET_TINY = [
     {
         "question": "Add 2 and 3",
         "answer": add(2, 3),
@@ -188,6 +188,9 @@ def get_environment() -> ToolUsageEnvironment:
         "answer": divide(multiply(15, pi()), 180),
         "expected_steps": ["pi", "multiply", "divide"],
     },
+]
+
+DATASET = DATASET_TINY + [
     {
         "question": "evaluate negate(-131,778)",
         "answer": negate(-131_778),
@@ -242,6 +245,41 @@ def get_environment() -> ToolUsageEnvironment:
     },
 ]
 
+# Kept only for backwards compatibility; this task is deliberately not
+# registered in the task registry. TINY is the Multiverse Math task with
+# 10 examples instead of the full dataset.
+MULTIVERSE_MATH_TINY = ToolUsageTask(
+    name="Multiverse Math (Tiny)",
+    dataset_id="https://smith.langchain.com/public/594f9f60-30a0-49bf-b075-f44beabf546a/d",
+    create_environment=get_environment,  # passed as a callable, not called here
+    instructions=(  # NOTE(review): 3rd/4th literals join with a double space
+        "You are requested to solve math questions in an alternate "
+        "mathematical universe. The operations have been altered to yield "
+        "different results than expected. Do not guess the answer or rely on your "
+        " innate knowledge of math. Use the provided tools to answer the question. "
+        "While associativity and commutativity apply, distributivity does not. Answer "
+        "the question using the fewest possible tools. Only include the numeric "
+        "response without any clarifications."
+    ),
+    description=(  # NOTE(review): says distributivity holds; instructions say not
+        """\
+An environment that contains a few basic math operations, but with altered results.
+
+For example, multiplication of 5*3 will be re-interpreted as 5*3*1.1. \
+The basic operations retain some basic properties, such as commutativity, \
+associativity, and distributivity; however, the results are different than expected.
+
+The objective of this task is to evaluate the ability to use the provided tools to \
+solve simple math questions and ignore any innate knowledge about math.
+
+This is a tiny version of the Multiverse Math task, with 10 examples only.
+"""
+    ),
+    eval_params={
+        "output_evaluation": "qa_math_without_question",
+    },
+)
+
 MULTIVERSE_MATH = ToolUsageTask(
     name="Multiverse Math",
     dataset_id="https://smith.langchain.com/public/47ed57bc-e852-4f84-a23e-cce4793864e9/d",