From 5b6bf4d12f6d2f42a0b8a694238141e91c0923d8 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Wed, 24 Jan 2024 14:35:04 +0800
Subject: [PATCH] Make the keep-variables-intact instruction optional

---
 coml/core.py                           | 16 ++++++++++++++--
 coml/prompts/generate_fewshots_v2.json |  4 +++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/coml/core.py b/coml/core.py
index 2394113..36bd7bf 100644
--- a/coml/core.py
+++ b/coml/core.py
@@ -118,6 +118,8 @@ class CoMLAgent:
         ensemble_shuffle: Shuffle the examples in the prompt before ensemble.
         example_ranking: A model that ranks the examples. If provided, the examples
             will be ranked by the model before selecting the examples.
+        intact_instruction: Whether to instruct the LLM to keep variables unmodified.
+            For experimental purposes only.
     """
 
     def __init__(
@@ -134,6 +136,7 @@ def __init__(
         ensemble: int | None = None,
         ensemble_shuffle: bool = True,
         example_ranking: Embeddings | None = None,
+        intact_instruction: bool = True,
     ):
         self.llm = llm
         self.prompt_version = prompt_version
@@ -145,6 +148,7 @@ def __init__(
         self.ensemble = ensemble
         self.ensemble_shuffle = ensemble_shuffle
         self.example_ranking = example_ranking
+        self.intact_instruction = intact_instruction
 
     def _fix_context_from_any_context(
         self, context: GenerateContext | FixContext, **kwargs: Any
@@ -282,9 +286,17 @@ def generate_code(
         messages: list[BaseMessage] = []
 
         if self.chain_of_thought:
-            messages.append(SystemMessage(content=GENERATE_INSTRUCTION_COT))
+            generate_instruction = GENERATE_INSTRUCTION_COT
         else:
-            messages.append(SystemMessage(content=GENERATE_INSTRUCTION))
+            generate_instruction = GENERATE_INSTRUCTION
+        if not self.intact_instruction:
+            generate_instruction = re.sub(r"- Do not overwrite or modify.*\n", "", generate_instruction)
+            for shot in fewshots:
+                if "answer_wo_intact" in shot:
+                    shot["answer"] = shot.pop("answer_wo_intact")
+                if "rationale_wo_intact" in shot:
+                    shot["rationale"] = shot.pop("rationale_wo_intact")
+        messages.append(SystemMessage(content=generate_instruction))
 
         for shot in self._select_examples(request, fewshots):
             question, answer = render_generate_context(
diff --git a/coml/prompts/generate_fewshots_v2.json b/coml/prompts/generate_fewshots_v2.json
index fbf61e8..4ce1c45 100644
--- a/coml/prompts/generate_fewshots_v2.json
+++ b/coml/prompts/generate_fewshots_v2.json
@@ -31,7 +31,9 @@
     ],
     "request": "Compare the average weight of surface products and macbook products?",
     "answer": "\n# Import pandas as it seems unimported.\nimport pandas as pd\n\n# Create a new variable to avoid unintentional modifications to raw DataFrame.\ndf_product_weight = pd.DataFrame({\n    \"product\": df[\"product\"].apply(lambda x: \"Macbook\" if \"Macbook\" in x else \"Surface\"),\n    \"weight\": df[\"weight\"].apply(lambda x: float(x.replace(\" lbs\", \"\"))),\n})\ndf_product_weight.groupby(\"product\").mean()",
-    "rationale": "To compare the average weight of Surface products and MacBook products, we'll need to:\n\n1. Extract the weight as a numerical value from the \"weight\" column.\n2. Create a new column to classify each product as either 'Surface' or 'Macbook'.\n3. Use the `groupby` function to calculate the average weight for each product group.\n\nWe need to avoid unintentional modifications to the raw DataFrame, as the request didn't ask for in-place changes to the data."
+    "rationale": "To compare the average weight of Surface products and MacBook products, we'll need to:\n\n1. Extract the weight as a numerical value from the \"weight\" column.\n2. Create a new column to classify each product as either 'Surface' or 'Macbook'.\n3. Use the `groupby` function to calculate the average weight for each product group.\n\nWe need to avoid unintentional modifications to the raw DataFrame, as the request didn't ask for in-place changes to the data.",
+    "answer_wo_intact": "# Import pandas as it seems unimported.\nimport pandas as pd\n\ndf[\"product\"] = df[\"product\"].apply(lambda x: \"Macbook\" if \"Macbook\" in x else \"Surface\")\ndf[\"weight\"] = df[\"weight\"].apply(lambda x: float(x.replace(\" lbs\", \"\")))\ndf.groupby(\"product\").mean()",
+    "rationale_wo_intact": "To compare the average weight of Surface products and MacBook products, we'll need to:\n\n1. Extract the weight as a numerical value from the \"weight\" column.\n2. Classify each product as either 'Surface' or 'Macbook'.\n3. Use the `groupby` function to calculate the average weight for each product group."
   },
   {
     "variables": {