From da354fa85ec59dec7a3d7566245d451077039702 Mon Sep 17 00:00:00 2001
From: Maram Hasanain <maramhasanain@gmail.com>
Date: Mon, 28 Aug 2023 14:52:25 +0300
Subject: [PATCH 1/2] Created DialectQADI_BLOOMZ_ZeroShot.py

---
 .../DialectQADI_BLOOMZ_ZeroShot.py            | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 assets/benchmark_v1/sequence_tagging_ner_pos_etc/DialectQADI_BLOOMZ_ZeroShot.py
diff --git a/assets/benchmark_v1/sequence_tagging_ner_pos_etc/DialectQADI_BLOOMZ_ZeroShot.py b/assets/benchmark_v1/sequence_tagging_ner_pos_etc/DialectQADI_BLOOMZ_ZeroShot.py
new file mode 100644
index 00000000..9f30ab4d
--- /dev/null
+++ b/assets/benchmark_v1/sequence_tagging_ner_pos_etc/DialectQADI_BLOOMZ_ZeroShot.py
@@ -0,0 +1,94 @@
+import os
+
+from llmebench.datasets import QADIDataset
+from llmebench.models import BLOOMPetalModel
+from llmebench.tasks import DialectIDTask
+
+def config():
+    return {
+        "dataset": QADIDataset,
+        "dataset_args": {},
+        "task": DialectIDTask,
+        "task_args": {},
+        "model": BLOOMPetalModel,
+        "model_args": {
+            "api_url": os.environ["API_URL"],
+            "class_labels": [
+                "EG",  # label codes; post_process() returns these same values (e.g. "EG" for Egyptian)
+                "DZ",
+                "SD",
+                "YE",
+                "SY",
+                "TN",
+                "AE",
+                "JO",
+                "LY",
+                "PS",
+                "OM",
+                "LB",
+                "KW",
+                "QA",
+                "BH",
+                "MSA",
+                "SA",
+                "IQ",
+                "MA",
+            ],
+            "max_tries": 0,  # NOTE(review): meaning is defined by the model class; confirm 0 is intended
+        },
+        "general_args": {
+            "data_path": "data/sequence_tagging_ner_pos_etc/dialect_identification/QADI_test-PalestinePS-corrected.txt",  # relative path; presumably the QADI test split - confirm
+        },
+    }
+
+
+def prompt(input_sample):
+    """Build the zero-shot dialect-identification prompt for ``input_sample``.
+
+    Returns a dict whose only key, "prompt", holds the instruction listing the
+    candidate dialects, then a "text: " line and a trailing "label: " cue.
+    """
+    instruction = 'Identify the dialect of the following Arabic "text" given the following possible list of dialects: “Egyptian”, “Algerian”,  "Sudanese",  "Yemeni",  "Syrian",  “Tunisian”, "Emirati",  "Jordanian",  "Libyan",  "Palestinian",  "Omani",  “Lebanese”, “Kuwaiti”, "Qatari",  "Bahrani",  "modern standard Arabic",  "Saudi",  "Iraqi",  "Moroccan"\n\n'
+    # The sample goes on its own "text: " line, followed by the answer cue.
+    query = f"text: {input_sample}\n" + "label: \n"
+    return {"prompt": instruction + query}
+
+
+def post_process(response):
+    """Map the model's raw answer to a dialect label code, or None.
+
+    Reads ``response["outputs"]``; dialect names match case-insensitively.
+    """
+    # Keys are lower-cased names; "bahraini" covers the correct spelling of
+    # the prompt's "Bahrani".
+    count_label_map = {
+        "egyptian": "EG",
+        "algerian": "DZ",
+        "sudanese": "SD",
+        "yemeni": "YE",
+        "syrian": "SY",
+        "tunisian": "TN",
+        "emirati": "AE",
+        "jordanian": "JO",
+        "libyan": "LY",
+        "palestinian": "PS",
+        "omani": "OM",
+        "lebanese": "LB",
+        "kuwaiti": "KW",
+        "qatari": "QA",
+        "bahrani": "BH",
+        "bahraini": "BH",
+        "modern standard arabic": "MSA",
+        "saudi": "SA",
+        "iraqi": "IQ",
+        "moroccan": "MA",
+    }
+
+    label = response["outputs"].strip()
+    label = label.replace("<s>", "")
+    label = label.replace("</s>", "")
+    label = label.replace("Dialect: ", "").replace("dialect: ","")
+    label = label.replace("label: ", "")
+    label = label.strip()
+
+    return count_label_map.get(label.casefold())  # None when unrecognized

From 5306c0d8401f461dcacbd466361cd7bdb26acffc Mon Sep 17 00:00:00 2001
From: Fahim Imaduddin Dalvi <faimaduddin@hbku.edu.qa>
Date: Thu, 7 Sep 2023 13:13:15 +0300
Subject: [PATCH 2/2] Code cleanup

---
 ...lectQADI_BLOOMZ_ZeroShot.py => QADI_BLOOMZ_ZeroShot.py} | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
 rename assets/ar/sequence_tagging_and_information_extraction/dialect_identification/{DialectQADI_BLOOMZ_ZeroShot.py => QADI_BLOOMZ_ZeroShot.py} (94%)

diff --git a/assets/ar/sequence_tagging_and_information_extraction/dialect_identification/DialectQADI_BLOOMZ_ZeroShot.py b/assets/ar/sequence_tagging_and_information_extraction/dialect_identification/QADI_BLOOMZ_ZeroShot.py
similarity index 94%
rename from assets/ar/sequence_tagging_and_information_extraction/dialect_identification/DialectQADI_BLOOMZ_ZeroShot.py
rename to assets/ar/sequence_tagging_and_information_extraction/dialect_identification/QADI_BLOOMZ_ZeroShot.py
index 9f30ab4d..42c82607 100644
--- a/assets/ar/sequence_tagging_and_information_extraction/dialect_identification/DialectQADI_BLOOMZ_ZeroShot.py
+++ b/assets/ar/sequence_tagging_and_information_extraction/dialect_identification/QADI_BLOOMZ_ZeroShot.py
@@ -1,16 +1,17 @@
 import os
 
 from llmebench.datasets import QADIDataset
-from llmebench.models import BLOOMPetalModel
+from llmebench.models import PetalsModel
 from llmebench.tasks import DialectIDTask
 
+
 def config():
     return {
         "dataset": QADIDataset,
         "dataset_args": {},
         "task": DialectIDTask,
         "task_args": {},
-        "model": BLOOMPetalModel,
+        "model": PetalsModel,
         "model_args": {
             "api_url": os.environ["API_URL"],
             "class_labels": [
@@ -82,7 +83,7 @@ def post_process(response):
     label = response["outputs"].strip()
     label = label.replace("<s>", "")
     label = label.replace("</s>", "")
-    label = label.replace("Dialect: ", "").replace("dialect: ","")
+    label = label.replace("Dialect: ", "").replace("dialect: ", "")
     label = label.replace("label: ", "")
     label = label.strip()