From dab6f19f5961124b1d9a5018e66da6667fbdf9a4 Mon Sep 17 00:00:00 2001
From: Maram Hasanain <maramhasanain@gmail.com>
Date: Mon, 2 Oct 2023 16:01:19 +0300
Subject: [PATCH] Add a new asset for XNLI with Arabic prompt (#183)

* Created XNLI_CGPT4_ZeroShot_Arabic_instruct.py

* Update asset with metadata and remove obsolete code

---------

Co-authored-by: Fahim Imaduddin Dalvi <faimaduddin@hbku.edu.qa>
---
 assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py | 24 ++++++++-----------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py b/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py
index 7864b1fe..47e64a19 100644
--- a/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py
+++ b/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py
@@ -7,8 +7,8 @@ def metadata():
     return {
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "gpt-4-32k (version 0314)",
-        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
-        "scores": {"Accuracy": "0.753"},
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. Uses a prompt specified in Arabic.",
+        "scores": {"Accuracy": "0.740"},
     }
 
 
@@ -17,30 +17,26 @@ def config():
         "dataset": XNLIDataset,
         "task": XNLITask,
         "model": OpenAIModel,
-        "model_args": {
-            "max_tries": 3,
-        },
     }
 
 
 def prompt(input_sample):
     sent1, sent2 = input_sample.split("\t")
-
-    prompt_text = "You are provided with a premise and a hypothesis. Your task is to classify the hypothesis as either true (entailing the premise), false (contradicting the premise), or unknown (neutral) based on the given premise. The output should only be exactly one of three labels: true, false or unknown."
+    prompt_text = "نقدم لك جملتين تمثلان فرضيتين. مهمتك هي تصنيف الفرضية اللاحقة بالنسبة للفرضية المسبقة تبعاً لواحدة من هذه التصنيفات: صحيح (الفرضية اللاحقة تدل على نفس الفرضية المسبقة)، خطأ (الفرضية اللاحقة تناقض الفرضية المسبقة)، أو غير معروف (حيادي). يجب أن يقتصر ردك على واحدة من هذه التصنيفات: صحيح، خطأ، أو غير معروف."
     prompt_text = (
         prompt_text
-        + "\nPremise: "
+        + "\nالفرضية المسبقة: "
         + sent1
-        + "\nHypothesis: "
+        + "\nالفرضية اللاحقة: "
         + sent2
         + "\n"
-        + "label: "
+        + "التصنيف: "
     )
 
     return [
         {
             "role": "system",
-            "content": "You are an expert in Arabic language understanding.",
+            "content": "أنت خبير في فهم اللغة العربية.",
         },
         {
             "role": "user",
@@ -53,11 +49,11 @@ def post_process(response):
     input_label = response["choices"][0]["message"]["content"]
     input_label = input_label.replace(".", "").strip().lower()
 
-    if "neutral" in input_label or "unknown" in input_label:
+    if "غير معروف" in input_label or "حيادي" in input_label:
         pred_label = "neutral"
-    elif "true" in input_label or "entailment" in input_label:
+    elif "صحيح" in input_label or "تدل" in input_label:
         pred_label = "entailment"
-    elif "false" in input_label or "contradiction" in input_label:
+    elif "خطأ" in input_label or "تناقض" in input_label:
         pred_label = "contradiction"
     else:
         print(input_label)