Add a new asset for XNLI with Arabic prompt (#183)

* Created XNLI_CGPT4_ZeroShot_Arabic_instruct.py
* Update asset with metadata and remove obsolete code

---------

Co-authored-by: Fahim Imaduddin Dalvi <[email protected]>
qcri · Oct 2, 2023 · dab6f19 · dab6f19
1 parent 5843d54
commit dab6f19
Showing 1 changed file with 10 additions and 14 deletions.
diff --git a/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py b/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py
@@ -7,8 +7,8 @@ def metadata():
     return {
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "gpt-4-32k (version 0314)",
-        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
-        "scores": {"Accuracy": "0.753"},
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. Uses an prompt specified in Arabic.",
+        "scores": {"Accuracy": "0.740"},
     }
 
 
@@ -17,30 +17,26 @@ def config():
         "dataset": XNLIDataset,
         "task": XNLITask,
         "model": OpenAIModel,
-        "model_args": {
-            "max_tries": 3,
-        },
     }
 
 
 def prompt(input_sample):
     sent1, sent2 = input_sample.split("\t")
-
-    prompt_text = "You are provided with a premise and a hypothesis. Your task is to classify the hypothesis as either true (entailing the premise), false (contradicting the premise), or unknown (neutral) based on the given premise. The output should only be exactly one of three labels: true, false or unknown."
+    prompt_text = "نقدم لك جملتين تمثلان فرضيتين. مهمتك هي تصنيف الفرضية اللاحقة بالنسبة للفرضية المسبقة تبعاً لواحدة من هذه التصنيفات: صحيح (الفرضية اللاحقة تدل على نفس الفرضية المسبقة)، خطأ (الفرضية اللاحقة تناقض الفرضية المسبقة)، أو غير معروف (حيادي). يجب أن يقتصر ردك على واحدة من هذه التصنيفات: صحيح، خطأ، أو غير معروف."
     prompt_text = (
         prompt_text
-        + "\nPremise: "
+        + "\nالفرضية المسبقة: "
         + sent1
-        + "\nHypothesis: "
+        + "\nالفرضية اللاحقة: "
         + sent2
         + "\n"
-        + "label: "
+        + "التصنيف: "
     )
 
     return [
         {
             "role": "system",
-            "content": "You are an expert in Arabic language understanding.",
+            "content": "أنت خبير في فهم اللغة العربية.",
         },
         {
             "role": "user",
@@ -53,11 +49,11 @@ def post_process(response):
     input_label = response["choices"][0]["message"]["content"]
     input_label = input_label.replace(".", "").strip().lower()
 
-    if "neutral" in input_label or "unknown" in input_label:
+    if "غير معروف" in input_label or "حيادي" in input_label:
         pred_label = "neutral"
-    elif "true" in input_label or "entailment" in input_label:
+    elif "صحيح" in input_label or "تدل" in input_label:
         pred_label = "entailment"
-    elif "false" in input_label or "contradiction" in input_label:
+    elif "خطأ" in input_label or "تناقض" in input_label:
         pred_label = "contradiction"
     else:
         print(input_label)