diff --git a/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py b/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py
index 7864b1fe..47e64a19 100644
--- a/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py
+++ b/assets/ar/semantics/NLI/XNLI_GPT4_ZeroShot.py
@@ -7,8 +7,8 @@ def metadata():
     return {
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "gpt-4-32k (version 0314)",
-        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
-        "scores": {"Accuracy": "0.753"},
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. Uses a prompt specified in Arabic.",
+        "scores": {"Accuracy": "0.740"},
     }
 
 
@@ -17,30 +17,26 @@ def config():
         "dataset": XNLIDataset,
         "task": XNLITask,
         "model": OpenAIModel,
-        "model_args": {
-            "max_tries": 3,
-        },
     }
 
 
 def prompt(input_sample):
     sent1, sent2 = input_sample.split("\t")
-
-    prompt_text = "You are provided with a premise and a hypothesis. Your task is to classify the hypothesis as either true (entailing the premise), false (contradicting the premise), or unknown (neutral) based on the given premise. The output should only be exactly one of three labels: true, false or unknown."
+    prompt_text = "نقدم لك جملتين تمثلان فرضيتين. مهمتك هي تصنيف الفرضية اللاحقة بالنسبة للفرضية المسبقة تبعاً لواحدة من هذه التصنيفات: صحيح (الفرضية اللاحقة تدل على نفس الفرضية المسبقة)، خطأ (الفرضية اللاحقة تناقض الفرضية المسبقة)، أو غير معروف (حيادي). يجب أن يقتصر ردك على واحدة من هذه التصنيفات: صحيح، خطأ، أو غير معروف."
     prompt_text = (
         prompt_text
-        + "\nPremise: "
+        + "\nالفرضية المسبقة: "
         + sent1
-        + "\nHypothesis: "
+        + "\nالفرضية اللاحقة: "
         + sent2
         + "\n"
-        + "label: "
+        + "التصنيف: "
     )
 
     return [
         {
             "role": "system",
-            "content": "You are an expert in Arabic language understanding.",
+            "content": "أنت خبير في فهم اللغة العربية.",
         },
         {
             "role": "user",
@@ -53,11 +49,11 @@ def post_process(response):
     input_label = response["choices"][0]["message"]["content"]
     input_label = input_label.replace(".", "").strip().lower()
 
-    if "neutral" in input_label or "unknown" in input_label:
+    if "غير معروف" in input_label or "حيادي" in input_label:
         pred_label = "neutral"
-    elif "true" in input_label or "entailment" in input_label:
+    elif "صحيح" in input_label or "تدل" in input_label:
         pred_label = "entailment"
-    elif "false" in input_label or "contradiction" in input_label:
+    elif "خطأ" in input_label or "تناقض" in input_label:
         pred_label = "contradiction"
     else:
         print(input_label)