diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py
index 0fd76530..a21c5d98 100644
--- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py
@@ -8,6 +8,7 @@ def metadata():
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "bloomz-176b (8bit quantized)",
         "description": "Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
+        "scores": {"Weighted-F1": "0.749"},
     }
 
 
@@ -29,8 +30,7 @@ def prompt(input_sample):
         input_sample = arr[:1000]
 
     prompt_string = (
-        f"Classify following the tweet as yes or no.\n"
-        f"Provide only label.\n\n"
+        f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no.\n\n"
         f"tweet: {input_sample}\n"
         f"label: \n"
     )
@@ -46,13 +46,26 @@ def post_process(response):
     label = label.replace("</s>", "")
     label = label.lower()
 
-    if label.startswith("I am unable to verify".lower()) or label.startswith(
-        "I am unable to categorize".lower()
+    if (
+        label.startswith("i am unable to verify")
+        or label.startswith("i am unable to categorize")
+        or label.startswith("i cannot")
+        or "cannot" in label
     ):
         label_fixed = None
-    elif "label: incorrect" in label or "incorrect" in label or label == "no":
+    elif (
+        "label: incorrect" in label
+        or "incorrect" in label
+        or label == "no"
+        or label == "لا"
+    ):
         label_fixed = "no"
-    elif "label: correct" in label or "correct" in label or label == "yes":
+    elif (
+        "label: correct" in label
+        or "correct" in label
+        or "yes" in label
+        or "نعم" in label
+    ):
         label_fixed = "yes"
     else:
         label_fixed = None
diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py
index 3f3a66b3..9fe2da47 100644
--- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py
@@ -8,7 +8,7 @@ def metadata():
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "gpt-35-turbo (version 0301)",
         "description": "GPT35 model hosted on Azure, using the Completion API. API version '2023-03-15-preview'.",
-        "scores": {"Weighted-F1": "0.103"},
+        "scores": {"Weighted-F1": "0.393"},
     }
 
 
@@ -26,8 +26,8 @@ def config():
 
 def prompt(input_sample):
     prompt_string = (
-        f"Detect the information in the sentence as correct or incorrect. Use label as yes or no.\n\n"
-        f"text: {input_sample}\n"
+        f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no.\n\n"
+        f"tweet: {input_sample}\n"
         f"label: \n"
     )
     return {
@@ -44,16 +44,27 @@ def prompt(input_sample):
 def post_process(response):
     label = response["choices"][0]["text"].lower().replace(".", "").lower()
 
-    if label.startswith("I am unable to verify".lower()) or label.startswith(
-        "I am unable to categorize".lower()
+    if (
+        label.startswith("i am unable to verify")
+        or label.startswith("i am unable to categorize")
+        or label.startswith("i cannot")
+        or "cannot" in label
     ):
         label_fixed = None
-    elif "incorrect" in label or "label: no" in label:
+    elif (
+        "label: incorrect" in label
+        or "incorrect" in label
+        or label == "no"
+        or label == "لا"
+    ):
         label_fixed = "no"
-    elif "correct" in label or "label: yes" in label:
+    elif (
+        "label: correct" in label
+        or "correct" in label
+        or "yes" in label
+        or "نعم" in label
+    ):
         label_fixed = "yes"
-    elif "no" == label or "yes" == label:
-        label_fixed = label
     else:
         label_fixed = None
 
diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py
index a294c5d0..6510c4e0 100644
--- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py
@@ -8,7 +8,7 @@ def metadata():
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "gpt-4-32k (version 0314)",
         "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.",
-        "scores": {"Weighted-F1": "0.497"},
+        "scores": {"Weighted-F1": "0.491"},
     }
 
 
@@ -49,11 +49,11 @@ def few_shot_prompt(input_sample, base_prompt, examples):
 
 
 def prompt(input_sample, examples):
-    base_prompt = f'Annotate the "tweet" into one of the following categories: yes or no. Provide only label.'
+    base_prompt = f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no."
     return [
         {
             "role": "system",
-            "content": "You are a social media expert, a fact-checker and you can annotate tweets.",
+            "content": "You are an expert fact-checker.",
         },
         {
             "role": "user",
@@ -63,20 +63,28 @@ def prompt(input_sample, examples):
 
 
 def post_process(response):
-    label = response["choices"][0]["message"]["content"]
+    label = response["choices"][0]["message"]["content"].lower()
 
     if (
+        label.startswith("i am unable to verify")
+        or label.startswith("i am unable to categorize")
+        or label.startswith("i cannot")
+        or "cannot" in label
+    ):
+        # print(label)
+        label_fixed = None
+    elif (
         "label: incorrect" in label
         or "incorrect" in label
         or label == "no"
-        or "label: no" in label
+        or label == "لا"
     ):
         label_fixed = "no"
     elif (
         "label: correct" in label
         or "correct" in label
-        or label == "yes"
-        or "label: yes" in label
+        or "yes" in label
+        or "نعم" in label
     ):
         label_fixed = "yes"
     else:
diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py
index ea0c0689..f90cb347 100644
--- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py
@@ -8,7 +8,7 @@ def metadata():
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "gpt-4-32k (version 0314)",
         "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
-        "scores": {"Weighted-F1": "0.372"},
+        "scores": {"Weighted-F1": "0.485"},
     }
 
 
@@ -19,21 +19,21 @@ def config():
         "model": OpenAIModel,
         "model_args": {
             "class_labels": ["yes", "no"],
-            "max_tries": 30,
+            "max_tries": 3,
         },
     }
 
 
 def prompt(input_sample):
     prompt_string = (
-        f'Annotate the "tweet" into one of the following categories: correct or incorrect\n\n'
+        f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no.\n\n"
         f"tweet: {input_sample}\n"
         f"label: \n"
     )
     return [
         {
             "role": "system",
-            "content": "You are a social media expert, a fact-checker and you can annotate tweets.",  # You are capable of identifying and annotating tweets correct or incorrect
+            "content": "You are an expert fact-checker.",  # You are capable of identifying and annotating tweets as correct or incorrect
         },
         {
             "role": "user",
@@ -43,15 +43,29 @@ def prompt(input_sample):
 
 
 def post_process(response):
-    label = response["choices"][0]["message"]["content"]
+    label = response["choices"][0]["message"]["content"].lower()
 
-    if label.startswith("I am unable to verify".lower()) or label.startswith(
-        "I am unable to categorize".lower()
+    if (
+        label.startswith("i am unable to verify")
+        or label.startswith("i am unable to categorize")
+        or label.startswith("i cannot")
+        or "cannot" in label
     ):
+        # print(label)
         label_fixed = None
-    elif "label: incorrect" in label or "incorrect" in label:
+    elif (
+        "label: incorrect" in label
+        or "incorrect" in label
+        or label == "no"
+        or label == "لا"
+    ):
         label_fixed = "no"
-    elif "label: correct" in label or "correct" in label:
+    elif (
+        "label: correct" in label
+        or "correct" in label
+        or "yes" in label
+        or "نعم" in label
+    ):
         label_fixed = "yes"
     else:
         label_fixed = None