diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_FewShot.py similarity index 96% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_FewShot.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_FewShot.py index 05f9fd09..ab41d36f 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_FewShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_FewShot.py @@ -1,88 +1,88 @@ -import re - -from llmebench.datasets import ArProBinaryDataset -from llmebench.models import OpenAIModel -from llmebench.tasks import ArProTask - - -def metadata(): - return { - "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", - "scores": {"Micro-F1": "0.592"}, - } - - -def config(): - return { - "dataset": ArProBinaryDataset, - "task": ArProTask, - "model": OpenAIModel, - "model_args": { - "max_tries": 3, - }, - } - - -def prompt(input_sample, examples): - prompt_text = ( - f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" - f"Below you will find a few examples that can help you to understand:\n\n" - ) - - fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) - return [ - { - "role": "system", - "content": "You are an expert annotator.", - }, - { - "role": "user", - "content": fs_prompt, - }, - ] - - -def few_shot_prompt(input_sample, base_prompt, examples): - out_prompt = base_prompt - for index, example in enumerate(examples): - sent = example["input"] - label = example["label"] - out_prompt = ( - out_prompt - + "Example " - + str(index) - + ":" - + "\n" - + "text: " - + sent - + "\nlabel: " - + label - + "\n\n" - ) - - out_prompt = ( - out_prompt - + "Based on the instructions and examples above analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n" - ) - out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" - - return out_prompt - - -def post_process(response): - input_label = response["choices"][0]["message"]["content"] - pred_label = input_label.replace(".", "").strip().lower() - - pred_label = pred_label.replace("label:", "").strip() - - if "true" == pred_label: - pred_label = "true" - - elif "false" == pred_label: - pred_label = "false" - else: - pred_label = None - - return pred_label +import re + +from llmebench.datasets import ArProBinaryDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ArProTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 
3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", + "scores": {"Micro-F1": "0.592"}, + } + + +def config(): + return { + "dataset": ArProBinaryDataset, + "task": ArProTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" + f"Below you will find a few examples that can help you to understand:\n\n" + ) + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for index, example in enumerate(examples): + sent = example["input"] + label = example["label"] + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "text: " + + sent + + "\nlabel: " + + label + + "\n\n" + ) + + out_prompt = ( + out_prompt + + "Based on the instructions and examples above analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n" + ) + out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + pred_label = input_label.replace(".", "").strip().lower() + + pred_label = pred_label.replace("label:", "").strip() + + if "true" == pred_label: + pred_label = "true" + + elif "false" == pred_label: + pred_label = "false" + else: + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_ZeroShot.py similarity index 96% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_ZeroShot.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_ZeroShot.py index 854f03d8..459055b5 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_ZeroShot.py @@ -1,86 +1,86 @@ -import re - -from llmebench.datasets import ArProBinaryDataset -from llmebench.models import OpenAIModel -from llmebench.tasks import ArProTask - - -def metadata(): - return { - "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.", - "scores": {"Micro-F1": "0.526"}, - } - - -def config(): - return { - "dataset": ArProBinaryDataset, - "task": ArProTask, - "model": OpenAIModel, - "model_args": { - "max_tries": 3, - }, - } - - -def prompt(input_sample): - prompt_text = ( - f"Your task is to analyze the text and determine if it contains elements of propaganda. Based on the instructions, analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. 
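A note on the example selection the few-shot descriptions mention: 3 demonstrations per test sample are picked by MaxMarginalRelevance, which trades off relevance to the test sample against redundancy among the picks. A minimal sketch, assuming precomputed embeddings (mmr_select, cosine, and lambda_param are illustrative names, not llmebench APIs):

import numpy as np

def cosine(a, b):
    # Cosine similarity between two embedding vectors.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def mmr_select(query_vec, example_vecs, k=3, lambda_param=0.5):
    # Greedily pick k examples, balancing similarity to the query
    # against similarity to the examples already selected.
    selected, candidates = [], list(range(len(example_vecs)))
    while len(selected) < k and candidates:
        def mmr_score(i):
            relevance = cosine(query_vec, example_vecs[i])
            redundancy = max(
                (cosine(example_vecs[i], example_vecs[j]) for j in selected),
                default=0.0,
            )
            return lambda_param * relevance - (1 - lambda_param) * redundancy
        best = max(candidates, key=mmr_score)
        selected.append(best)
        candidates.remove(best)
    return selected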
Return only predicted label.\n\n" - f"text: {input_sample}\n" - f"label: \n" - ) - - return [ - { - "role": "system", - "content": "You are an expert fact checker.", - }, - { - "role": "user", - "content": prompt_text, - }, - ] - - -def few_shot_prompt(input_sample, base_prompt, examples): - out_prompt = base_prompt - for example in examples: - sent = example["input"] - label = example["label"] - - out_prompt = ( - out_prompt + "Sentence: " + sent + "\n" + "label: " + label + "\n\n" - ) - - # Append the sentence we want the model to predict for but leave the Label blank - out_prompt = out_prompt + "Sentence: " + input_sample + "\nlabel: \n" - - # print("=========== FS Prompt =============\n") - # print(out_prompt) - - return out_prompt - - -def post_process(response): - input_label = response["choices"][0]["message"]["content"] - input_label = input_label.replace(".", "").strip().lower() - - if ( - "true" in input_label - or "label: 1" in input_label - or "label: yes" in input_label - ): - pred_label = "true" - elif ( - "false" in input_label - or "label: 0" in input_label - or "label: no" in input_label - ): - pred_label = "false" - else: - print("label problem!! " + input_label) - pred_label = None - - return pred_label +import re + +from llmebench.datasets import ArProBinaryDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ArProTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.", + "scores": {"Micro-F1": "0.526"}, + } + + +def config(): + return { + "dataset": ArProBinaryDataset, + "task": ArProTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda. Based on the instructions, analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n" + f"text: {input_sample}\n" + f"label: \n" + ) + + return [ + { + "role": "system", + "content": "You are an expert fact checker.", + }, + { + "role": "user", + "content": prompt_text, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + label = example["label"] + + out_prompt = ( + out_prompt + "Sentence: " + sent + "\n" + "label: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "Sentence: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "label: 1" in input_label + or "label: yes" in input_label + ): + pred_label = "true" + elif ( + "false" in input_label + or "label: 0" in input_label + or "label: no" in input_label + ): + pred_label = "false" + else: + print("label problem!! 
" + input_label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_FewShot.py similarity index 97% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_FewShot.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_FewShot.py index f8f74b16..3319a4c3 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_FewShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_FewShot.py @@ -1,119 +1,119 @@ -import re - -from llmebench.datasets import ArProCoarseDataset -from llmebench.models import OpenAIModel -from llmebench.tasks import MultilabelPropagandaTask - - -def metadata(): - return { - "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", - "scores": {"Micro-F1": "0.587"}, - } - - -def config(): - return { - "dataset": ArProCoarseDataset, - "task": MultilabelPropagandaTask, - "model": OpenAIModel, - "model_args": { - "max_tries": 3, - }, - } - - -def prompt(input_sample, examples): - prompt_text = ( - f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" - f"The following coarse-grained propaganda techniques is defined based on their appearance of any of the fine-grained propaganda techniques. The left side of the equal sign indicate coarse-grained techniques and right side indicate fine-grained techniques.\n\n" - f"no_technique = ['no propaganda']\n" - f"Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n" - f"Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n" - f"Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n" - f"Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n" - f"Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n" - f"Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n" - f"Below you will find a few examples of text with coarse-grained propaganda techniques:\n\n" - ) - - fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) - return [ - { - "role": "system", - "content": "You are an expert annotator.", - }, - { - "role": "user", - "content": fs_prompt, - }, - ] - - -def few_shot_prompt(input_sample, base_prompt, examples): - out_prompt = base_prompt - for index, example in enumerate(examples): - sent = example["input"] - tech_str = "" - for t in example["label"]: - tech_str += "'" + t + "', " - - out_prompt = ( - out_prompt - + "Example " - + str(index) - + ":" - + "\n" - + "text: " - + sent - + "\nlabel: " - + tech_str - + "\n\n" - ) - - out_prompt = out_prompt + ( - f"Based on the instructions and examples above analyze the following text and provide only labels as a list of string.\n\n" - ) - out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" - - return out_prompt - - -def post_process(response): - label = 
response["choices"][0]["message"]["content"] # .lower() - # pred_label = eval(label) - - labels = [] - - response = [ - s.strip().replace("'", "").replace("[", "").replace("]", "") - for s in label.split(",") - if len(s) > 1 - ] - - # print(response) - for label in response: - label = label.lower() - if "manipulative" in label: - labels.append("Manipulative_Wording") - if "call" in label: - labels.append("Call") - if "reputation" in label: - labels.append("Reputation") - if "technique" in label or "propaganda" in label: - labels.append("no_technique") - if "justification" in label: - labels.append("Justification") - if "simplification" in label: - labels.append("Simplification") - if "distraction" in label: - labels.append("Distraction") - - if len(labels) == 0: - labels.append("no_technique") - if len(labels) > 1 and "no_technique" in labels: - labels.remove("no_technique") - - return labels +import re + +from llmebench.datasets import ArProCoarseDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", + "scores": {"Micro-F1": "0.587"}, + } + + +def config(): + return { + "dataset": ArProCoarseDataset, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" + f"The following coarse-grained propaganda techniques is defined based on their appearance of any of the fine-grained propaganda techniques. 
The left side of the equal sign indicate coarse-grained techniques and right side indicate fine-grained techniques.\n\n" + f"no_technique = ['no propaganda']\n" + f"Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n" + f"Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n" + f"Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n" + f"Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n" + f"Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n" + f"Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n" + f"Below you will find a few examples of text with coarse-grained propaganda techniques:\n\n" + ) + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for index, example in enumerate(examples): + sent = example["input"] + tech_str = "" + for t in example["label"]: + tech_str += "'" + t + "', " + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "text: " + + sent + + "\nlabel: " + + tech_str + + "\n\n" + ) + + out_prompt = out_prompt + ( + f"Based on the instructions and examples above analyze the following text and provide only labels as a list of string.\n\n" + ) + out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + # pred_label = eval(label) + + labels = [] + + response = [ + s.strip().replace("'", "").replace("[", "").replace("]", "") + for s in label.split(",") + if len(s) > 1 + ] + + # print(response) + for label in response: + label = label.lower() + if "manipulative" in label: + labels.append("Manipulative_Wording") + if "call" in label: + labels.append("Call") + if "reputation" in label: + labels.append("Reputation") + if "technique" in label or "propaganda" in label: + labels.append("no_technique") + if "justification" in label: + labels.append("Justification") + if "simplification" in label: + labels.append("Simplification") + if "distraction" in label: + labels.append("Distraction") + + if len(labels) == 0: + labels.append("no_technique") + if len(labels) > 1 and "no_technique" in labels: + labels.remove("no_technique") + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_ZeroShot.py similarity index 97% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_ZeroShot.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_ZeroShot.py index 94cecc25..47c538fa 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_ZeroShot.py @@ -1,90 +1,90 @@ -from llmebench.datasets import ArProCoarseDataset -from llmebench.models import OpenAIModel -from llmebench.tasks import MultilabelPropagandaTask - - -def metadata(): - return { - 
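The coarse-to-fine mapping spelled out in the ArProCoarse prompts above can also be read as a reverse lookup from fine-grained technique to coarse label. A minimal sketch (the dict literal is distilled from the prompt text; coarsen is an illustrative helper, not part of llmebench):

COARSE_OF = {
    "Manipulative_Wording": ["Exaggeration/Minimisation", "Loaded Language", "Obfuscation, Vagueness, Confusion", "Repetition"],
    "Reputation": ["Appeal to Hypocrisy", "Doubt", "Guilt by Association", "Name Calling/Labeling", "Questioning the Reputation"],
    "Justification": ["Appeal to Authority", "Appeal to Fear/Prejudice", "Appeal to Popularity", "Appeal to Values", "Flag Waving"],
    "Simplification": ["Causal Oversimplification", "Consequential Oversimplification", "False Dilemma/No Choice"],
    "Distraction": ["Red Herring", "Straw Man", "Whataboutism"],
    "Call": ["Appeal to Time", "Conversation Killer", "Slogans"],
}
FINE_TO_COARSE = {fine: coarse for coarse, fines in COARSE_OF.items() for fine in fines}

def coarsen(fine_labels):
    # Map each fine-grained technique to its coarse bucket; anything
    # unknown falls back to no_technique.
    return sorted({FINE_TO_COARSE.get(f, "no_technique") for f in fine_labels})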
"author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.", - "scores": {"Micro-F1": "0.540"}, - } - - -def config(): - return { - "dataset": ArProCoarseDataset, - "task": MultilabelPropagandaTask, - "model": OpenAIModel, - "model_args": { - "max_tries": 3, - }, - } - - -def prompt(input_sample): - prompt_text = ( - f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" - f"The following coarse-grained propaganda techniques are defined based on the appearance of any of the fine-grained propaganda techniques. The left side of the equal sign indicates coarse-grained techniques and right side indicates fine-grained techniques.\n\n" - f"no_technique = ['no propaganda']\n" - f"Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n" - f"Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n" - f"Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n" - f"Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n" - f"Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n" - f"Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n" - ) - out_prompt = prompt_text + ( - f"Based on the instructions above analyze the following text and provide only coarse-grained propaganda techniques as a list of strings.\n\n" - ) - out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" - - return [ - { - "role": "system", - "content": "You are an expert annotator.", - }, - { - "role": "user", - "content": out_prompt, - }, - ] - - -def post_process(response): - label = response["choices"][0]["message"]["content"] # .lower() - # pred_label = eval(label) - - labels = [] - - response = [ - s.strip().replace("'", "").replace("[", "").replace("]", "") - for s in label.split(",") - if len(s) > 1 - ] - - # print(response) - for label in response: - label = label.lower() - if "manipulative" in label: - labels.append("Manipulative_Wording") - if "call" in label: - labels.append("Call") - if "reputation" in label: - labels.append("Reputation") - if "technique" in label or "propaganda" in label: - labels.append("no_technique") - if "justification" in label: - labels.append("Justification") - if "simplification" in label: - labels.append("Simplification") - if "distraction" in label: - labels.append("Distraction") - - if len(labels) == 0: - labels.append("no_technique") - if len(labels) > 1 and "no_technique" in labels: - labels.remove("no_technique") - - return labels +from llmebench.datasets import ArProCoarseDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. 
API version '2023-03-15-preview'.", + "scores": {"Micro-F1": "0.540"}, + } + + +def config(): + return { + "dataset": ArProCoarseDataset, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" + f"The following coarse-grained propaganda techniques are defined based on the appearance of any of the fine-grained propaganda techniques. The left side of the equal sign indicates coarse-grained techniques and right side indicates fine-grained techniques.\n\n" + f"no_technique = ['no propaganda']\n" + f"Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n" + f"Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n" + f"Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n" + f"Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n" + f"Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n" + f"Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n" + ) + out_prompt = prompt_text + ( + f"Based on the instructions above analyze the following text and provide only coarse-grained propaganda techniques as a list of strings.\n\n" + ) + out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" + + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": out_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + # pred_label = eval(label) + + labels = [] + + response = [ + s.strip().replace("'", "").replace("[", "").replace("]", "") + for s in label.split(",") + if len(s) > 1 + ] + + # print(response) + for label in response: + label = label.lower() + if "manipulative" in label: + labels.append("Manipulative_Wording") + if "call" in label: + labels.append("Call") + if "reputation" in label: + labels.append("Reputation") + if "technique" in label or "propaganda" in label: + labels.append("no_technique") + if "justification" in label: + labels.append("Justification") + if "simplification" in label: + labels.append("Simplification") + if "distraction" in label: + labels.append("Distraction") + + if len(labels) == 0: + labels.append("no_technique") + if len(labels) > 1 and "no_technique" in labels: + labels.remove("no_technique") + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_FewShot.py similarity index 97% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_FewShot.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_FewShot.py index 8c43c895..25a420cb 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_FewShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_FewShot.py @@ -1,207 +1,207 @@ -import ast -import re -import codecs - -from llmebench.datasets import ArProMultilabelDataset -from llmebench.models import OpenAIModel -from llmebench.tasks import 
MultilabelPropagandaTask - -ESCAPE_SEQUENCE_RE = re.compile(r''' - ( \\U........ # 8-digit hex escapes - | \\u.... # 4-digit hex escapes - | \\x.. # 2-digit hex escapes - | \\[0-7]{1,3} # Octal escapes - | \\N\{[^}]+\} # Unicode characters by name - | \\[\\'"abfnrtv] # Single-character escapes - )''', re.UNICODE | re.VERBOSE) - - -def metadata(): - return { - "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", - "scores": {"Micro-F1": "0.467"}, - } - - -def config(): - return { - "dataset": ArProMultilabelDataset, - "task": MultilabelPropagandaTask, - "model": OpenAIModel, - "model_args": { - "max_tries": 10, - }, - } - - -def prompt(input_sample, examples): - prompt_text = ( - f"Your task is to analyze the text and determine if it contains the following propaganda techniques.\n\n" - f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , 'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'" - f"Below you will find a few examples of text with propaganda techniques:\n\n" - ) - - fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) - return [ - { - "role": "system", - "content": "You are an expert annotator.", - }, - { - "role": "user", - "content": fs_prompt, - }, - ] - - - - -def few_shot_prompt(input_sample, base_prompt, examples): - out_prompt = base_prompt - for index, example in enumerate(examples): - sent = example["input"] - tech_str = "" - for t in example["label"]: - tech_str += "'" + t + "', " - - out_prompt = ( - out_prompt - + "Example " - + str(index) - + ":" - + "\n" - + "text: " - + sent - + "\nlabel: " - + tech_str - + "\n\n" - ) - - out_prompt = out_prompt + ( - f"Based on the instructions and examples above analyze the following text and provide only labels as a list of string.\n\n" - ) - out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" - - return out_prompt - - -def decode_escapes(s): - def decode_match(match): - return codecs.decode(match.group(0), 'unicode-escape') - - return ESCAPE_SEQUENCE_RE.sub(decode_match, s) - - -def fix_single_label(label): - label_fixed = "" - if "slogan" in label: - label_fixed = "Slogans" - if "loaded" in label: - label_fixed = "Loaded_Language" - if "prejudice" in label or "fear" in label or "mongering" in label: - label_fixed = "Appeal_to_Fear-Prejudice" - if "terminating" in label or "thought" in label or "conversation" in label or "killer" in label: - label_fixed = "Conversation_Killer" - if "calling" in label or label == "name c" or "labeling" in label: - label_fixed = "Name_Calling-Labeling" - if "minimisation" in label or label == "exaggeration minim" or "exaggeration" in label: - label_fixed = "Exaggeration-Minimisation" - if "values" in label: - label_fixed = "Appeal_to_Values" - if "flag" in label or "wav" in label: - label_fixed = "Flag_Waving" - if "obfusc" in label or "vague" in label or "confusion" in label: - 
label_fixed = "Obfuscation-Vagueness-Confusion" - if "causal" in label: - label_fixed = "Causal_Oversimplification" - if "conseq" in label: - label_fixed = "Consequential_Oversimplification" - if "authority" in label: - label_fixed = "Appeal_to_Authority" - if "choice" in label or "dilemma" in label or "false" in label: - label_fixed = "False_Dilemma-No_Choice" - if "herring" in label or "irrelevant" in label: - label_fixed = "Red_Herring" - if "straw" in label or "misrepresentation" in label: - label_fixed = "Straw_Man" - if "guilt" in label or "association" in label: - label_fixed = "Guilt_by_Association" - if "questioning" in label or "reputation" in label: - label_fixed = "Questioning_the_Reputation" - if "whataboutism" in label: - label_fixed = "Whataboutism" - if "doubt" in label: - label_fixed = "Doubt" - if "doubt" in label: - label_fixed = "Doubt" - if "time" in label: - label_fixed = "Appeal_to_Time" - if "popularity" in label: - label_fixed = "Appeal_to_Popularity" - if "repetition" in label: - label_fixed = "Repetition" - if "hypocrisy" in label: - label_fixed = "Appeal_to_Hypocrisy" - - if ("no propaganda" in label or "no technique" in label - or label == "" - or label == "no" - or label == "appeal to history" - or label == "appeal to emotion" - or label == "appeal to" - or label == "appeal" - or label == "appeal to author" - or label == "emotional appeal" - or "no techn" in label - or "hashtag" in label - or "theory" in label - or "specific mention" in label - or "sarcasm" in label - or "frustration" in label - or "analogy" in label - or "metaphor" in label - or "religious" in label - or "gratitude" in label - or 'no_technique' in label - or "technique" in label): - label_fixed = "no_technique" - - #print(label_fixed) - - return label_fixed - -def fix_multilabel(pred_label): - if "used in this text" in pred_label or "no technique" in pred_label: - return ["no_technique"] - - labels_fixed = [] - pred_label = pred_label.replace("'label: ","").replace("'label': ","").replace("\"\"","\"").replace("\'\'","\'") - - - pred_label = decode_escapes(pred_label).replace("\'", "\"") - if not pred_label.startswith("["): - pred_label = "[" + pred_label + "]" - pred_label = ast.literal_eval(pred_label) - - for label in pred_label: - label = label.strip().lower() - label_fixed = fix_single_label(label) - labels_fixed.append(label_fixed) - - out_put_labels = [] - # Remove no technique label when we have other techniques for the same text - if len(labels_fixed) > 1: - for flabel in labels_fixed: - if flabel != "no_technique": - out_put_labels.append(flabel) - return out_put_labels - - return labels_fixed - -def post_process(response): - label = response["choices"][0]["message"]["content"] # .lower() - labels = fix_multilabel(label) - - return labels +import ast +import re +import codecs + +from llmebench.datasets import ArProMultilabelDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 
3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
+        "scores": {"Micro-F1": "0.467"},
+    }
+
+
+def config():
+    return {
+        "dataset": ArProMultilabelDataset,
+        "task": MultilabelPropagandaTask,
+        "model": OpenAIModel,
+        "model_args": {
+            "max_tries": 10,
+        },
+    }
+
+
+def prompt(input_sample, examples):
+    prompt_text = (
+        f"Your task is to analyze the text and determine if it contains the following propaganda techniques.\n\n"
+        f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , 'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'"
+        f"Below you will find a few examples of text with propaganda techniques:\n\n"
+    )
+
+    fs_prompt = few_shot_prompt(input_sample, prompt_text, examples)
+    return [
+        {
+            "role": "system",
+            "content": "You are an expert annotator.",
+        },
+        {
+            "role": "user",
+            "content": fs_prompt,
+        },
+    ]
+
+
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    out_prompt = base_prompt
+    for index, example in enumerate(examples):
+        sent = example["input"]
+        tech_str = ""
+        for t in example["label"]:
+            tech_str += "'" + t + "', "
+
+        out_prompt = (
+            out_prompt
+            + "Example "
+            + str(index)
+            + ":"
+            + "\n"
+            + "text: "
+            + sent
+            + "\nlabel: "
+            + tech_str
+            + "\n\n"
+        )
+
+    out_prompt = out_prompt + (
+        f"Based on the instructions and examples above analyze the following text and provide only labels as a list of strings.\n\n"
+    )
+    out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n"
+
+    return out_prompt
+
+
+def decode_escapes(s):
+    def decode_match(match):
+        return codecs.decode(match.group(0), 'unicode-escape')
+
+    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)
+
+
+def fix_single_label(label):
+    label_fixed = ""
+    if "slogan" in label:
+        label_fixed = "Slogans"
+    if "loaded" in label:
+        label_fixed = "Loaded_Language"
+    if "prejudice" in label or "fear" in label or "mongering" in label:
+        label_fixed = "Appeal_to_Fear-Prejudice"
+    if "terminating" in label or "thought" in label or "conversation" in label or "killer" in label:
+        label_fixed = "Conversation_Killer"
+    if "calling" in label or label == "name c" or "labeling" in label:
+        label_fixed = "Name_Calling-Labeling"
+    if "minimisation" in label or label == "exaggeration minim" or "exaggeration" in label:
+        label_fixed = "Exaggeration-Minimisation"
+    if "values" in label:
+        label_fixed = "Appeal_to_Values"
+    if "flag" in label or "wav" in label:
+        label_fixed = "Flag_Waving"
+    if "obfusc" in label or "vague" in label or "confusion" in label:
+        label_fixed = "Obfuscation-Vagueness-Confusion"
+    if "causal" in label:
+        label_fixed = "Causal_Oversimplification"
+    if "conseq" in label:
+        label_fixed = "Consequential_Oversimplification"
+    if "authority" in label:
+        label_fixed = "Appeal_to_Authority"
+    if "choice" in label or "dilemma" in label or "false" in label:
+        label_fixed = "False_Dilemma-No_Choice"
+    if "herring" in label or "irrelevant" in label:
+        label_fixed = "Red_Herring"
+    if "straw" in label or "misrepresentation" in label:
+        label_fixed = "Straw_Man"
+    if "guilt" in label or "association" in label:
+        label_fixed = "Guilt_by_Association"
+    if "questioning" in label or "reputation" in label:
+        label_fixed = "Questioning_the_Reputation"
+    if "whataboutism" in label:
+        label_fixed = "Whataboutism"
+    if "doubt" in label:
+        label_fixed = "Doubt"
+    if "time" in label:
+        label_fixed = "Appeal_to_Time"
+    if "popularity" in label:
+        label_fixed = "Appeal_to_Popularity"
+    if "repetition" in label:
+        label_fixed = "Repetition"
+    if "hypocrisy" in label:
+        label_fixed = "Appeal_to_Hypocrisy"
+
+    if ("no propaganda" in label or "no technique" in label
+        or label == ""
+        or label == "no"
+        or label == "appeal to history"
+        or label == "appeal to emotion"
+        or label == "appeal to"
+        or label == "appeal"
+        or label == "appeal to author"
+        or label == "emotional appeal"
+        or "no techn" in label
+        or "hashtag" in label
+        or "theory" in label
+        or "specific mention" in label
+        or "sarcasm" in label
+        or "frustration" in label
+        or "analogy" in label
+        or "metaphor" in label
+        or "religious" in label
+        or "gratitude" in label
+        or 'no_technique' in label
+        or "technique" in label):
+        label_fixed = "no_technique"
+
+    #print(label_fixed)
+
+    return label_fixed
+
+def fix_multilabel(pred_label):
+    if "used in this text" in pred_label or "no technique" in pred_label:
+        return ["no_technique"]
+
+    labels_fixed = []
+    pred_label = pred_label.replace("'label: ","").replace("'label': ","").replace("\"\"","\"").replace("\'\'","\'")
+
+
+    pred_label = decode_escapes(pred_label).replace("\'", "\"")
+    if not pred_label.startswith("["):
+        pred_label = "[" + pred_label + "]"
+    pred_label = ast.literal_eval(pred_label)
+
+    for label in pred_label:
+        label = label.strip().lower()
+        label_fixed = fix_single_label(label)
+        labels_fixed.append(label_fixed)
+
+    out_put_labels = []
+    # Remove no technique label when we have other techniques for the same text
+    if len(labels_fixed) > 1:
+        for flabel in labels_fixed:
+            if flabel != "no_technique":
+                out_put_labels.append(flabel)
+        return out_put_labels
+
+    return labels_fixed
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]  # .lower()
+    labels = fix_multilabel(label)
+
+    return labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_ZeroShot.py
similarity index 97%
rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_ZeroShot.py
index 2f3c365e..384a035f 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_ZeroShot.py
@@ -1,179 +1,179 @@
-import ast
-import re
-import codecs
-
-from llmebench.datasets import ArProMultilabelDataset
-from llmebench.models import OpenAIModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-ESCAPE_SEQUENCE_RE = re.compile(r'''
-    ( \\U........      # 8-digit hex escapes
-    | \\u....          # 4-digit hex escapes
-    | \\x..            
# 2-digit hex escapes - | \\[0-7]{1,3} # Octal escapes - | \\N\{[^}]+\} # Unicode characters by name - | \\[\\'"abfnrtv] # Single-character escapes - )''', re.UNICODE | re.VERBOSE) - - -def metadata(): - return { - "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.", - "scores": {"Micro-F1": "0.372"}, - } - - -def config(): - return { - "dataset": ArProMultilabelDataset, - "task": MultilabelPropagandaTask, - "model": OpenAIModel, - "model_args": { - "max_tries": 10, - }, - } - - -def prompt(input_sample): - prompt_text = ( - f"Your task is to analyze the text and determine if it contains the following propaganda techniques.\n\n" - f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , " - f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , " - f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , " - f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , " - f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'. \nProvide only labels as a list of strings.\n\n" - ) - out_prompt = prompt_text + "text: " + input_sample + "\nlabel: \n" - - return [ - { - "role": "system", - "content": "You are an expert annotator.", - }, - { - "role": "user", - "content": out_prompt, - }, - ] - - -def decode_escapes(s): - def decode_match(match): - return codecs.decode(match.group(0), 'unicode-escape') - - return ESCAPE_SEQUENCE_RE.sub(decode_match, s) - - -def fix_single_label(label): - label_fixed = "" - if "slogan" in label: - label_fixed = "Slogans" - if "loaded" in label: - label_fixed = "Loaded_Language" - if "prejudice" in label or "fear" in label or "mongering" in label: - label_fixed = "Appeal_to_Fear-Prejudice" - if "terminating" in label or "thought" in label or "conversation" in label or "killer" in label: - label_fixed = "Conversation_Killer" - if "calling" in label or label == "name c" or "labeling" in label: - label_fixed = "Name_Calling-Labeling" - if "minimisation" in label or label == "exaggeration minim" or "exaggeration" in label: - label_fixed = "Exaggeration-Minimisation" - if "values" in label: - label_fixed = "Appeal_to_Values" - if "flag" in label or "wav" in label: - label_fixed = "Flag_Waving" - if "obfusc" in label or "vague" in label or "confusion" in label: - label_fixed = "Obfuscation-Vagueness-Confusion" - if "causal" in label: - label_fixed = "Causal_Oversimplification" - if "conseq" in label: - label_fixed = "Consequential_Oversimplification" - if "authority" in label: - label_fixed = "Appeal_to_Authority" - if "choice" in label or "dilemma" in label or "false" in label: - label_fixed = "False_Dilemma-No_Choice" - if "herring" in label or "irrelevant" in label: - label_fixed = "Red_Herring" - if "straw" in label or "misrepresentation" in label: - label_fixed = "Straw_Man" - if "guilt" in label or "association" in label: - label_fixed = "Guilt_by_Association" - if "questioning" in label or "reputation" in label: - label_fixed = "Questioning_the_Reputation" - if "whataboutism" in label: - label_fixed = "Whataboutism" - if "doubt" in label: - label_fixed = "Doubt" - if "doubt" in label: - label_fixed = "Doubt" - if "time" in label: - 
label_fixed = "Appeal_to_Time" - if "popularity" in label: - label_fixed = "Appeal_to_Popularity" - if "repetition" in label: - label_fixed = "Repetition" - if "hypocrisy" in label: - label_fixed = "Appeal_to_Hypocrisy" - - if ("no propaganda" in label or "no technique" in label - or label == "" - or label == "no" - or label == "appeal to history" - or label == "appeal to emotion" - or label == "appeal to" - or label == "appeal" - or label == "appeal to author" - or label == "emotional appeal" - or "no techn" in label - or "hashtag" in label - or "theory" in label - or "specific mention" in label - or "sarcasm" in label - or "frustration" in label - or "analogy" in label - or "metaphor" in label - or "religious" in label - or "gratitude" in label - or 'no_technique' in label - or "technique" in label): - label_fixed = "no_technique" - - #print(label_fixed) - - return label_fixed - -def fix_multilabel(pred_label): - if "used in this text" in pred_label or "no technique" in pred_label: - return ["no_technique"] - - labels_fixed = [] - pred_label = pred_label.replace("'label: ","").replace("'label': ","").replace("\"\"","\"").replace("\'\'","\'") - - - pred_label = decode_escapes(pred_label).replace("\'", "\"") - if not pred_label.startswith("["): - pred_label = "[" + pred_label + "]" - pred_label = ast.literal_eval(pred_label) - - for label in pred_label: - label = label.strip().lower() - label_fixed = fix_single_label(label) - labels_fixed.append(label_fixed) - - out_put_labels = [] - # Remove no technique label when we have other techniques for the same text - if len(labels_fixed) > 1: - for flabel in labels_fixed: - if flabel != "no_technique": - out_put_labels.append(flabel) - return out_put_labels - - return labels_fixed - -def post_process(response): - label = response["choices"][0]["message"]["content"] # .lower() - labels = fix_multilabel(label) - - return labels +import ast +import re +import codecs + +from llmebench.datasets import ArProMultilabelDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. 
API version '2023-03-15-preview'.", + "scores": {"Micro-F1": "0.372"}, + } + + +def config(): + return { + "dataset": ArProMultilabelDataset, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 10, + }, + } + + +def prompt(input_sample): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains the following propaganda techniques.\n\n" + f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , " + f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , " + f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , " + f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , " + f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'. \nProvide only labels as a list of strings.\n\n" + ) + out_prompt = prompt_text + "text: " + input_sample + "\nlabel: \n" + + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": out_prompt, + }, + ] + + +def decode_escapes(s): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + return ESCAPE_SEQUENCE_RE.sub(decode_match, s) + + +def fix_single_label(label): + label_fixed = "" + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded_Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal_to_Fear-Prejudice" + if "terminating" in label or "thought" in label or "conversation" in label or "killer" in label: + label_fixed = "Conversation_Killer" + if "calling" in label or label == "name c" or "labeling" in label: + label_fixed = "Name_Calling-Labeling" + if "minimisation" in label or label == "exaggeration minim" or "exaggeration" in label: + label_fixed = "Exaggeration-Minimisation" + if "values" in label: + label_fixed = "Appeal_to_Values" + if "flag" in label or "wav" in label: + label_fixed = "Flag_Waving" + if "obfusc" in label or "vague" in label or "confusion" in label: + label_fixed = "Obfuscation-Vagueness-Confusion" + if "causal" in label: + label_fixed = "Causal_Oversimplification" + if "conseq" in label: + label_fixed = "Consequential_Oversimplification" + if "authority" in label: + label_fixed = "Appeal_to_Authority" + if "choice" in label or "dilemma" in label or "false" in label: + label_fixed = "False_Dilemma-No_Choice" + if "herring" in label or "irrelevant" in label: + label_fixed = "Red_Herring" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Straw_Man" + if "guilt" in label or "association" in label: + label_fixed = "Guilt_by_Association" + if "questioning" in label or "reputation" in label: + label_fixed = "Questioning_the_Reputation" + if "whataboutism" in label: + label_fixed = "Whataboutism" + if "doubt" in label: + label_fixed = "Doubt" + if "doubt" in label: + label_fixed = "Doubt" + if "time" in label: + label_fixed = "Appeal_to_Time" + if "popularity" in label: + label_fixed = "Appeal_to_Popularity" + if "repetition" in label: + label_fixed = "Repetition" + if "hypocrisy" in label: + label_fixed = "Appeal_to_Hypocrisy" + + if ("no propaganda" in label or "no technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == 
"appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "sarcasm" in label + or "frustration" in label + or "analogy" in label + or "metaphor" in label + or "religious" in label + or "gratitude" in label + or 'no_technique' in label + or "technique" in label): + label_fixed = "no_technique" + + #print(label_fixed) + + return label_fixed + +def fix_multilabel(pred_label): + if "used in this text" in pred_label or "no technique" in pred_label: + return ["no_technique"] + + labels_fixed = [] + pred_label = pred_label.replace("'label: ","").replace("'label': ","").replace("\"\"","\"").replace("\'\'","\'") + + + pred_label = decode_escapes(pred_label).replace("\'", "\"") + if not pred_label.startswith("["): + pred_label = "[" + pred_label + "]" + pred_label = ast.literal_eval(pred_label) + + for label in pred_label: + label = label.strip().lower() + label_fixed = fix_single_label(label) + labels_fixed.append(label_fixed) + + out_put_labels = [] + # Remove no technique label when we have other techniques for the same text + if len(labels_fixed) > 1: + for flabel in labels_fixed: + if flabel != "no_technique": + out_put_labels.append(flabel) + return out_put_labels + + return labels_fixed + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + labels = fix_multilabel(label) + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot.py similarity index 97% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_FewShot.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot.py index 179ca21a..d96c408c 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_FewShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot.py @@ -1,238 +1,238 @@ -import ast -import json -import re -import codecs - -from llmebench.datasets import ArProSpanDataset -from llmebench.models import OpenAIModel -from llmebench.tasks import ArProSpanTask - -ESCAPE_SEQUENCE_RE = re.compile(r''' - ( \\U........ # 8-digit hex escapes - | \\u.... # 4-digit hex escapes - | \\x.. # 2-digit hex escapes - | \\[0-7]{1,3} # Octal escapes - | \\N\{[^}]+\} # Unicode characters by name - | \\[\\'"abfnrtv] # Single-character escapes - )''', re.UNICODE | re.VERBOSE) - - -def metadata(): - return { - "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. We implemented correcting predicted span by GPT. 
3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning",
-        "scores": {"Micro-F1": "0.267"},
-    }
-
-
-def config():
-    return {
-        "dataset": ArProSpanDataset,
-        "task": ArProSpanTask,
-        "task_args": {"correct_span": True},
-        "model": OpenAIModel,
-        "model_args": {
-            "max_tries": 3,
-        },
-    }
-
-
-def prompt(input_sample, examples):
-    prompt_text = (
-        f"Your task is to analyze the Paragraph and determine if it contains the following propaganda techniques.\n\n"
-        f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , "
-        f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , "
-        f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , "
-        f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , "
-        f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'"
-    )
-    fs_prompt = few_shot_prompt(input_sample, prompt_text, examples)
-
-
-    return [
-        {
-            "role": "system",
-            "content": "You are an expert annotator.",
-        },
-        {
-            "role": "user",
-            "content": fs_prompt,
-        },
-    ]
-
-
-def few_shot_prompt(input_sample, base_prompt, examples):
-    out_prompt = base_prompt + "\n"
-
-    for index, example in enumerate(examples):
-        sent = example["input"]
-        ex_labels = []
-
-        for l in example["label"]:
-            #print(l)
-            l.pop('par_txt',None)
-            ex_labels.append(l)
-
-        ex_labels = str(ex_labels)
-
-        out_prompt = (
-            out_prompt
-            + "Example "
-            + str(index+1)
-            + ":\n"
-            + "Paragraph: "
-            + sent
-            + "\nlabel: "
-            + ex_labels
-            + "\n\n"
-        )
-
-    out_prompt = out_prompt + (
-        f"Based on the instructions and examples above analyze the following Paragraph and answer exactly and only by returning a list of the matching labels from the aforementioned techniques and specify the start position and end position of the text span matching each technique."
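For the span task, the instruction above is followed by a JSON template (next line); a well-formed model response under that template would look like the following, with invented values for illustration:

[{"technique": "Loaded_Language", "text": "span text here", "start": 12, "end": 26}]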
- f'Use the following template and return the results as a list of json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n' - ) - out_prompt = out_prompt + "Paragraph: " + input_sample + "\nlabel: \n" - - return out_prompt - - -def decode_escapes(s): - def decode_match(match): - return codecs.decode(match.group(0), 'unicode-escape') - - return ESCAPE_SEQUENCE_RE.sub(decode_match, s) - - -def fix_single_label(label): - if "slogan" in label: - label_fixed = "Slogans" - if "loaded" in label: - label_fixed = "Loaded_Language" - if "prejudice" in label or "fear" in label or "mongering" in label: - label_fixed = "Appeal_to_Fear-Prejudice" - if ( - "terminating" in label - or "thought" in label - or "conversation" in label - or "killer" in label - ): - label_fixed = "Conversation_Killer" - if "calling" in label or label == "name c" or "labeling" in label: - label_fixed = "Name_Calling-Labeling" - if ( - "minimisation" in label - or label == "exaggeration minim" - or "exaggeration" in label - ): - label_fixed = "Exaggeration-Minimisation" - if "values" in label: - label_fixed = "Appeal_to_Values" - if "flag" in label or "wav" in label: - label_fixed = "Flag_Waving" - if "obfusc" in label or "vague" in label or "confusion" in label: - label_fixed = "Obfuscation-Vagueness-Confusion" - if "causal" in label: - label_fixed = "Causal_Oversimplification" - if "conseq" in label: - label_fixed = "Consequential_Oversimplification" - if "authority" in label: - label_fixed = "Appeal_to_Authority" - if "choice" in label or "dilemma" in label or "false" in label: - label_fixed = "False_Dilemma-No_Choice" - if "herring" in label or "irrelevant" in label: - label_fixed = "Red_Herring" - if "straw" in label or "misrepresentation" in label: - label_fixed = "Straw_Man" - if "guilt" in label or "association" in label: - label_fixed = "Guilt_by_Association" - if "questioning" in label or "reputation" in label: - label_fixed = "Questioning_the_Reputation" - if "whataboutism" in label: - label_fixed = "Whataboutism" - if "doubt" in label: - label_fixed = "Doubt" - if "doubt" in label: - label_fixed = "Doubt" - if "time" in label: - label_fixed = "Appeal_to_Time" - if "popularity" in label: - label_fixed = "Appeal_to_Popularity" - if "repetition" in label: - label_fixed = "Repetition" - if "hypocrisy" in label: - label_fixed = "Appeal_to_Hypocrisy" - - if ( - "no propaganda" in label - or "no technique" in label - or label == "" - or label == "no" - or label == "appeal to history" - or label == "appeal to emotion" - or label == "appeal to" - or label == "appeal" - or label == "appeal to author" - or label == "emotional appeal" - or "no techn" in label - or "hashtag" in label - or "theory" in label - or "specific mention" in label - or "sarcasm" in label - or "frustration" in label - or "analogy" in label - or "metaphor" in label - or "religious" in label - or "gratitude" in label - or 'no_technique' in label - or "technique" in label): - label_fixed = "no_technique" - - return label_fixed - - -def fix_span(prediction): - # print(prediction) - prediction = prediction.replace("},\n{", "}, {").replace("\\n", " ").replace("\n", " ").replace( - '[ ', '[').replace('[ ', '[').replace(" {", "{").replace(" ]", "]").replace(' ]', ']').strip() - - # print(prediction) - - if "provide the paragraph" in prediction: return [] - - try: - pred_labels = ast.literal_eval(prediction) - except: - # print("ERRORRR!") - pred_labels = json.loads(prediction) - - # print(pred_labels) - - # print(prediction) - format_pred_label = [] - for 
i, label in enumerate(pred_labels):
-        if 'technique' not in label or 'start' not in label or 'end' not in label \
-                or "text" not in label or len(label["text"]) < 2:
-            continue
-
-        label['technique'] = label['technique'].strip().lower()
-        label['technique'] = fix_single_label(label['technique'])
-
-        format_pred_label.append(label)
-
-    if len(format_pred_label) == 0:
-        return []
-
-    final_labels = []
-    for pred_label in format_pred_label:
-        if pred_label['technique'] != "no_technique":
-            final_labels.append(pred_label)
-
-    return final_labels
-
-
-def post_process(response):
-    labels = response["choices"][0]["message"]["content"].lower().replace("label: ", "").strip()
-    labels = fix_span(labels)
-
-    return labels
+import ast
+import json
+import re
+import codecs
+
+from llmebench.datasets import ArProSpanDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ArProSpanTask
+
+ESCAPE_SEQUENCE_RE = re.compile(r'''
+    ( \\U........      # 8-digit hex escapes
+    | \\u....          # 4-digit hex escapes
+    | \\x..            # 2-digit hex escapes
+    | \\[0-7]{1,3}     # Octal escapes
+    | \\N\{[^}]+\}     # Unicode characters by name
+    | \\[\\'"abfnrtv]  # Single-character escapes
+    )''', re.UNICODE | re.VERBOSE)
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "gpt-4-32k (version 0314)",
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. We implemented correction of the spans predicted by GPT. 3 samples were chosen per test sample based on MaxMarginalRelevance for few-shot learning.",
+        "scores": {"Micro-F1": "0.267"},
+    }
+
+
+def config():
+    return {
+        "dataset": ArProSpanDataset,
+        "task": ArProSpanTask,
+        "task_args": {"correct_span": True},
+        "model": OpenAIModel,
+        "model_args": {
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample, examples):
+    prompt_text = (
+        f"Your task is to analyze the Paragraph and determine if it contains the following propaganda techniques.\n\n"
+        f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , "
+        f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , "
+        f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , "
+        f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , "
+        f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'"
+    )
+    fs_prompt = few_shot_prompt(input_sample, prompt_text, examples)
+
+    return [
+        {
+            "role": "system",
+            "content": "You are an expert annotator.",
+        },
+        {
+            "role": "user",
+            "content": fs_prompt,
+        },
+    ]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    out_prompt = base_prompt + "\n"
+
+    for index, example in enumerate(examples):
+        sent = example["input"]
+        ex_labels = []
+
+        for l in example["label"]:
+            l.pop('par_txt', None)  # drop the paragraph text field; it is not part of the output template
+            ex_labels.append(l)
+
+        ex_labels = str(ex_labels)
+
+        out_prompt = (
+            out_prompt
+            + "Example "
+            + str(index + 1)
+            + ":\n"
+            + "Paragraph: "
+            + sent
+            + "\nlabel: "
+            + ex_labels
+            + "\n\n"
+        )
+
+    out_prompt = out_prompt + (
+        f"Based on the instructions and examples above analyze the following Paragraph and answer exactly and only by returning a list of the matching labels from the aforementioned techniques and specify the start position and end position of the text span matching each technique."
+        f' Use the following template and return the results as a list of json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n'
+    )
+    out_prompt = out_prompt + "Paragraph: " + input_sample + "\nlabel: \n"
+
+    return out_prompt
+
+
+def decode_escapes(s):
+    def decode_match(match):
+        return codecs.decode(match.group(0), 'unicode-escape')
+
+    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)
+
+
+def fix_single_label(label):
+    label_fixed = "no_technique"  # default, so an unrecognized label cannot leave label_fixed unbound
+    if "slogan" in label:
+        label_fixed = "Slogans"
+    if "loaded" in label:
+        label_fixed = "Loaded_Language"
+    if "prejudice" in label or "fear" in label or "mongering" in label:
+        label_fixed = "Appeal_to_Fear-Prejudice"
+    if (
+        "terminating" in label
+        or "thought" in label
+        or "conversation" in label
+        or "killer" in label
+    ):
+        label_fixed = "Conversation_Killer"
+    if "calling" in label or label == "name c" or "labeling" in label:
+        label_fixed = "Name_Calling-Labeling"
+    if (
+        "minimisation" in label
+        or label == "exaggeration minim"
+        or "exaggeration" in label
+    ):
+        label_fixed = "Exaggeration-Minimisation"
+    if "values" in label:
+        label_fixed = "Appeal_to_Values"
+    if "flag" in label or "wav" in label:
+        label_fixed = "Flag_Waving"
+    if "obfusc" in label or "vague" in label or "confusion" in label:
+        label_fixed = "Obfuscation-Vagueness-Confusion"
+    if "causal" in label:
+        label_fixed = "Causal_Oversimplification"
+    if "conseq" in label:
+        label_fixed = "Consequential_Oversimplification"
+    if "authority" in label:
+        label_fixed = "Appeal_to_Authority"
+    if "choice" in label or "dilemma" in label or "false" in label:
+        label_fixed = "False_Dilemma-No_Choice"
+    if "herring" in label or "irrelevant" in label:
+        label_fixed = "Red_Herring"
+    if "straw" in label or "misrepresentation" in label:
+        label_fixed = "Straw_Man"
+    if "guilt" in label or "association" in label:
+        label_fixed = "Guilt_by_Association"
+    if "questioning" in label or "reputation" in label:
+        label_fixed = "Questioning_the_Reputation"
+    if "whataboutism" in label:
+        label_fixed = "Whataboutism"
+    if "doubt" in label:
+        label_fixed = "Doubt"
+    if "time" in label:
+        label_fixed = "Appeal_to_Time"
+    if "popularity" in label:
+        label_fixed = "Appeal_to_Popularity"
+    if "repetition" in label:
+        label_fixed = "Repetition"
+    if "hypocrisy" in label:
+        label_fixed = "Appeal_to_Hypocrisy"
+
+    if (
+        "no propaganda" in label
+        or "no technique" in label
+        or label == ""
+        or label == "no"
+        or label == "appeal to history"
+        or label == "appeal to emotion"
+        or label == "appeal to"
+        or label == "appeal"
+        or label == "appeal to author"
+        or label == "emotional appeal"
+        or "no techn" in label
+        or "hashtag" in label
+        or "theory" in label
+        or "specific mention" in label
+        or "sarcasm" in label
+        or "frustration" in label
+        or "analogy" in label
+        or "metaphor" in label
+        or "religious" in label
+        or "gratitude" in label
+        or 'no_technique' in label
+        or "technique" in label):
+        label_fixed = "no_technique"
+
+    return label_fixed
+
+
+def fix_span(prediction):
+    prediction = prediction.replace("},\n{", "}, {").replace("\\n", " ").replace("\n", " ").replace(
+        '[ ', '[').replace('[ ', '[').replace(" {", "{").replace(" ]", "]").replace(' ]', ']').strip()
+
+    if "provide the paragraph" in prediction:
+        return []
+
+    try:
+        # Model replies are often Python-style lists (single quotes), so try literal_eval first
+        pred_labels = ast.literal_eval(prediction)
+    except Exception:
+        # Fall back to strict JSON for double-quoted replies
+        pred_labels = json.loads(prediction)
+
+    format_pred_label
= [] + for i, label in enumerate(pred_labels): + if 'technique' not in label or 'start' not in label or 'end' not in label \ + or "text" not in label or len(label["text"]) < 2: + continue + + label['technique'] = label['technique'].strip().lower() + label['technique'] = fix_single_label(label['technique']) + + format_pred_label.append(label) + + if len(format_pred_label) == 0: + return [] + + final_labels = [] + for pred_label in format_pred_label: + if pred_label['technique'] != "no_technique": + final_labels.append(pred_label) + + return final_labels + + +def post_process(response): + labels = response["choices"][0]["message"]["content"].lower().replace("label: ", "").strip() + labels = fix_span(labels) + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_FewShot_Explain.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot_Explain.py similarity index 100% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_FewShot_Explain.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot_Explain.py diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_ZeroShot.py similarity index 97% rename from assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_ZeroShot.py rename to assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_ZeroShot.py index be678413..ae1c4060 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_ZeroShot.py @@ -1,249 +1,249 @@ -import ast -import json -import re -import codecs - -from llmebench.datasets import ArProSpanDataset -from llmebench.models import OpenAIModel -from llmebench.tasks import ArProSpanTask - -ESCAPE_SEQUENCE_RE = re.compile(r''' - ( \\U........ # 8-digit hex escapes - | \\u.... # 4-digit hex escapes - | \\x.. # 2-digit hex escapes - | \\[0-7]{1,3} # Octal escapes - | \\N\{[^}]+\} # Unicode characters by name - | \\[\\'"abfnrtv] # Single-character escapes - )''', re.UNICODE | re.VERBOSE) - - -def metadata(): - return { - "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "gpt-4-32k (version 0314)", - "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 
We implemented correcting predicted span by GPT.", - "scores": {"Micro-F1": "0.117"}, - } - - -def config(): - return { - "dataset": ArProSpanDataset, - "task": ArProSpanTask, - "task_args": {"correct_span": True}, - "model": OpenAIModel, - "model_args": { - "max_tries": 3, - }, - } - - -def prompt(input_sample): - prompt_text = ( - f"Your task is to analyze the Paragraph and determine if it contains the following propaganda techniques and then generate an Explanation/rationale for your predictions.\n" - f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , " - f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , " - f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , " - f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , " - f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'" - ) - - out_prompt = prompt_text + ( - f"\nBased on the instructions above analyze the following Paragraph and answer exactly and only by returning a list of " - f"the matching labels from the aforementioned techniques, and specify the start position and end position of the text span matching each technique, " - f"and for each predicted technique, return a 1-sentence long Explanation for your label." - f' Use the following template and return the results as a Labels list of json strings [{{"technique": ,"text": ,"start": ,"end": ,"explanation": }}]\n\n' - ) - # out_prompt = out_prompt + "Paragraph: " + input_sample + "\n\nInitial Labels List: \n\n" - - out_prompt = out_prompt + "Paragraph: " + input_sample + "\n\nLabels: \n\n" - - - # out_prompt = out_prompt + ( - # f"Based on the instructions above, and your predictions in Initial Labels List, " - # f"analyze the Paragraph again and answer exactly and only by returning a list of the matching " - # f"labels from the aforementioned techniques and specify the start position and end position of the " - # f"text span matching each technique. Use the following template and return the results as a Final " - # f'Labels List of json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n' - # ) - - # out_prompt = out_prompt + ( - # f"Given your predictions in Initial Labels List and the associated explanations, analyze the Paragraph again " - # f"and revise your decision and make any " - # f"needed corrections/updates on the predicted labels. " - # f"Use the following template and return the predictions after revision as a Final " - # f'Labels List of json strings [{{"technique": ,"text": ,"start": ,"end": , "explanation": }}]\n\n' - # ) - - # out_prompt = out_prompt + ( - #f"Given your predictions in Labels list, read your explanation per prediction and revise your prediction. 
" - #f'Analyze the Paragraph AGAIN and answer exactly and only by returning a list of Final Labels as json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n' - #) - # - #out_prompt = out_prompt + "Final Labels: \n\n" - - return [ - { - "role": "system", - "content": "You are an expert annotator.", - }, - { - "role": "user", - "content": out_prompt, - }, - ] - - -def decode_escapes(s): - def decode_match(match): - return codecs.decode(match.group(0), 'unicode-escape') - - return ESCAPE_SEQUENCE_RE.sub(decode_match, s) - - -def fix_single_label(label): - if "slogan" in label: - label_fixed = "Slogans" - if "loaded" in label: - label_fixed = "Loaded_Language" - if "prejudice" in label or "fear" in label or "mongering" in label: - label_fixed = "Appeal_to_Fear-Prejudice" - if ( - "terminating" in label - or "thought" in label - or "conversation" in label - or "killer" in label - ): - label_fixed = "Conversation_Killer" - if "calling" in label or label == "name c" or "labeling" in label: - label_fixed = "Name_Calling-Labeling" - if ( - "minimisation" in label - or label == "exaggeration minim" - or "exaggeration" in label - ): - label_fixed = "Exaggeration-Minimisation" - if "values" in label: - label_fixed = "Appeal_to_Values" - if "flag" in label or "wav" in label: - label_fixed = "Flag_Waving" - if "obfusc" in label or "vague" in label or "confusion" in label: - label_fixed = "Obfuscation-Vagueness-Confusion" - if "causal" in label: - label_fixed = "Causal_Oversimplification" - if "conseq" in label: - label_fixed = "Consequential_Oversimplification" - if "authority" in label: - label_fixed = "Appeal_to_Authority" - if "choice" in label or "dilemma" in label or "false" in label: - label_fixed = "False_Dilemma-No_Choice" - if "herring" in label or "irrelevant" in label: - label_fixed = "Red_Herring" - if "straw" in label or "misrepresentation" in label: - label_fixed = "Straw_Man" - if "guilt" in label or "association" in label: - label_fixed = "Guilt_by_Association" - if "questioning" in label or "reputation" in label: - label_fixed = "Questioning_the_Reputation" - if "whataboutism" in label: - label_fixed = "Whataboutism" - if "doubt" in label: - label_fixed = "Doubt" - if "doubt" in label: - label_fixed = "Doubt" - if "time" in label: - label_fixed = "Appeal_to_Time" - if "popularity" in label: - label_fixed = "Appeal_to_Popularity" - if "repetition" in label: - label_fixed = "Repetition" - if "hypocrisy" in label: - label_fixed = "Appeal_to_Hypocrisy" - - if ( - "no propaganda" in label - or "no technique" in label - or label == "" - or label == "no" - or label == "appeal to history" - or label == "appeal to emotion" - or label == "appeal to" - or label == "appeal" - or label == "appeal to author" - or label == "emotional appeal" - or "no techn" in label - or "hashtag" in label - or "theory" in label - or "specific mention" in label - or "sarcasm" in label - or "frustration" in label - or "analogy" in label - or "metaphor" in label - or "religious" in label - or "gratitude" in label - or 'no_technique' in label - or "technique" in label - or 'rhetorical' in label): - label_fixed = "no_technique" - - return label_fixed - - -def fix_span(prediction): - # print(prediction) - prediction = prediction.replace("},\n{", "}, {").replace("\\n", " ").replace("\n", " ").replace( - '[ ', '[').replace('[ ', '[').replace(" {", "{").replace(" ]", "]").replace(' ]', ']').strip() - - # print(prediction) - - if "provide the paragraph" in prediction: return [] - - try: - pred_labels = 
ast.literal_eval(prediction)
-    except:
-        # print("ERRORRR!")
-        pred_labels = json.loads(prediction)
-
-    # print(pred_labels)
-
-    # print(prediction)
-    format_pred_label = []
-    for i, label in enumerate(pred_labels):
-        if 'technique' not in label or 'start' not in label or 'end' not in label \
-                or "text" not in label or len(label["text"]) < 2:
-            continue
-
-        label['technique'] = label['technique'].strip().lower()
-        label['technique'] = fix_single_label(label['technique'])
-
-        format_pred_label.append(label)
-
-    if len(format_pred_label) == 0:
-        return []
-
-    final_labels = []
-    for pred_label in format_pred_label:
-        if pred_label['technique'] != "no_technique":
-            final_labels.append(pred_label)
-
-    return final_labels
-
-
-def post_process(response):
-    labels = response["choices"][0]["message"]["content"].lower()
-    #labels1,labels2 = labels.split("final labels:")
-    #labels1 = labels1.replace('labels:','').split("\n")[0].strip()
-    #labels1 = fix_span(labels1)
-    #labels = fix_span(labels2)
-
-    labels = labels.replace("labels:","")
-    labels = fix_span(labels)
-
-    # if labels1 != labels:
-    #     print(labels1)
-    #     print('=' * 35)
-    #     print(labels)
-    # else:
-    #     print("=================LABELS BEFORE MATCH AFTER===================")
-
-    return labels
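Both the old and the new `fix_span` try `ast.literal_eval` first and fall back to `json.loads`: model replies often come back as Python-style lists with single quotes, which strict JSON rejects, while JSON-only literals such as `true` are rejected by `literal_eval`. A minimal, self-contained sketch of that fallback pattern (the `parse_labels` helper is illustrative, not part of llmebench):

```python
import ast
import json


def parse_labels(prediction: str) -> list:
    """Parse a model reply into a list of label dicts.

    literal_eval covers Python-style replies (single quotes);
    json.loads covers strict JSON, including true/false literals.
    """
    try:
        return ast.literal_eval(prediction)
    except (ValueError, SyntaxError):
        return json.loads(prediction)


# Python-style reply: json.loads would fail on the single quotes
print(parse_labels("[{'technique': 'doubt', 'text': 'some span', 'start': 0, 'end': 9}]"))
# Strict JSON reply with a `true` literal: literal_eval raises ValueError,
# so the json.loads fallback handles it
print(parse_labels('[{"technique": "doubt", "text": "some span", "start": 0, "end": 9, "verified": true}]'))
```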
+import ast
+import json
+import re
+import codecs
+
+from llmebench.datasets import ArProSpanDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ArProSpanTask
+
+ESCAPE_SEQUENCE_RE = re.compile(r'''
+    ( \\U........      # 8-digit hex escapes
+    | \\u....          # 4-digit hex escapes
+    | \\x..            # 2-digit hex escapes
+    | \\[0-7]{1,3}     # Octal escapes
+    | \\N\{[^}]+\}     # Unicode characters by name
+    | \\[\\'"abfnrtv]  # Single-character escapes
+    )''', re.UNICODE | re.VERBOSE)
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "gpt-4-32k (version 0314)",
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. We implemented correction of the spans predicted by GPT.",
+        "scores": {"Micro-F1": "0.117"},
+    }
+
+
+def config():
+    return {
+        "dataset": ArProSpanDataset,
+        "task": ArProSpanTask,
+        "task_args": {"correct_span": True},
+        "model": OpenAIModel,
+        "model_args": {
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    prompt_text = (
+        f"Your task is to analyze the Paragraph and determine if it contains the following propaganda techniques and then generate an Explanation/rationale for your predictions.\n"
+        f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , "
+        f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , "
+        f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , "
+        f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , "
+        f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'"
+    )
+
+    out_prompt = prompt_text + (
+        f"\nBased on the instructions above analyze the following Paragraph and answer exactly and only by returning a list of "
+        f"the matching labels from the aforementioned techniques, and specify the start position and end position of the text span matching each technique, "
+        f"and for each predicted technique, return a 1-sentence long Explanation for your label."
+        f' Use the following template and return the results as a Labels list of json strings [{{"technique": ,"text": ,"start": ,"end": ,"explanation": }}]\n\n'
+    )
+    # out_prompt = out_prompt + "Paragraph: " + input_sample + "\n\nInitial Labels List: \n\n"
+
+    out_prompt = out_prompt + "Paragraph: " + input_sample + "\n\nLabels: \n\n"
+
+    # out_prompt = out_prompt + (
+    #     f"Based on the instructions above, and your predictions in Initial Labels List, "
+    #     f"analyze the Paragraph again and answer exactly and only by returning a list of the matching "
+    #     f"labels from the aforementioned techniques and specify the start position and end position of the "
+    #     f"text span matching each technique. Use the following template and return the results as a Final "
+    #     f'Labels List of json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n'
+    # )
+
+    # out_prompt = out_prompt + (
+    #     f"Given your predictions in Initial Labels List and the associated explanations, analyze the Paragraph again "
+    #     f"and revise your decision and make any "
+    #     f"needed corrections/updates on the predicted labels. "
+    #     f"Use the following template and return the predictions after revision as a Final "
+    #     f'Labels List of json strings [{{"technique": ,"text": ,"start": ,"end": , "explanation": }}]\n\n'
+    # )
+
+    # out_prompt = out_prompt + (
+    #     f"Given your predictions in Labels list, read your explanation per prediction and revise your prediction. "
+    #     f'Analyze the Paragraph AGAIN and answer exactly and only by returning a list of Final Labels as json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n'
+    # )
+    #
+    # out_prompt = out_prompt + "Final Labels: \n\n"
+
+    return [
+        {
+            "role": "system",
+            "content": "You are an expert annotator.",
+        },
+        {
+            "role": "user",
+            "content": out_prompt,
+        },
+    ]
+
+
+def decode_escapes(s):
+    def decode_match(match):
+        return codecs.decode(match.group(0), 'unicode-escape')
+
+    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)
+
+
+def fix_single_label(label):
+    label_fixed = "no_technique"  # default, so an unrecognized label cannot leave label_fixed unbound
+    if "slogan" in label:
+        label_fixed = "Slogans"
+    if "loaded" in label:
+        label_fixed = "Loaded_Language"
+    if "prejudice" in label or "fear" in label or "mongering" in label:
+        label_fixed = "Appeal_to_Fear-Prejudice"
+    if (
+        "terminating" in label
+        or "thought" in label
+        or "conversation" in label
+        or "killer" in label
+    ):
+        label_fixed = "Conversation_Killer"
+    if "calling" in label or label == "name c" or "labeling" in label:
+        label_fixed = "Name_Calling-Labeling"
+    if (
+        "minimisation" in label
+        or label == "exaggeration minim"
+        or "exaggeration" in label
+    ):
+        label_fixed = "Exaggeration-Minimisation"
+    if "values" in label:
+        label_fixed = "Appeal_to_Values"
+    if "flag" in label or "wav" in label:
+        label_fixed = "Flag_Waving"
+    if "obfusc" in label or "vague" in label or "confusion" in label:
+        label_fixed = "Obfuscation-Vagueness-Confusion"
+    if "causal" in label:
+        label_fixed = "Causal_Oversimplification"
+    if "conseq" in label:
+        label_fixed = "Consequential_Oversimplification"
+    if "authority" in label:
+        label_fixed = "Appeal_to_Authority"
+    if "choice" in label or "dilemma" in label or "false" in label:
+        label_fixed = "False_Dilemma-No_Choice"
+    if "herring" in label or "irrelevant" in label:
+        label_fixed = "Red_Herring"
+    if "straw" in label or "misrepresentation" in label:
+        label_fixed = "Straw_Man"
+    if "guilt" in label or "association" in label:
+        label_fixed = "Guilt_by_Association"
+    if "questioning" in label or "reputation" in label:
+        label_fixed = "Questioning_the_Reputation"
+    if "whataboutism" in label:
+        label_fixed = "Whataboutism"
+    if "doubt" in label:
+        label_fixed = "Doubt"
+    if "time" in label:
+        label_fixed = "Appeal_to_Time"
+    if "popularity" in label:
+        label_fixed = "Appeal_to_Popularity"
+    if "repetition" in label:
+        label_fixed = "Repetition"
+    if "hypocrisy" in label:
+        label_fixed = "Appeal_to_Hypocrisy"
+
+    if (
+        "no propaganda" in label
+        or "no technique" in label
+        or label == ""
+        or label == "no"
+        or label == "appeal to history"
+        or label == "appeal to emotion"
+        or label == "appeal to"
+        or label == "appeal"
+        or label == "appeal to author"
+        or label == "emotional appeal"
+        or "no techn" in label
+        or "hashtag" in label
+        or "theory" in label
+        or "specific mention" in label
+        or "sarcasm" in label
+        or "frustration" in label
+        or "analogy" in label
+        or "metaphor" in label
+        or "religious" in label
+        or "gratitude" in label
+        or 'no_technique' in label
+        or "technique" in label
+        or 'rhetorical' in label):
+        label_fixed = "no_technique"
+
+    return label_fixed
+
+
+def fix_span(prediction):
+    prediction = prediction.replace("},\n{", "}, {").replace("\\n", " ").replace("\n", " ").replace(
+        '[ ', '[').replace('[ ', '[').replace(" {", "{").replace(" ]", "]").replace(' ]', ']').strip()
+
+    if "provide the paragraph" in prediction:
+        return []
+
+    try:
+        # Model replies are often Python-style lists (single quotes), so try literal_eval first
+        pred_labels = ast.literal_eval(prediction)
+    except Exception:
+        # Fall back to strict JSON for double-quoted replies
+        pred_labels = json.loads(prediction)
+
+    format_pred_label = []
+    for i, label in enumerate(pred_labels):
+        if 'technique' not in label or 'start' not in label or 'end' not in label \
+                or "text" not in label or len(label["text"]) < 2:
+            continue
+
+        label['technique'] = label['technique'].strip().lower()
+        label['technique'] = fix_single_label(label['technique'])
+
+        format_pred_label.append(label)
+
+    if len(format_pred_label) == 0:
+        return []
+
+    final_labels = []
+    for pred_label in format_pred_label:
+        if pred_label['technique'] != "no_technique":
+            final_labels.append(pred_label)
+
+    return final_labels
+
+
+def post_process(response):
+    labels = response["choices"][0]["message"]["content"].lower()
+    # labels1, labels2 = labels.split("final labels:")
+    # labels1 = labels1.replace('labels:', '').split("\n")[0].strip()
+    # labels1 = fix_span(labels1)
+    # labels = fix_span(labels2)
+
+    labels = labels.replace("labels:", "")
+    labels = fix_span(labels)
+
+    return labels