From 491fd23743477ca21d61a95a891d9568e16721f6 Mon Sep 17 00:00:00 2001
From: Arid Hasan
Date: Tue, 2 Jan 2024 21:01:26 -0400
Subject: [PATCH 1/4] Add factuality, disinformation, harmful content assets
 for JAIS

---
 .../Adult_JAIS13b_ZeroShot.py                |  46 +++++
 .../CT22Attentionworthy_JAIS13b_ZeroShot.py  |  73 ++++++++
 .../CT22Checkworthiness_JAIS13b_ZeroShot.py  |  64 +++++++
 .../CT22Claim_JAIS13b_ZeroShot.py            |  56 ++++++
 .../ANSFactuality_JAIS13b_ZeroShot.py        |  63 +++++++
 .../COVID19Factuality_JAIS13b_ZeroShot.py    |  55 ++++++
 .../UnifiedFCFactuality_JAIS13b_ZeroShot.py  |  60 +++++++
 .../CT22Harmful_JAIS13b_ZeroShot.py          |  70 ++++++++
 .../OSACT4SubtaskB_JAIS13b_ZeroShot.py       |  51 ++++++
 .../OSACT4SubtaskA_JAIS13b_ZeroShot.py       |  44 +++++
 .../propaganda/WANLP22T3_JAIS13b_ZeroShot.py | 167 ++++++++++++++++++
 .../spam/Spam_JAIS13b_ZeroShot.py            |  48 +++++
 .../CT23Subjectivity_JAIS13b_ZeroShot.py     |  50 ++++++
 13 files changed, 847 insertions(+)
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..74149e7c
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py
@@ -0,0 +1,46 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f'Given the following tweet, label it as "ADULT" or "NOT_ADULT" based on the 
content of the tweet.\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"].replace("label: ", "") + j = out.find(".") + if j > 0: + out = out[0:j] + return out diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..1fe36587 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py @@ -0,0 +1,73 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AttentionworthyTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f'Annotate "tweet" into one of the following categories: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = ( + label.lower() + .replace(" - ", ", ") + .replace(",", "") + .replace(".", "") + .replace("label:", "") + ) + label = label.strip() + # label = re.sub("\s+", "_", label) + if label.startswith("no"): + label_fixed = "no_not_interesting" + elif label == "yes_discusses_covid-19_vaccine_side_effects": + label_fixed = "yes_discusses_cure" + elif label == "yes_harmful": + label_fixed = "harmful" + elif label.startswith("yes"): + label_fixed = label + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..9f267caa --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py @@ -0,0 +1,64 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import FastChatModel +from llmebench.tasks import CheckworthinessTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": 
{"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + if "label: " in label: + arr = label.split("label: ") + label = arr[1].strip() + + if label == "checkworthy" or label == "Checkworthy": + label_fixed = "1" + elif label == "Not_checkworthy." or label == "not_checkworthy": + label_fixed = "0" + elif "not_checkworthy" in label or "label: not_checkworthy" in label: + label_fixed = "0" + elif "checkworthy" in label or "label: checkworthy" in label: + label_fixed = "1" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..6d36b240 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py @@ -0,0 +1,56 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"Given the following tweet, please identify if it contains a claim. 
If it does, annotate 'yes', if it does not, annotate 'no'\n\n" + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + if "label: " in label: + arr = label.split("label: ") + label = arr[1].strip() + + if label == "yes" or label == "the sentence contains a factual claim": + label_fixed = "1" + if label == "no": + label_fixed = "0" + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..fac8db40 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py @@ -0,0 +1,63 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = ( + "Detect whether the information in the sentence is factually true or false. " + "Answer only by true or false.\n\n" + + "Sentence: " + + input_sample + + "\nlabel: \n" + ) + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "label: 1" in input_label + or "label: yes" in input_label + ): + pred_label = "true" + elif ( + "false" in input_label + or "label: 0" in input_label + or "label: no" in input_label + ): + pred_label = "false" + else: + print("label problem!! 
" + input_label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..9f6965ae --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py @@ -0,0 +1,55 @@ +from llmebench.datasets import COVID19FactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": COVID19FactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["yes", "no"], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Annotate the "tweet" into one of the following categories: correct or incorrect\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + if label.startswith("I am unable to verify".lower()) or label.startswith( + "I am unable to categorize".lower() + ): + label_fixed = None + elif "label: incorrect" in label or "incorrect" in label: + label_fixed = "no" + elif "label: correct" in label or "correct" in label: + label_fixed = "yes" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..f13af603 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py @@ -0,0 +1,60 @@ +from llmebench.datasets import UnifiedFCFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": UnifiedFCFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["true", "false"], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Annotate the "text" into one of the following categories: correct or incorrect\n\n' + f"text: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + # label_fixed = label.replace("label:", "").strip() + + if ( + label.startswith("I am unable to verify".lower()) + or label.startswith("I am unable to categorize".lower()) + or label.startswith( + "I am an AI language model and I am unable to verify".lower() + ) + ): + label_fixed = None + elif "label: incorrect" in label or "incorrect" in label: + label_fixed = "false" + elif "label: correct" in label or "correct" in label: + label_fixed = "true" + else: + label_fixed = None + + return label_fixed diff 
--git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..1defc2f1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py @@ -0,0 +1,70 @@ +import re + +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HarmfulDetectionTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f'Given the following tweet, analyze its content and identify if it could potentially cause harm to an individual, society, or an organization. Annotate the tweet as "harmful" if it has the potential to cause harm, or "not_harmful" if it does not.\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + + if "label: harmful" in label: + label_fixed = "1" + elif "label: not_harmful" in label: + label_fixed = "0" + elif "label: " in label: + arr = label.split("label: ") + label = arr[1].strip() + + elif label.startswith("harmful") or label.startswith("yes"): + label_fixed = "1" + + elif ( + label.startswith("no") + or label == "label: safe" + or label == "not_harmful" + or "not harmful" in label + ): + label_fixed = "0" + else: + label = label.replace(".", "").strip().lower() + label = label.replace("label:", "").strip() + label_fixed = label + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..79d29cf9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py @@ -0,0 +1,51 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HateSpeechTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Given the following tweet, label it as "HS" or "NOT_HS" based on the content of the tweet: \n {input_sample}' + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + j = out.find(".") + if j > 0: + out = out[0:j] + + if "not_hate_speech" in out or "no_hate_speech" in out or "NOT_HS" == out: + out = "NOT_HS" + elif "hate_speech" in 
out or "HS" == out: + out = "HS" + else: + out = None + return out diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..a4ac941e --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py @@ -0,0 +1,44 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import FastChatModel +from llmebench.tasks import OffensiveTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'if the following Arabic sentence is offensive, just say "OFF", otherwise, say just "NOT_OFF" without explanation: \n {input_sample}' + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + j = out.find(".") + if j > 0: + out = out[0:j] + return out diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..5d437cdf --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py @@ -0,0 +1,167 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import FastChatModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Label this "tweet" based on the following propaganda techniques:\n\n' + f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to 
authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'" + f"\nAnswer (only yes/no) in the following format: \n" + f"'Doubt': 'yes', " + f"'Smears': 'no', \n\n" + f"tweet: {input_sample}\n\n" + f"label: \n" + ) + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def fix_label(pred_label): + if "used in this text" in pred_label: + return ["no technique"] + + labels_fixed = [] + pred_label = pred_label.replace('"', "'").split("', '") + pred_labels = [] + + for l in pred_label: + splits = l.replace(",", "").split(":") + if len(splits) > 1 and "no" in splits[1]: + continue + pred_labels.append(splits[0].replace("'", "")) + + if len(pred_labels) == 0: + return ["no technique"] + + for label in pred_labels: + label = label.replace(".", "").strip() + label = re.sub("-", " ", label) + label = label.strip().lower() + + # Handle case of single word labels like "Smears" so we just capitalize it + label_fixed = label.capitalize() + + # print(label) + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal to fear/prejudice" + if "terminating" in label or "thought" in label: + label_fixed = "Thought-terminating cliché" + if "calling" in label or label == "name c": + label_fixed = "Name calling/Labeling" + if "minimisation" in label or label == "exaggeration minim": + label_fixed = "Exaggeration/Minimisation" + if "glittering" in label: + label_fixed = "Glittering generalities (Virtue)" + if "flag" in label: + label_fixed = "Flag-waving" + if "obfuscation" in label: + label_fixed = "Obfuscation, Intentional vagueness, Confusion" + if "oversimplification" in label or "causal" in label: + label_fixed = "Causal Oversimplification" + if "authority" in label: + label_fixed = "Appeal to authority" + if "dictatorship" in label or "black" in label or "white" in label: + label_fixed = "Black-and-white Fallacy/Dictatorship" + if "herring" in label or "irrelevant" in label: + label_fixed = "Presenting Irrelevant Data (Red Herring)" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Misrepresentation of Someone's Position (Straw Man)" + if "whataboutism" in label: + label_fixed = "Whataboutism" + + if ( + "no propaganda" in label + or "technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "religious" in label + or "gratitude" in label + ): + label_fixed = "no technique" + + labels_fixed.append(label_fixed) + + out_put_labels = [] + # Remove no technique label when we have other techniques for the same text + if len(labels_fixed) > 1: + for flabel in labels_fixed: + if flabel != "no technique": + out_put_labels.append(flabel) + return out_put_labels + + return labels_fixed + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + pred_label = fix_label(label) + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..534d5fd7 --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py @@ -0,0 +1,48 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SpamTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f"If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n {input_sample}\n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + j = out.find(".") + if j > 0: + out = out[0:j] + if "NOTADS" in out: + out = '__label__NOTADS' + elif "ADS" in out: + out = '__label__ADS' + return out diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..058fc5d8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py @@ -0,0 +1,50 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SubjectivityTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": FastChatModel, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"صنف الجملة إلى لاموضوعية أو موضوعية.\n\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower().replace('.', '') + + if "لاموضوعية" in label: + label_fixed = "SUBJ" + elif ( + label == "موضوعية" or label == "التصنيف: موضوعية" or "التصنيف: موضوعية" in label + ): + label_fixed = "OBJ" + else: + label_fixed = None + + return label_fixed From 1e077c7e6d1d3ad1279b204fdf9a3fb9aab2ca83 Mon Sep 17 00:00:00 2001 From: Fahim Imaduddin Dalvi Date: Tue, 30 Jan 2024 16:03:09 +0300 Subject: [PATCH 2/4] Format code --- .../hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py | 4 +--- .../offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py | 4 +--- .../spam/Spam_JAIS13b_ZeroShot.py | 8 +++----- .../subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py | 2 +- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py index 79d29cf9..03233b8e 100644 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py +++ 
b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py @@ -25,9 +25,7 @@ def config(): def prompt(input_sample): - base_prompt = ( - f'Given the following tweet, label it as "HS" or "NOT_HS" based on the content of the tweet: \n {input_sample}' - ) + base_prompt = f'Given the following tweet, label it as "HS" or "NOT_HS" based on the content of the tweet: \n {input_sample}' return [ { "role": "user", diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py index a4ac941e..d56bb51e 100644 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py @@ -25,9 +25,7 @@ def config(): def prompt(input_sample): - base_prompt = ( - f'if the following Arabic sentence is offensive, just say "OFF", otherwise, say just "NOT_OFF" without explanation: \n {input_sample}' - ) + base_prompt = f'if the following Arabic sentence is offensive, just say "OFF", otherwise, say just "NOT_OFF" without explanation: \n {input_sample}' return [ { "role": "user", diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py index 534d5fd7..80909dfe 100644 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py @@ -25,9 +25,7 @@ def config(): def prompt(input_sample): - base_prompt = ( - f"If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n {input_sample}\n" - ) + base_prompt = f"If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n {input_sample}\n" return [ { "role": "user", @@ -42,7 +40,7 @@ def post_process(response): if j > 0: out = out[0:j] if "NOTADS" in out: - out = '__label__NOTADS' + out = "__label__NOTADS" elif "ADS" in out: - out = '__label__ADS' + out = "__label__ADS" return out diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py index 058fc5d8..6acf4ca6 100644 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py @@ -36,7 +36,7 @@ def prompt(input_sample): def post_process(response): - label = response["choices"][0]["message"]["content"].lower().replace('.', '') + label = response["choices"][0]["message"]["content"].lower().replace(".", "") if "لاموضوعية" in label: label_fixed = "SUBJ" From 562921b2e182002144583b85a5530bc728ad3696 Mon Sep 17 00:00:00 2001 From: Fahim Imaduddin Dalvi Date: Tue, 30 Jan 2024 16:40:33 +0300 Subject: [PATCH 3/4] Fix minor metadata issues and remove unused imports --- .../adult_content_detection/Adult_JAIS13b_ZeroShot.py | 5 ++--- .../CT22Attentionworthy_JAIS13b_ZeroShot.py | 5 ++--- 
.../CT22Checkworthiness_JAIS13b_ZeroShot.py | 7 ++----- .../claim_detection/CT22Claim_JAIS13b_ZeroShot.py | 5 ++--- .../factuality/ANSFactuality_JAIS13b_ZeroShot.py | 5 ++--- .../factuality/COVID19Factuality_JAIS13b_ZeroShot.py | 5 ++--- .../factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py | 5 ++--- .../CT22Harmful_JAIS13b_ZeroShot.py | 7 ++----- .../hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py | 5 ++--- .../OSACT4SubtaskA_JAIS13b_ZeroShot.py | 5 ++--- .../propaganda/WANLP22T3_JAIS13b_ZeroShot.py | 9 ++------- .../spam/Spam_JAIS13b_ZeroShot.py | 5 ++--- .../subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py | 5 ++--- 13 files changed, 26 insertions(+), 47 deletions(-) diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py index 74149e7c..faf29cdf 100644 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py index 1fe36587..6266751d 100644 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py index 9f267caa..54e10e93 100644 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py @@ -1,5 +1,3 @@ -import re - from llmebench.datasets import CT22CheckworthinessDataset from llmebench.models import FastChatModel from llmebench.tasks import CheckworthinessTask @@ -8,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py index 6d36b240..76283eea 100644 --- 
a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py index fac8db40..d2a4bd7f 100644 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py index 9f6965ae..7addca53 100644 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py index f13af603..3cba8362 100644 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py index 1defc2f1..18de6cb1 100644 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py @@ -1,5 +1,3 @@ -import re - from llmebench.datasets import CT22HarmfulDataset from llmebench.models import FastChatModel from llmebench.tasks import HarmfulDetectionTask @@ -8,9 +6,8 @@ def metadata(): 
return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py index 03233b8e..651ea9f6 100644 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py index d56bb51e..b845742a 100644 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py index 5d437cdf..42b81aeb 100644 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py @@ -1,4 +1,3 @@ -import random import re from llmebench.datasets import WANLP22T3PropagandaDataset @@ -6,15 +5,11 @@ from llmebench.tasks import MultilabelPropagandaTask -random.seed(1333) - - def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py index 80909dfe..e0342419 100644 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "JAIS-13b", - "description": "Locally hosted JAIS-13b-chat model using FastChat.", - "scores": {"Macro-F1": ""}, + "model": "Jais-13b-chat", + "description": "Locally hosted Jais-13b-chat model using FastChat.", } diff --git 
a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py
index 6acf4ca6..9360df94 100644
--- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py
@@ -6,9 +6,8 @@ def metadata():
     return {
         "author": "Arabic Language Technologies, QCRI, HBKU",
-        "model": "JAIS-13b",
-        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
-        "scores": {"Macro-F1": ""},
+        "model": "Jais-13b-chat",
+        "description": "Locally hosted Jais-13b-chat model using FastChat.",
     }

From 9a717caadd340884f55f779d46885ddd14678ab2 Mon Sep 17 00:00:00 2001
From: Fahim Imaduddin Dalvi
Date: Tue, 30 Jan 2024 16:43:27 +0300
Subject: [PATCH 4/4] Fix paths in ANSFactuality dataset

---
 llmebench/datasets/ANSFactuality.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llmebench/datasets/ANSFactuality.py b/llmebench/datasets/ANSFactuality.py
index 72724643..e69481d9 100644
--- a/llmebench/datasets/ANSFactuality.py
+++ b/llmebench/datasets/ANSFactuality.py
@@ -26,8 +26,8 @@ def metadata():
         "link": "https://github.com/latynt/ans",
         "download_url": "https://github.com/latynt/ans/archive/refs/heads/master.zip",
         "splits": {
-            "test": "claim/test.csv",
-            "train": "claim/train.csv",
+            "test": "ans-master/data/claim/test.csv",
+            "train": "ans-master/data/claim/train.csv",
         },
         "task_type": TaskType.Classification,
         "class_labels": ["true", "false"],
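
A quick, endpoint-free way to exercise the post_process functions added in this series is to feed them a mocked response in the {"choices": [{"message": {"content": ...}}]} shape they all index into. The sketch below is illustrative only: it assumes llmebench is importable (each asset imports its dataset, task, and model classes at load time), it reuses the claim-detection asset path from PATCH 1/4, and the fake_response helper and sample strings are invented for the example.

import importlib.util

# Path taken from PATCH 1/4; swap in any other asset from this series to test it.
ASSET_PATH = (
    "assets/ar/factuality_disinformation_harmful_content/"
    "claim_detection/CT22Claim_JAIS13b_ZeroShot.py"
)

spec = importlib.util.spec_from_file_location("ct22_claim_asset", ASSET_PATH)
asset = importlib.util.module_from_spec(spec)
spec.loader.exec_module(asset)  # requires llmebench to be installed


def fake_response(text):
    # Mirrors the FastChat-style response shape every post_process in this PR reads.
    return {"choices": [{"message": {"content": text}}]}


for raw in ["yes", "no", "label: yes", "something unexpected"]:
    try:
        print(repr(raw), "->", repr(asset.post_process(fake_response(raw))))
    except Exception as err:
        # Unparsed model output; a reviewer may prefer an explicit None over an error here.
        print(repr(raw), "-> raised", type(err).__name__)

Running the same loop against the other assets (harmfulness, attentionworthy, propaganda, etc.) is a cheap way to confirm that each parser returns either a value from its class_labels set or None for the outputs the model is likely to produce.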