From f0f20623e971b8b1c89d6749057364073c031c59 Mon Sep 17 00:00:00 2001
From: Arid Hasan
Date: Tue, 2 Jan 2024 21:01:26 -0400
Subject: [PATCH] Add factuality, disinformation, harmful content assets for JAIS

---
 .../Adult_JAIS13b_ZeroShot.py                 |  46 +++++
 .../CT22Attentionworthy_JAIS13b_ZeroShot.py   |  75 ++++++++
 .../CT22Checkworthiness_JAIS13b_ZeroShot.py   |  53 ++++++
 .../CT22Claim_JAIS13b_ZeroShot.py             |  53 ++++++
 .../ANSFactuality_JAIS13b_ZeroShot.py         |  63 +++++++
 .../COVID19Factuality_JAIS13b_ZeroShot.py     |  55 ++++++
 .../UnifiedFCFactuality_JAIS13b_ZeroShot.py   |  57 ++++++
 .../CT22Harmful_JAIS13b_ZeroShot.py           |  61 +++++++
 .../OSACT4SubtaskB_JAIS13b_ZeroShot.py        |  51 ++++++
 .../OSACT4SubtaskA_JAIS13b_ZeroShot.py        |  51 ++++++
 .../propaganda/WANLP22T3_JAIS13b_ZeroShot.py  | 167 ++++++++++++++++++
 .../spam/Spam_JAIS13b_ZeroShot.py             |  48 +++++
 .../CT23Subjectivity_JAIS13b_ZeroShot.py      |  48 +++++
 13 files changed, 828 insertions(+)
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..74149e7c
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py
@@ -0,0 +1,46 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f'Given the following tweet, label it as "ADULT" or "NOT_ADULT" based on the content of the tweet.\n\n'
+        f"tweet: {input_sample}\n"
+        f"label: \n"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    out = response["choices"][0]["message"]["content"].replace("label: ", "")
+    j = out.find(".")
+    if j > 0:
+        out = out[0:j]
+    return out
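The Adult asset's post-processing strips a leading "label: " prefix and truncates at the first period, so a verbose completion still reduces to the bare class label. A minimal sanity check (a sketch; the OpenAI-style response dict mirrors what all the assets in this patch consume):

    # Hypothetical model output with a trailing explanation.
    sample = {"choices": [{"message": {"content": "label: NOT_ADULT. The tweet is benign."}}]}
    assert post_process(sample) == "NOT_ADULT"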
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..1fe36587
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py
@@ -0,0 +1,75 @@
+from llmebench.datasets import CT22AttentionworthyDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AttentionworthyTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": CT22AttentionworthyDataset,
+        "task": AttentionworthyTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": [
+                "yes_discusses_action_taken",
+                "harmful",
+                "yes_discusses_cure",
+                "yes_asks_question",
+                "no_not_interesting",
+                "yes_other",
+                "yes_blame_authorities",
+                "yes_contains_advice",
+                "yes_calls_for_action",
+            ],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f'Annotate "tweet" into one of the following categories: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action\n\n'
+        f"tweet: {input_sample}\n"
+        f"label: \n"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+
+    label = (
+        label.lower()
+        .replace(" - ", ", ")
+        .replace(",", "")
+        .replace(".", "")
+        .replace("label:", "")
+    )
+    label = label.strip()
+
+    if label.startswith("no"):
+        label_fixed = "no_not_interesting"
+    elif label == "yes_discusses_covid-19_vaccine_side_effects":
+        label_fixed = "yes_discusses_cure"
+    elif label == "yes_harmful":
+        label_fixed = "harmful"
+    elif label.startswith("yes"):
+        label_fixed = label
+    else:
+        label_fixed = None
+
+    return label_fixed
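Since JAIS often answers with punctuation, separators, or a "label:" prefix, the attentionworthy post-processing lower-cases the output and strips all of those before matching, and unrecognized answers fall through to None. For example (same response-shape assumption as above):

    sample = {"choices": [{"message": {"content": "Label: yes_calls_for_action."}}]}
    assert post_process(sample) == "yes_calls_for_action"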
{"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + if "label: " in label: + arr = label.split("label: ") + label = arr[1].strip() + + if label == "checkworthy" or label == "Checkworthy": + label_fixed = "1" + elif label == "Not_checkworthy." or label == "not_checkworthy": + label_fixed = "0" + elif "not_checkworthy" in label or "label: not_checkworthy" in label: + label_fixed = "0" + elif "checkworthy" in label or "label: checkworthy" in label: + label_fixed = "1" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..6d36b240 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py @@ -0,0 +1,56 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"Given the following tweet, please identify if it contains a claim. 
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..6d36b240
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py
@@ -0,0 +1,53 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f"Given the following tweet, please identify if it contains a claim. If it does, annotate 'yes', if it does not, annotate 'no'\n\n"
+        f"tweet: {input_sample}\n"
+        f"label: \n"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+    label = label.replace("label:", "").strip().lower()
+
+    if label == "yes" or label == "the sentence contains a factual claim":
+        label_fixed = "1"
+    elif label == "no":
+        label_fixed = "0"
+    else:
+        label_fixed = None
+
+    return label_fixed
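The claim prompt requests a bare yes/no, so after stripping the "label:" prefix anything other than the two expected (lower-cased) answers maps to None and is counted as a failed parse. For example:

    sample = {"choices": [{"message": {"content": "label: Yes"}}]}
    assert post_process(sample) == "1"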
" + input_label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..9f6965ae --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py @@ -0,0 +1,55 @@ +from llmebench.datasets import COVID19FactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": COVID19FactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["yes", "no"], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Annotate the "tweet" into one of the following categories: correct or incorrect\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + if label.startswith("I am unable to verify".lower()) or label.startswith( + "I am unable to categorize".lower() + ): + label_fixed = None + elif "label: incorrect" in label or "incorrect" in label: + label_fixed = "no" + elif "label: correct" in label or "correct" in label: + label_fixed = "yes" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..f13af603 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py @@ -0,0 +1,60 @@ +from llmebench.datasets import UnifiedFCFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": UnifiedFCFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["true", "false"], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Annotate the "text" into one of the following categories: correct or incorrect\n\n' + f"text: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + # label_fixed = label.replace("label:", "").strip() + + if ( + label.startswith("I am unable to verify".lower()) + or label.startswith("I am unable to categorize".lower()) + or label.startswith( + "I am an AI language model and I am unable to verify".lower() + ) + ): + label_fixed = None + elif "label: incorrect" in label or "incorrect" in label: + label_fixed = "false" + elif "label: correct" in label or "correct" in label: + label_fixed = "true" + else: + label_fixed = None + + return label_fixed diff 
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..1defc2f1
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py
@@ -0,0 +1,61 @@
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f'Given the following tweet, analyze its content and identify if it could potentially cause harm to an individual, society, or an organization. Annotate the tweet as "harmful" if it has the potential to cause harm, or "not_harmful" if it does not.\n\n'
+        f"tweet: {input_sample}\n"
+        f"label: \n"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"].lower()
+
+    if "label: " in label:
+        label = label.split("label: ")[1].strip()
+    label = label.replace("label:", "").replace(".", "").strip()
+
+    if label.startswith("harmful") or label.startswith("yes"):
+        label_fixed = "1"
+    elif (
+        label.startswith("no")
+        or label == "safe"
+        or label == "not_harmful"
+        or "not harmful" in label
+    ):
+        label_fixed = "0"
+    else:
+        label_fixed = None
+
+    return label_fixed
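The harmfulness parser first peels off any "label:" prefix and punctuation, then classifies what remains, returning None for unrecognized output instead of an arbitrary string. For example:

    sample = {"choices": [{"message": {"content": "label: not_harmful"}}]}
    assert post_process(sample) == "0"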
out or "HS" == out: + out = "HS" + else: + out = None + return out diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..a4ac941e --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py @@ -0,0 +1,44 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import FastChatModel +from llmebench.tasks import OffensiveTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'if the following Arabic sentence is offensive, just say "OFF", otherwise, say just "NOT_OFF" without explanation: \n {input_sample}' + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + j = out.find(".") + if j > 0: + out = out[0:j] + return out diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..5d437cdf --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py @@ -0,0 +1,167 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import FastChatModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "JAIS-13b", + "description": "Locally hosted JAIS-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, + } + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Label this "tweet" based on the following propaganda techniques:\n\n' + f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to 
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..5d437cdf
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py
@@ -0,0 +1,167 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": [
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 30,
+        },
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f'Label this "tweet" based on the following propaganda techniques:\n\n'
+        f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'"
+        f"\nAnswer (only yes/no) in the following format: \n"
+        f"'Doubt': 'yes', "
+        f"'Smears': 'no', \n\n"
+        f"tweet: {input_sample}\n\n"
+        f"label: \n"
+    )
+
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def fix_label(pred_label):
+    if "used in this text" in pred_label:
+        return ["no technique"]
+
+    labels_fixed = []
+    pred_label = pred_label.replace('"', "'").split("', '")
+    pred_labels = []
+
+    for l in pred_label:
+        splits = l.replace(",", "").split(":")
+        if len(splits) > 1 and "no" in splits[1]:
+            continue
+        pred_labels.append(splits[0].replace("'", ""))
+
+    if len(pred_labels) == 0:
+        return ["no technique"]
+
+    for label in pred_labels:
+        label = label.replace(".", "").strip()
+        label = re.sub("-", " ", label)
+        label = label.strip().lower()
+
+        # Handle case of single word labels like "Smears" so we just capitalize it
+        label_fixed = label.capitalize()
+
+        # print(label)
+        if "slogan" in label:
+            label_fixed = "Slogans"
+        if "loaded" in label:
+            label_fixed = "Loaded Language"
+        if "prejudice" in label or "fear" in label or "mongering" in label:
+            label_fixed = "Appeal to fear/prejudice"
+        if "terminating" in label or "thought" in label:
+            label_fixed = "Thought-terminating cliché"
+        if "calling" in label or label == "name c":
+            label_fixed = "Name calling/Labeling"
+        if "minimisation" in label or label == "exaggeration minim":
+            label_fixed = "Exaggeration/Minimisation"
+        if "glittering" in label:
+            label_fixed = "Glittering generalities (Virtue)"
+        if "flag" in label:
+            label_fixed = "Flag-waving"
+        if "obfuscation" in label:
+            label_fixed = "Obfuscation, Intentional vagueness, Confusion"
+        if "oversimplification" in label or "causal" in label:
+            label_fixed = "Causal Oversimplification"
+        if "authority" in label:
+            label_fixed = "Appeal to authority"
+        if "dictatorship" in label or "black" in label or "white" in label:
+            label_fixed = "Black-and-white Fallacy/Dictatorship"
+        if "herring" in label or "irrelevant" in label:
+            label_fixed = "Presenting Irrelevant Data (Red Herring)"
+        if "straw" in label or "misrepresentation" in label:
+            label_fixed = "Misrepresentation of Someone's Position (Straw Man)"
+        if "whataboutism" in label:
+            label_fixed = "Whataboutism"
+
+        if (
+            "no propaganda" in label
+            or "technique" in label
+            or label == ""
+            or label == "no"
+            or label == "appeal to history"
+            or label == "appeal to emotion"
+            or label == "appeal to"
+            or label == "appeal"
+            or label == "appeal to author"
+            or label == "emotional appeal"
+            or "no techn" in label
+            or "hashtag" in label
+            or "theory" in label
+            or "specific mention" in label
+            or "religious" in label
+            or "gratitude" in label
+        ):
+            label_fixed = "no technique"
+
+        labels_fixed.append(label_fixed)
+
+    out_put_labels = []
+    # Remove no technique label when we have other techniques for the same text
+    if len(labels_fixed) > 1:
+        for flabel in labels_fixed:
+            if flabel != "no technique":
+                out_put_labels.append(flabel)
+        return out_put_labels
+
+    return labels_fixed
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"].lower()
+    pred_label = fix_label(label)
+
+    return pred_label
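fix_label parses the "'technique': 'yes'/'no'" answer format the prompt requests: techniques answered 'no' are dropped, the rest are canonicalized against the class list, and an empty result falls back to "no technique". A small worked example (post_process lower-cases the raw completion before calling fix_label):

    label = "'doubt': 'yes', 'smears': 'no'"
    assert fix_label(label) == ["Doubt"]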
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..534d5fd7
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py
@@ -0,0 +1,48 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": ["__label__ADS", "__label__NOTADS"],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f"If the following sentence can be classified as spam or contains an advertisement, write '__label__ADS' without explanation, otherwise write '__label__NOTADS' without explanation.\n {input_sample}\n"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    out = response["choices"][0]["message"]["content"]
+    j = out.find(".")
+    if j > 0:
+        out = out[0:j]
+    if "NOTADS" in out:
+        out = "__label__NOTADS"
+    elif "ADS" in out:
+        out = "__label__ADS"
+    return out
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..058fc5d8
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot.py
@@ -0,0 +1,48 @@
+from llmebench.datasets import CT23SubjectivityDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SubjectivityTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "JAIS-13b",
+        "description": "Locally hosted JAIS-13b-chat model using FastChat.",
+        "scores": {"Macro-F1": ""},
+    }
+
+
+def config():
+    return {
+        "dataset": CT23SubjectivityDataset,
+        "task": SubjectivityTask,
+        "model": FastChatModel,
+        "general_args": {"test_split": "ar/dev"},
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f"صنف الجملة إلى لاموضوعية أو موضوعية.\n\n"
+        f"التغريدة: {input_sample}\n"
+        f"التصنيف: \n"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"].lower().replace(".", "")
+
+    if "لاموضوعية" in label:
+        label_fixed = "SUBJ"
+    elif label == "موضوعية" or "التصنيف: موضوعية" in label:
+        label_fixed = "OBJ"
+    else:
+        label_fixed = None
+
+    return label_fixed
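All thirteen assets follow the standard LLMeBench layout (metadata/config/prompt/post_process), so once a FastChat server is hosting jais-13b-chat they should run through the framework's usual entry point. A hedged sketch of the invocation (the filter pattern and directory paths are illustrative, and the FastChat endpoint and credentials are read from the environment variables expected by llmebench's FastChatModel):

    # Run only the JAIS zero-shot assets added in this patch.
    python -m llmebench --filter '*JAIS13b_ZeroShot*' assets/ar/factuality_disinformation_harmful_content/ results/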