diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py index 0fd76530..a21c5d98 100644 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py @@ -8,6 +8,7 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "bloomz-176b (8bit quantized)", "description": "Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.", + "scores": {"Weighted-F1": "0.749"}, } @@ -29,8 +30,7 @@ def prompt(input_sample): input_sample = arr[:1000] prompt_string = ( - f"Classify following the tweet as yes or no.\n" - f"Provide only label.\n\n" + f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no.\n\n" f"tweet: {input_sample}\n" f"label: \n" ) @@ -46,13 +46,26 @@ def post_process(response): label = label.replace("", "") label = label.lower() - if label.startswith("I am unable to verify".lower()) or label.startswith( - "I am unable to categorize".lower() + if ( + label.startswith("i am unable to verify") + or label.startswith("i am unable to categorize") + or label.startswith("i cannot") + or "cannot" in label ): label_fixed = None - elif "label: incorrect" in label or "incorrect" in label or label == "no": + elif ( + "label: incorrect" in label + or "incorrect" in label + or label == "no" + or label == "لا" + ): label_fixed = "no" - elif "label: correct" in label or "correct" in label or label == "yes": + elif ( + "label: correct" in label + or "correct" in label + or "yes" in label + or "نعم" in label + ): label_fixed = "yes" else: label_fixed = None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py index 3f3a66b3..9fe2da47 100644 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py @@ -8,7 +8,7 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "gpt-35-turbo (version 0301)", "description": "GPT35 model hosted on Azure, using the Completion API. API version '2023-03-15-preview'.", - "scores": {"Weighted-F1": "0.103"}, + "scores": {"Weighted-F1": "0.393"}, } @@ -26,8 +26,8 @@ def config(): def prompt(input_sample): prompt_string = ( - f"Detect the information in the sentence as correct or incorrect. Use label as yes or no.\n\n" - f"text: {input_sample}\n" + f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no.\n\n" + f"tweet: {input_sample}\n" f"label: \n" ) return { @@ -44,16 +44,27 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["text"].lower().replace(".", "").lower() - if label.startswith("I am unable to verify".lower()) or label.startswith( - "I am unable to categorize".lower() + if ( + label.startswith("i am unable to verify") + or label.startswith("i am unable to categorize") + or label.startswith("i cannot") + or "cannot" in label ): label_fixed = None - elif "incorrect" in label or "label: no" in label: + elif ( + "label: incorrect" in label + or "incorrect" in label + or label == "no" + or label == "لا" + ): label_fixed = "no" - elif "correct" in label or "label: yes" in label: + elif ( + "label: correct" in label + or "correct" in label + or "yes" in label + or "نعم" in label + ): label_fixed = "yes" - elif "no" == label or "yes" == label: - label_fixed = label else: label_fixed = None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py index a294c5d0..6510c4e0 100644 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py @@ -8,7 +8,7 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "gpt-4-32k (version 0314)", "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", - "scores": {"Weighted-F1": "0.497"}, + "scores": {"Weighted-F1": "0.491"}, } @@ -49,11 +49,11 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = f'Annotate the "tweet" into one of the following categories: yes or no. Provide only label.' + base_prompt = f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no." return [ { "role": "system", - "content": "You are a social media expert, a fact-checker and you can annotate tweets.", + "content": "You are an expert fact-checker.", }, { "role": "user", @@ -63,20 +63,28 @@ def prompt(input_sample, examples): def post_process(response): - label = response["choices"][0]["message"]["content"] + label = response["choices"][0]["message"]["content"].lower() if ( + label.startswith("i am unable to verify") + or label.startswith("i am unable to categorize") + or label.startswith("i cannot") + or "cannot" in label + ): + # print(label) + label_fixed = None + elif ( "label: incorrect" in label or "incorrect" in label or label == "no" - or "label: no" in label + or label == "لا" ): label_fixed = "no" elif ( "label: correct" in label or "correct" in label - or label == "yes" - or "label: yes" in label + or "yes" in label + or "نعم" in label ): label_fixed = "yes" else: diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py index ea0c0689..f90cb347 100644 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py @@ -8,7 +8,7 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "gpt-4-32k (version 0314)", "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.", - "scores": {"Weighted-F1": "0.372"}, + "scores": {"Weighted-F1": "0.485"}, } @@ -19,21 +19,21 @@ def config(): "model": OpenAIModel, "model_args": { "class_labels": ["yes", "no"], - "max_tries": 30, + "max_tries": 3, }, } def prompt(input_sample): prompt_string = ( - f'Annotate the "tweet" into one of the following categories: correct or incorrect\n\n' + f"Does the following tweet contain a factually correct claim or not? Answer only by yes or no.\n\n" f"tweet: {input_sample}\n" f"label: \n" ) return [ { "role": "system", - "content": "You are a social media expert, a fact-checker and you can annotate tweets.", # You are capable of identifying and annotating tweets correct or incorrect + "content": "You are an expert fact-checker.", # You are capable of identifying and annotating tweets correct or incorrect }, { "role": "user", @@ -43,15 +43,29 @@ def prompt(input_sample): def post_process(response): - label = response["choices"][0]["message"]["content"] + label = response["choices"][0]["message"]["content"].lower() - if label.startswith("I am unable to verify".lower()) or label.startswith( - "I am unable to categorize".lower() + if ( + label.startswith("i am unable to verify") + or label.startswith("i am unable to categorize") + or label.startswith("i cannot") + or "cannot" in label ): + # print(label) label_fixed = None - elif "label: incorrect" in label or "incorrect" in label: + elif ( + "label: incorrect" in label + or "incorrect" in label + or label == "no" + or label == "لا" + ): label_fixed = "no" - elif "label: correct" in label or "correct" in label: + elif ( + "label: correct" in label + or "correct" in label + or "yes" in label + or "نعم" in label + ): label_fixed = "yes" else: label_fixed = None