From b3300e6e348b343fd037819705d4331f4f77daf2 Mon Sep 17 00:00:00 2001 From: MohamedBayan Date: Tue, 19 Nov 2024 14:32:47 +0300 Subject: [PATCH 1/3] Add wise-24 assets --- .../Adult_GPT4_FewShot_Arabic.py | 91 ++++++ .../Adult_GPT4_FewShot_English.py | 88 ++++++ .../Adult_GPT4_FewShot_Mixed.py | 88 ++++++ .../Adult_GPT4_ZeroShot_Arabic.py | 65 ++++ .../Adult_GPT4_ZeroShot_English.py | 65 ++++ .../Adult_GPT4_ZeroShot_Mixed.py | 64 ++++ .../Adult_JAIS13b_FewShot_Arabic.py | 78 +++++ .../Adult_JAIS13b_FewShot_English.py | 75 +++++ .../Adult_JAIS13b_FewShot_Mixed.py | 74 +++++ .../Adult_JAIS13b_ZeroShot_Arabic.py | 65 ++++ .../Adult_JAIS13b_ZeroShot_English.py | 65 ++++ .../Adult_JAIS13b_ZeroShot_Mixed.py | 64 ++++ .../Adult_Llama3-8b_FewShot_Arabic.py | 75 +++++ .../Adult_Llama3-8b_FewShot_English.py | 91 ++++++ .../Adult_Llama3-8b_FewShot_Mixed.py | 67 ++++ .../Adult_Llama3-8b_ZeroShot_Arabic.py | 60 ++++ .../Adult_Llama3-8b_ZeroShot_English.py | 61 ++++ .../Adult_Llama3-8b_ZeroShot_Mixed.py | 54 ++++ ...CT22Attentionworthy_GPT4_FewShot_Arabic.py | 140 +++++++++ ...T22Attentionworthy_GPT4_FewShot_English.py | 125 ++++++++ .../CT22Attentionworthy_GPT4_FewShot_Mixed.py | 120 ++++++++ ...T22Attentionworthy_GPT4_ZeroShot_Arabic.py | 105 +++++++ ...22Attentionworthy_GPT4_ZeroShot_English.py | 108 +++++++ ...CT22Attentionworthy_GPT4_ZeroShot_Mixed.py | 103 +++++++ ...2Attentionworthy_JAIS13b_FewShot_Arabic.py | 115 +++++++ ...Attentionworthy_JAIS13b_FewShot_English.py | 115 +++++++ ...22Attentionworthy_JAIS13b_FewShot_Mixed.py | 148 +++++++++ ...Attentionworthy_JAIS13b_ZeroShot_Arabic.py | 101 ++++++ ...ttentionworthy_JAIS13b_ZeroShot_English.py | 102 +++++++ ...2Attentionworthy_JAIS13b_ZeroShot_Mixed.py | 104 +++++++ ...ttentionworthy_Llama3-8b_FewShot_Arabic.py | 124 ++++++++ ...tentionworthy_Llama3-8b_FewShot_English.py | 118 +++++++ ...Attentionworthy_Llama3-8b_FewShot_Mixed.py | 125 ++++++++ ...tentionworthy_Llama3-8b_ZeroShot_Arabic.py | 104 +++++++ 
...entionworthy_Llama3-8b_ZeroShot_English.py | 105 +++++++ ...ttentionworthy_Llama3-8b_ZeroShot_Mixed.py | 103 +++++++ ...CT22Checkworthiness_GPT4_FewShot_Arabic.py | 85 ++++++ ...T22Checkworthiness_GPT4_FewShot_English.py | 86 ++++++ .../CT22Checkworthiness_GPT4_FewShot_Mixed.py | 82 +++++ ...T22Checkworthiness_GPT4_ZeroShot_Arabic.py | 65 ++++ ...22Checkworthiness_GPT4_ZeroShot_English.py | 65 ++++ ...CT22Checkworthiness_GPT4_ZeroShot_Mixed.py | 69 +++++ ...2Checkworthiness_JAIS13b_FewShot_Arabic.py | 83 +++++ ...Checkworthiness_JAIS13b_FewShot_English.py | 79 +++++ ...22Checkworthiness_JAIS13b_FewShot_Mixed.py | 81 +++++ ...Checkworthiness_JAIS13b_ZeroShot_Arabic.py | 59 ++++ ...heckworthiness_JAIS13b_ZeroShot_English.py | 68 +++++ ...2Checkworthiness_JAIS13b_ZeroShot_Mixed.py | 57 ++++ ...heckworthiness_Llama3-8b_FewShot_Arabic.py | 89 ++++++ ...eckworthiness_Llama3-8b_FewShot_English.py | 88 ++++++ ...Checkworthiness_Llama3-8b_FewShot_Mixed.py | 86 ++++++ ...heckworthiness_Llama3-8b_ZeroShot_Mixed.py | 63 ++++ ...eckworthiness_Llama3-8b_Zeroshot_Arabic.py | 65 ++++ ...ckworthiness_Llama3-8b_Zeroshot_English.py | 63 ++++ .../CT22Claim_GPT4_FewShot_Arabic.py | 94 ++++++ .../CT22Claim_GPT4_FewShot_English.py | 90 ++++++ .../CT22Claim_GPT4_FewShot_Mixed.py | 94 ++++++ .../CT22Claim_GPT4_ZeroShot_Arabic.py | 81 +++++ .../CT22Claim_GPT4_ZeroShot_English.py | 75 +++++ .../CT22Claim_GPT4_ZeroShot_Mixed.py | 81 +++++ .../CT22Claim_JAIS13b_FewShot_Arabic.py | 70 +++++ .../CT22Claim_JAIS13b_FewShot_English.py | 84 +++++ .../CT22Claim_JAIS13b_FewShot_Mixed.py | 75 +++++ .../CT22Claim_JAIS13b_ZeroShot_Arabic.py | 68 +++++ .../CT22Claim_JAIS13b_ZeroShot_English.py | 64 ++++ .../CT22Claim_JAIS13b_ZeroShot_Mixed.py | 75 +++++ .../CT22Claim_Llama3-8b_FewShot_Arabic.py | 73 +++++ .../CT22Claim_Llama3-8b_FewShot_English.py | 78 +++++ .../CT22Claim_Llama3-8b_FewShot_Mixed.py | 69 +++++ .../CT22Claim_Llama3-8b_ZeroShot_Arabic.py | 69 +++++ 
.../CT22Claim_Llama3-8b_ZeroShot_English.py | 69 +++++ .../CT22Claim_Llama3-8b_ZeroShot_Mixed.py | 67 ++++ .../ANSFactuality_GPT4_FewShot_Arabic.py | 95 ++++++ .../ANSFactuality_GPT4_FewShot_English.py | 89 ++++++ .../ANSFactuality_GPT4_FewShot_Mixed.py | 93 ++++++ .../ANSFactuality_GPT4_ZeroShot_Arabic.py | 79 +++++ .../ANSFactuality_GPT4_ZeroShot_English.py | 78 +++++ .../ANSFactuality_GPT4_ZeroShot_Mixed.py | 78 +++++ .../ANSFactuality_JAIS13b_FewShot_Arabic.py | 99 ++++++ .../ANSFactuality_JAIS13b_FewShot_English.py | 91 ++++++ .../ANSFactuality_JAIS13b_FewShot_Mixed.py | 89 ++++++ .../ANSFactuality_JAIS13b_ZeroShot_Arabic.py | 76 +++++ .../ANSFactuality_JAIS13b_ZeroShot_English.py | 70 +++++ .../ANSFactuality_JAIS13b_ZeroShot_Mixed.py | 73 +++++ .../ANSFactuality_Llama3-8b_FewShot_Arabic.py | 99 ++++++ ...ANSFactuality_Llama3-8b_FewShot_English.py | 92 ++++++ .../ANSFactuality_Llama3-8b_FewShot_Mixed.py | 96 ++++++ ...ANSFactuality_Llama3-8b_ZeroShot_Arabic.py | 83 +++++ ...NSFactuality_Llama3-8b_ZeroShot_English.py | 77 +++++ .../ANSFactuality_Llama3-8b_ZeroShot_Mixed.py | 81 +++++ .../CT22Harmful_GPT4_FewShot_Arabic.py | 93 ++++++ .../CT22Harmful_GPT4_FewShot_English.py | 93 ++++++ .../CT22Harmful_GPT4_FewShot_Mixed.py | 92 ++++++ .../CT22Harmful_GPT4_ZeroShot_Arabic.py | 80 +++++ .../CT22Harmful_GPT4_ZeroShot_English.py | 80 +++++ .../CT22Harmful_GPT4_ZeroShot_Mixed.py | 79 +++++ .../CT22Harmful_JAIS13b_FewShot_Arabic.py | 69 +++++ .../CT22Harmful_JAIS13b_FewShot_English.py | 77 +++++ .../CT22Harmful_JAIS13b_FewShot_Mixed.py | 74 +++++ .../CT22Harmful_JAIS13b_ZeroShot_Arabic.py | 56 ++++ .../CT22Harmful_JAIS13b_ZeroShot_English.py | 60 ++++ .../CT22Harmful_JAIS13b_ZeroShot_Mixed.py | 54 ++++ .../CT22Harmful_Llama3-8b_FewShot_Arabic.py | 72 +++++ .../CT22Harmful_Llama3-8b_FewShot_English.py | 79 +++++ .../CT22Harmful_Llama3-8b_FewShot_Mixed.py | 65 ++++ .../CT22Harmful_Llama3-8b_ZeroShot_Arabic.py | 59 ++++ .../CT22Harmful_Llama3-8b_ZeroShot_English.py | 60 
++++ .../CT22Harmful_Llama3-8b_ZeroShot_Mixed.py | 56 ++++ .../OSACT4SubtaskB_GPT4_FewShot_Arabic.py | 83 +++++ .../OSACT4SubtaskB_GPT4_FewShot_English.py | 78 +++++ .../OSACT4SubtaskB_GPT4_FewShot_Mixed.py | 77 +++++ .../OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py | 60 ++++ .../OSACT4SubtaskB_GPT4_ZeroShot_English.py | 55 ++++ .../OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py | 58 ++++ .../OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py | 61 ++++ .../OSACT4SubtaskB_JAIS13b_FewShot_English.py | 64 ++++ .../OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py | 60 ++++ .../OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py | 60 ++++ ...OSACT4SubtaskB_JAIS13b_ZeroShot_English.py | 54 ++++ .../OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py | 48 +++ ...OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py | 71 +++++ ...SACT4SubtaskB_Llama3-8b_FewShot_English.py | 71 +++++ .../OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py | 70 +++++ ...SACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py | 58 ++++ ...ACT4SubtaskB_Llama3-8b_ZeroShot_English.py | 58 ++++ ...OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py | 57 ++++ .../OSACT4SubtaskA_GPT4_FewShot_Arabic.py | 78 +++++ .../OSACT4SubtaskA_GPT4_FewShot_English.py | 78 +++++ .../OSACT4SubtaskA_GPT4_FewShot_Mixed.py | 77 +++++ .../OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py | 55 ++++ .../OSACT4SubtaskA_GPT4_ZeroShot_English.py | 54 ++++ .../OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py | 52 ++++ .../OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py | 68 +++++ .../OSACT4SubtaskA_JAIS13b_FewShot_English.py | 60 ++++ .../OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py | 67 ++++ .../OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py | 52 ++++ ...OSACT4SubtaskA_JAIS13b_ZeroShot_English.py | 56 ++++ .../OSACT4SubtaskA_JAIS13b_ZeroShot_Mixed.py | 52 ++++ ...OSACT4SubtaskA_Llama3-8b_FewShot_Arabic.py | 68 +++++ ...SACT4SubtaskA_Llama3-8b_FewShot_English.py | 64 ++++ .../OSACT4SubtaskA_Llama3-8b_FewShot_Mixed.py | 65 ++++ ...SACT4SubtaskA_Llama3-8b_ZeroShot_Arabic.py | 49 +++ ...ACT4SubtaskA_Llama3-8b_ZeroShot_English.py | 50 +++ 
...OSACT4SubtaskA_Llama3-8b_ZeroShot_Mixed.py | 55 ++++ .../WANLP22T3_GPT4_FewShot_Arabic.py | 125 ++++++++ .../WANLP22T3_GPT4_FewShot_English.py | 199 ++++++++++++ .../WANLP22T3_GPT4_FewShot_Mixed.py | 129 ++++++++ .../WANLP22T3_GPT4_ZeroShot_Arabic.py | 106 +++++++ .../WANLP22T3_GPT4_ZeroShot_English.py | 177 +++++++++++ .../WANLP22T3_GPT4_ZeroShot_Mixed.py | 108 +++++++ .../WANLP22T3_JAIS13b_FewShot_Arabic.py | 147 +++++++++ .../WANLP22T3_JAIS13b_FewShot_English.py | 178 +++++++++++ .../WANLP22T3_JAIS13b_FewShot_Mixed.py | 120 ++++++++ .../WANLP22T3_JAIS13b_ZeroShot_Arabic.py | 152 +++++++++ .../WANLP22T3_JAIS13b_ZeroShot_English.py | 170 +++++++++++ .../WANLP22T3_JAIS13b_ZeroShot_Mixed.py | 147 +++++++++ .../WANLP22T3_Llama3-8b_FewShot_Arabic.py | 289 ++++++++++++++++++ .../WANLP22T3_Llama3-8b_FewShot_English.py | 148 +++++++++ .../WANLP22T3_Llama3-8b_FewShot_Mixed.py | 124 ++++++++ .../WANLP22T3_Llama3-8b_ZeroShot_Arabic.py | 127 ++++++++ .../WANLP22T3_Llama3-8b_ZeroShot_English.py | 197 ++++++++++++ .../WANLP22T3_Llama3-8b_ZeroShot_Mixed.py | 122 ++++++++ .../spam/Spam_GPT4_FewShot_Arabic.py | 96 ++++++ .../spam/Spam_GPT4_FewShot_English.py | 98 ++++++ .../spam/Spam_GPT4_FewShot_Mixed.py | 94 ++++++ .../spam/Spam_GPT4_ZeroShot_Arabic.py | 64 ++++ .../spam/Spam_GPT4_ZeroShot_English.py | 59 ++++ .../spam/Spam_GPT4_ZeroShot_Mixed.py | 61 ++++ .../spam/Spam_JAIS13b_FewShot_Arabic.py | 72 +++++ .../spam/Spam_JAIS13b_FewShot_English.py | 78 +++++ .../spam/Spam_JAIS13b_FewShot_Mixed.py | 68 +++++ .../spam/Spam_JAIS13b_ZeroShot_Arabic.py | 59 ++++ .../spam/Spam_JAIS13b_ZeroShot_English.py | 59 ++++ .../spam/Spam_JAIS13b_ZeroShot_Mixed.py | 59 ++++ .../spam/Spam_Llama3-8b_FewShot_Arabic.py | 87 ++++++ .../spam/Spam_Llama3-8b_FewShot_English.py | 93 ++++++ .../spam/Spam_Llama3-8b_FewShot_Mixed.py | 78 +++++ .../spam/Spam_Llama3-8b_ZeroShot_Arabic.py | 72 +++++ .../spam/Spam_Llama3-8b_ZeroShot_English.py | 63 ++++ .../spam/Spam_Llama3-8b_ZeroShot_Mixed.py | 74 +++++ 
.../CT23Subjectivity_GPT4_FewShot_Arabic.py | 74 +++++ .../CT23Subjectivity_GPT4_FewShot_English.py | 80 +++++ .../CT23Subjectivity_GPT4_FewShot_Mixed.py | 71 +++++ .../CT23Subjectivity_GPT4_ZeroShot_Arabic.py | 56 ++++ .../CT23Subjectivity_GPT4_ZeroShot_English.py | 58 ++++ .../CT23Subjectivity_GPT4_ZeroShot_Mixed.py | 52 ++++ ...CT23Subjectivity_JAIS13b_FewShot_Arabic.py | 80 +++++ ...T23Subjectivity_JAIS13b_FewShot_English.py | 78 +++++ .../CT23Subjectivity_JAIS13b_FewShot_Mixed.py | 75 +++++ ...23Subjectivity_JAIS13b_ZeroShot_English.py | 54 ++++ ...CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py | 46 +++ ...T23Subjectivity_Jais13b_ZeroShot_Arabic.py | 51 ++++ ...23Subjectivity_LLama3-8b_FewShot_Arabic.py | 88 ++++++ ...3Subjectivity_LLama3-8b_FewShot_English.py | 84 +++++ ...T23Subjectivity_LLama3-8b_FewShot_Mixed.py | 84 +++++ ...3Subjectivity_LLama3-8b_ZeroShot_Arabic.py | 62 ++++ ...Subjectivity_LLama3-8b_ZeroShot_English.py | 63 ++++ ...23Subjectivity_LLama3-8b_ZeroShot_Mixed.py | 62 ++++ 198 files changed, 16460 insertions(+) create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py create mode 
100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py create mode 
100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py 
create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py create 
mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..3661556f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py @@ -0,0 +1,91 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 30, + }, + "general_args": { + "fewshot": { + "deduplicate": True, + }, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "اليك بعض الامثلة:\n\n" + for index, example in enumerate(examples): + label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + label + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير."
+ return [ + { + "role": "system", + "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + if not response: + return None + + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py new file mode 100755 index 00000000..f17885a9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py @@ -0,0 +1,88 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 30, + }, + "general_args": { + "fewshot": { + "deduplicate": True, + }, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'Does the following tweet contain adult content? label it as "ADULT" if it does, or "NOT_ADULT" otherwise. Return label only.' + return [ + { + "role": "system", + "content": "You are an expert annotator, you can identify and label adult content within a tweet.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..11fd3385 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py @@ -0,0 +1,88 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 30, + }, + "general_args": { + "fewshot": { + "deduplicate": True, + }, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "اليك بعض الامثلة:\n\n" + for index, example in enumerate(examples): + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = f'بالنظر إلى التغريدة التالية، صنفها على أنها "ADULT" أو "NOT_ADULT" بناء على محتوى التغريدة' + return [ + { + "role": "system", + "content": "أنت محلل 
خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + +def post_process(response): + if not response: + return None + + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..fff31f29 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,65 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_string = ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n" + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + ) + return [ + { + "role": "system", + "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..461c56a9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py @@ -0,0 +1,65 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + 
"description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_string = ( + # f'Classify whether the following Arabic sentence is appropriate for children. If not appropriate, just write "ADULT" without explaination, and if appropriate, just write "NOT_ADULT" without explaination \n {input_sample}' + f'Does the following tweet contain adult content? label it as "ADULT" if it does, or "NOT_ADULT" otherwise. Return label only.\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "system", + "content": "You are an expert annotator, you can identify and label adult content within a tweet.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..f9c9956e --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,64 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + prompt_string = ( + f'هل تحتوي التغريدة التالية على لغة للبالغين؟ صنفها كـ "ADULT" أو "NOT_ADULT" بناءً على محتوى التغريدة.\n\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وسائل التواصل، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"].replace("label: ", "") + j = out.find(".") + if j > 0: + out = out[0:j] + label = out.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + 
return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..74c96636 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,78 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["للبالغين", "ليس للبالغين"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "صنف التغريدة العربية التالية على أنها تحتوي على لغة للبالغين (اللغة المستخدمة في إعلانات البالغين والإعلانات الإباحية) أو لا تحتوي على لغة للبالغين بدون توضيح. 
" + "في حالة لغة البالغين، اكتب فقط 'للبالغين' بدون تفسير، وفي حالة عدم احتوائها على لغة البالغين، اكتب فقط 'ليست للبالغين' بدون تفسير:\n\n" + ) + for index, example in enumerate(examples): + label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" + f"التصنيف: " + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + return None + elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label or "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + return "NOT_ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..e6f868e5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py @@ 
-0,0 +1,75 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "Given the following tweet, label it as 'ADULT' or 'NOT_ADULT' based on the content of the tweet. 
Provide only label.\n\n" + ) + for index, example in enumerate(examples): + label = "adult" if example["label"] == "ADULT" else "not adult" + base_prompt += ( + f"Example {index + 1}:\n" + f"Tweet: '{example['input']}'\n" + f"Label: {label}\n\n" + ) + base_prompt += ( + f"Now classify the new tweet:\nTweet: '{input_sample}'\n" + f"Label: " + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + label = label.lower() + + if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + return None + elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label or "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + return "NOT_ADULT" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..e4a333d8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,74 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + + +def metadata(): + return { + "author": 
"Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" + ) + for index, example in enumerate(examples): + label = "adult" if example["label"] == "ADULT" else "not adult" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"صنف الآن التغريدة الجديدة:\nجملة: '{input_sample}'\n" + f"التصنيف: " + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + return None + elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif 
"التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label or "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + return "NOT_ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..6df883fa --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,65 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n" + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + return None + elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label or "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + return "NOT_ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..8dcf7898 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py @@ -0,0 +1,65 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + 
"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "Classify the following Arabic sentence as adult language (the language used in adult advertisement and porno advertisement) or not adult language without illustration. " + "In case of adult language, just write 'adult' without explanation, and in case of not adult language, just write 'not adult' without explanation:\n\n" + + "Sentence: " + + input_sample + + "\nLabel: " + ), + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("", "").replace("", "") + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + label = label.lower() + + if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + return None + elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + elif 
"ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + return "NOT_ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..e36fc907 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,64 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+    }
+
+
+
+def config():
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            "max_tries": 3,
+        },
+    }
+
+def prompt(input_sample):
+    return [
+        {
+            "role": "user",
+            "content": (
+                "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n"
+                + "تغريدة: "
+                + input_sample
+                + "\nالتصنيف: "
+            ),
+        }
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+
+    label = label.replace("", "").replace("", "")
+    label = label.lower()
+
+    if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label:
+        return None
+    elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label:
+        return "NOT_ADULT"
+    elif "غير مناسب للنشر العام" in label:
+        return "ADULT"
+    elif "not_adult" in label or "not adult" in label:
+        return "NOT_ADULT"
+    elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label or "للبالغين" in label or "نعم" in label or "adult" in label:  # fixed: missing "or" was a SyntaxError
+        return "ADULT"
+    elif "ليس" in label or "ليست" in label or "not" in label or "no" == label or "غير" in label or "لا" in label:
+        return "NOT_ADULT"
+    else:
+        return None
\ No newline at end of file
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py
new file mode 100755
index 00000000..1c8b080a
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py
@@ -0,0 +1,75 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import AdultTask
+
+
+
+
+def metadata():
+    
return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": AzureModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "صنف التغريدة العربية التالية على أنها تحتوي على لغة للبالغين (اللغة المستخدمة في إعلانات البالغين والإعلانات الإباحية) أو لا تحتوي على لغة للبالغين بدون توضيح. " + "في حالة لغة البالغين، اكتب فقط 'للبالغين' بدون تفسير، وفي حالة عدم احتوائها على لغة البالغين، اكتب فقط 'ليست للبالغين' بدون تفسير:\n\n" + ) + for index, example in enumerate(examples): + label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" + f"التصنيف: " + ) + return base_prompt + + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + + +def post_process(response): + #if not response or 'error' in response or 'output' not in response: + #print("Error or missing output in response:", response) + #return "NOT_ADULT" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", 
"").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..dbce9c74 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py @@ -0,0 +1,91 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import AzureModel +from llmebench.tasks import AdultTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + + "dataset": AdultDataset, + "task": AdultTask, + "model": AzureModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 30, + }, + "general_args": { + "fewshot": { + "deduplicate": True, + }, + }, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = "Given the following tweet, label it as 'ADULT' or 'NOT_ADULT' based on the content of the tweet. Provide only label." + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + + +def post_process(response): + + if not response or 'error' in response or 'output' not in response: + print("Error or missing output in response:", response) + return "NOT_ADULT" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..9cf7d89a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,67 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import AzureModel +from llmebench.tasks import AdultTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": AzureModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" + ) + for index, example in enumerate(examples): + label = "adult" if example["label"] == "ADULT" else "not adult" + base_prompt += ( + f"مثال {index + 1}:\n" + f"جملة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"صنف الآن الجملة الجديدة:\nجملة: '{input_sample}'\n" + f"التصنيف: " + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + 
label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..65cc5771 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,60 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import AzureModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": AzureModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n" + + "تغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + +def post_process(response): + #if not response or 'error' in response or 'output' not in response: + #print("Error or missing output in response:", response) + #return "No Response" + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..45aa4d62 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,61 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import AzureModel +from llmebench.tasks import AdultTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": 
"Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": AzureModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "Classify the following Arabic sentence as adult language (the language used in adult advertisement and porno advertisement) or not adult language without illustration. " + "In case of adult language, just write 'adult' without explanation, and in case of not adult language, just write 'not adult' without explanation:\n\n" + + "Sentence: " + + input_sample + + "\nLabel: " + ), + } + ] + +def post_process(response): + #if not response or 'error' in response or 'output' not in response: + #print("Error or missing output in response:", response) + #return "NOT_ADULT" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..1409183c --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,54 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import AzureModel +from llmebench.tasks import AdultTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": AzureModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" + + "تغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + +def post_process(response): + label = response["output"].strip().lower() + label = label.replace("", "").replace("", "").lower() + + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + label = label.lower() + + if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py 
b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..7742c116 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py @@ -0,0 +1,140 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AttentionworthyTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+    }
+
+
+
+
+
+def config():
+    return {
+        "dataset": CT22AttentionworthyDataset,
+        "task": AttentionworthyTask,
+        "model": OpenAIModel,
+        "model_args": {
+            "class_labels": [
+                "yes_discusses_action_taken",
+                "harmful",
+                "yes_discusses_cure",
+                "yes_asks_question",
+                "no_not_interesting",
+                "yes_other",
+                "yes_blame_authorities",
+                "yes_contains_advice",
+                "yes_calls_for_action",
+            ],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    label_translation = {
+        "yes_discusses_action_taken": "تناقش_الإجراء_المتخذ",
+        "harmful": "ضارة",
+        "yes_discusses_cure": "تناقش_العلاج",
+        "yes_asks_question": "تطرح_سؤال",
+        "no_not_interesting": "غير_مثيرة_للاهتمام",
+        "yes_other": "آخر",
+        "yes_blame_authorities": "تلوم_السلطات",
+        "yes_contains_advice": "تحتوي_على_نصيحة",
+        "yes_calls_for_action": "تدعو_لإجراء",
+    }
+
+    out_prompt = base_prompt + "\n"
+    for example in examples:
+        translated_label = label_translation.get(example["label"], example["label"])
+        out_prompt = (
+            out_prompt
+            + "التغريدة: "
+            + example["input"]
+            + "\nالتصنيف: "
+            + translated_label
+            + "\n\n"
+        )
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n"  # fixed: stray "\ال" backslash was meant to be the "\n" escape, as in sibling assets
+    return out_prompt
+
+
+
+
+
+
+
+
+
+
+
+
+def prompt(input_sample, examples):
+    base_prompt = f'هل تتطلب "التغريدة" انتباه الصحفيين، ومدققي الحقائق، والجهات الحكومية؟ قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: تناقش_الإجراء_المتخذ، ضارة، تناقش_العلاج، تطرح_سؤال، غير_مثيرة_للاهتمام، آخر، تلوم_السلطات، تحتوي_على_نصيحة، تدعو_لإجراء. قدم التصنيف فقط.\n\n'
+
+    return [
+        {
+            "role": "system",
+            "content": "أنت خبير في وسائل التواصل الاجتماعي. 
يمكنك تحليل و تصنيف التغريدات.",
+        },
+        {
+            "role": "user",
+            "content": few_shot_prompt(input_sample, base_prompt, examples),
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+
+    label = (
+        label.replace(" - ", ", ")
+        .replace(",", "")
+        .replace(".", "")
+        .replace("label:", "")
+        .replace("التصنيف: ", "")
+    )
+    label_fixed = label.lower().strip()  # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة
+
+    if label_fixed.startswith("لا"):
+        label_fixed = "no_not_interesting"
+    elif "تناقش_العلاج" in label_fixed:
+        label_fixed = "yes_discusses_cure"
+    elif "ضارة" in label_fixed:
+        label_fixed = "harmful"
+    elif "تلوم_السلطات" in label_fixed:
+        label_fixed = "yes_blame_authorities"  # fixed typo: was "yes_blame_authoritie", not in class_labels
+    elif "تدعو_لإجراء" in label_fixed:
+        label_fixed = "yes_calls_for_action"
+    elif "تناقش_الإجراء_المتخذ" in label_fixed:
+        label_fixed = "yes_discusses_action_taken"
+    elif "علاج" in label_fixed:
+        label_fixed = "yes_discusses_cure"
+    elif "تطرح سؤال" in label_fixed:
+        label_fixed = "yes_asks_question"
+    elif "تطرح_سؤال" in label_fixed:
+        label_fixed = "yes_asks_question"
+    elif "غير_مثيرة_للاهتمام" in label_fixed:
+        label_fixed = "no_not_interesting"
+    elif "آخر" in label_fixed:
+        label_fixed = "yes_other"
+    elif "السلطات" in label_fixed:
+        label_fixed = "yes_blame_authorities"
+    elif "نصيحة" in label_fixed:
+        label_fixed = "yes_contains_advice"
+    elif "تدعو لإجراء" in label_fixed:
+        label_fixed = "yes_calls_for_action"
+    else:
+        label_fixed = None
+
+    return label_fixed
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py
new file mode 100755
index 00000000..143ca5ca
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py
@@ -0,0 +1,125 @@
+from llmebench.datasets import 
CT22AttentionworthyDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AttentionworthyTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + out_prompt = ( + out_prompt + + "tweet: " + + example["input"] + + "\n" + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\n\n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'Does the tweet require attention from journalists, fact-checker, and government entities? 
Annotate "tweet" into one of the following categories: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action\n\n' + return [ + { + "role": "system", + "content": "You are social media expert. You can annotate and classify tweets." + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = ( + label.replace(" - ", ", ") + .replace(",", "") + .replace(".", "") + .replace("label:", "") + ) + + if "**Category:** " in label: + label = label.split("**Category:** ")[-1] + if '**Annotation:** ' in label: + label = label.split("**Annotation:** ")[-1] + + label = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace + + if label.startswith("no"): + label_fixed = "no_not_interesting" + elif "yes_discusses_covid-19_vaccine_side_effects" in label: + label_fixed = "yes_discusses_cure" + elif "yes_harmful" in label: + label_fixed = "harmful" + elif label.startswith("yes"): + label_fixed = label + elif "yes_blame_authoritie" in label: + label_fixed = "yes_blame_authoritie" + elif "yes_discusses_action_taken" in label: + label_fixed = "yes_discusses_action_taken" + elif "harmful" in label: + label_fixed = "harmful" + elif "yes_discusses_cure" in label: + label_fixed = "yes_discusses_cure" + elif "yes_asks_question" in label: + label_fixed = "yes_asks_question" + elif "no_not_interesting" in label: + label_fixed = "no_not_interesting" + elif "yes_other" in label: + label_fixed = "yes_other" + elif "yes_blame_authorities" in label: + label_fixed = "yes_blame_authorities" + elif "yes_contains_advice" in label: + label_fixed = "yes_contains_advice" + elif "yes_calls_for_action" in label: + label_fixed = "yes_calls_for_action" + else: + label_fixed = None + + return label_fixed + diff --git 
a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..9459930d --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py @@ -0,0 +1,120 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AttentionworthyTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+    }
+
+
+
+
+def config():
+    return {
+        "dataset": CT22AttentionworthyDataset,
+        "task": AttentionworthyTask,
+        "model": OpenAIModel,
+        "model_args": {
+            "class_labels": [
+                "yes_discusses_action_taken",
+                "harmful",
+                "yes_discusses_cure",
+                "yes_asks_question",
+                "no_not_interesting",
+                "yes_other",
+                "yes_blame_authorities",
+                "yes_contains_advice",
+                "yes_calls_for_action",
+            ],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+
+    out_prompt = base_prompt + "\n"
+    for example in examples:
+        out_prompt = (
+            out_prompt
+            + "التغريدة: "
+            + example["input"]
+            + "\nالتصنيف: "
+            + example["label"]
+            + "\n\n"
+        )
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n"  # fixed: stray "\ال" backslash was meant to be the "\n" escape, as in sibling assets
+    return out_prompt
+
+def prompt(input_sample, examples):
+    base_prompt = f'هل تتطلب "التغريدة" انتباه الصحفيين، ومدققي الحقائق، والجهات الحكومية؟ قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action. قدم التصنيف فقط.\n\n'
+
+    return [
+        {
+            "role": "system",
+            "content": "أنت خبير في وسائل التواصل الاجتماعي. 
يمكنك تحليل و تصنيف التغريدات.",
+        },
+        {
+            "role": "user",
+            "content": few_shot_prompt(input_sample, base_prompt, examples),
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+
+    label = (
+        label.replace(" - ", ", ")
+        .replace(",", "")
+        .replace(".", "")
+        .replace("label:", "")
+    )
+
+    if "**Category:** " in label:
+        label = label.split("**Category:** ")[-1]
+    if '**Annotation:** ' in label:
+        label = label.split("**Annotation:** ")[-1]
+
+    label = label.lower().strip()  # Convert to lowercase and strip leading/trailing whitespace
+
+    if label.startswith("no"):
+        label_fixed = "no_not_interesting"
+    elif "yes_discusses_covid-19_vaccine_side_effects" in label:
+        label_fixed = "yes_discusses_cure"
+    elif "yes_harmful" in label:
+        label_fixed = "harmful"
+    elif "yes_blame_authoritie" in label:
+        label_fixed = "yes_blame_authorities"  # fixed: assigned truncated "yes_blame_authoritie", not in class_labels
+    elif "yes_discusses_action_taken" in label:
+        label_fixed = "yes_discusses_action_taken"
+    elif "harmful" in label:
+        label_fixed = "harmful"
+    elif "yes_discusses_cure" in label:
+        label_fixed = "yes_discusses_cure"
+    elif "yes_asks_question" in label:
+        label_fixed = "yes_asks_question"
+    elif "no_not_interesting" in label:
+        label_fixed = "no_not_interesting"
+    elif "yes_other" in label:
+        label_fixed = "yes_other"
+    elif "yes_blame_authorities" in label:
+        label_fixed = "yes_blame_authorities"
+    elif "yes_contains_advice" in label:
+        label_fixed = "yes_contains_advice"
+    elif "yes_calls_for_action" in label:
+        label_fixed = "yes_calls_for_action"
+    elif label.startswith("yes"):
+        label_fixed = label
+    else:
+        label_fixed = None
+
+    return label_fixed
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py
new file mode 100755
index 00000000..2d29026a
--- /dev/null
+++ 
b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,105 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AttentionworthyTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'هل تتطلب "التغريدة" انتباه الصحفيين، ومدققي الحقائق، والجهات الحكومية؟ قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: تناقش_الإجراء_المتخذ، ضارة، تناقش_العلاج، تطرح_سؤال، غير_مثيرة_للاهتمام، آخر، تلوم_السلطات، تحتوي_على_نصيحة، تدعو_لإجراء. قدم التصنيف فقط.\n\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في وسائل التواصل الاجتماعي. 
يمكنك تحليل و تصنيف التغريدات.",
+        },
+        {
+            "role": "user",
+            "content": prompt_string,
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+
+    label = (
+        label.replace(" - ", ", ")
+        .replace(",", "")
+        .replace(".", "")
+        .replace("label:", "")
+        .replace("التصنيف: ", "")
+    )
+    label_fixed = label.lower().strip()  # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة
+
+    if label_fixed.startswith("لا"):
+        label_fixed = "no_not_interesting"
+    elif "تناقش_العلاج" in label_fixed:
+        label_fixed = "yes_discusses_cure"
+    elif "ضارة" in label_fixed:
+        label_fixed = "harmful"
+    elif "تلوم_السلطات" in label_fixed:
+        label_fixed = "yes_blame_authorities"  # fixed typo: was "yes_blame_authoritie", not in class_labels
+    elif "تدعو_لإجراء" in label_fixed:
+        label_fixed = "yes_calls_for_action"
+    elif "تناقش_الإجراء_المتخذ" in label_fixed:
+        label_fixed = "yes_discusses_action_taken"
+    elif "علاج" in label_fixed:
+        label_fixed = "yes_discusses_cure"
+    elif "تطرح سؤال" in label_fixed:
+        label_fixed = "yes_asks_question"
+    elif "تطرح_سؤال" in label_fixed:
+        label_fixed = "yes_asks_question"
+    elif "غير_مثيرة_للاهتمام" in label_fixed:
+        label_fixed = "no_not_interesting"
+    elif "آخر" in label_fixed:
+        label_fixed = "yes_other"
+    elif "السلطات" in label_fixed:
+        label_fixed = "yes_blame_authorities"
+    elif "نصيحة" in label_fixed:
+        label_fixed = "yes_contains_advice"
+    elif "تدعو لإجراء" in label_fixed:
+        label_fixed = "yes_calls_for_action"
+    else:
+        label_fixed = None
+
+    return label_fixed
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py
new file mode 100755
index 00000000..62b87274
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py
@@ -0,0 +1,108 @@
+from llmebench.datasets import CT22AttentionworthyDataset
+from llmebench.models 
import OpenAIModel +from llmebench.tasks import AttentionworthyTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'Does the tweet require attention from journalists, fact-checker, and government entities? Annotate "tweet" into one of the following categories: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "system", + "content": "You are social media expert. 
def post_process(response):
    """Map the GPT-4 English free-text answer to a canonical CT22 label.

    Args:
        response: OpenAI chat-completion response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical class label, the model's own ``yes...``
        string when it is affirmative but unrecognized, or ``None``.
    """
    label = response["choices"][0]["message"]["content"]

    # Lowercase and strip separators/punctuation/prompt echoes before matching.
    label = (
        label.lower()
        .replace(" - ", ", ")
        .replace(",", "")
        .replace(".", "")
        .replace("label:", "")
        .strip()
    )

    if label.startswith("no"):
        return "no_not_interesting"
    if "yes_discusses_covid-19_vaccine_side_effects" in label:
        return "yes_discusses_cure"
    if "yes_harmful" in label:
        return "harmful"
    # Specific label checks run before the generic startswith("yes")
    # fallback; previously they sat after it and were unreachable, so a
    # verbose answer starting with "yes" leaked raw model text.
    if "yes_discusses_action_taken" in label:
        return "yes_discusses_action_taken"
    if "yes_discusses_cure" in label:
        return "yes_discusses_cure"
    if "yes_asks_question" in label:
        return "yes_asks_question"
    if "yes_other" in label:
        return "yes_other"
    if "yes_blame_authoritie" in label:
        # Substring also covers the correct spelling; always return the
        # canonical plural label from class_labels.
        return "yes_blame_authorities"
    if "yes_contains_advice" in label:
        return "yes_contains_advice"
    if "yes_calls_for_action" in label:
        return "yes_calls_for_action"
    if "harmful" in label:
        return "harmful"
    if "no_not_interesting" in label:
        return "no_not_interesting"
    if label.startswith("yes"):
        # Affirmative but unrecognized: keep the model's own (cleaned) string.
        return label
    return None
00000000..d673c19c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,103 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AttentionworthyTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'هل تتطلب "التغريدة" انتباه الصحفيين، ومدققي الحقائق، والجهات الحكومية؟ قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action. قدم التصنيف فقط.\n\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في وسائل التواصل الاجتماعي. 
def post_process(response):
    """Map the GPT-4 mixed-prompt answer (English labels) to a canonical CT22 label.

    Args:
        response: OpenAI chat-completion response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical class label, the model's own ``yes...``
        string when it is affirmative but unrecognized, or ``None``.
    """
    label = response["choices"][0]["message"]["content"]

    # Strip separators, punctuation and echoed prompt prefixes before matching.
    label = (
        label.replace(" - ", ", ")
        .replace(",", "")
        .replace(".", "")
        .replace("label:", "")
    )
    label = label.lower().strip()

    if label.startswith("no"):
        return "no_not_interesting"
    if "yes_discusses_covid-19_vaccine_side_effects" in label:
        return "yes_discusses_cure"
    if "yes_harmful" in label:
        return "harmful"
    if "yes_discusses_action_taken" in label:
        return "yes_discusses_action_taken"
    if "yes_discusses_cure" in label:
        return "yes_discusses_cure"
    if "yes_asks_question" in label:
        return "yes_asks_question"
    if "yes_other" in label:
        return "yes_other"
    if "yes_blame_authoritie" in label:
        # Previously the truncated "yes_blame_authoritie" check fired first
        # and mapped even correctly spelled outputs to a typo'd label that is
        # not in class_labels; always return the canonical plural form.
        return "yes_blame_authorities"
    if "yes_contains_advice" in label:
        return "yes_contains_advice"
    if "yes_calls_for_action" in label:
        return "yes_calls_for_action"
    if "harmful" in label:
        return "harmful"
    if "no_not_interesting" in label:
        return "no_not_interesting"
    if label.startswith("yes"):
        # Affirmative but unrecognized: keep the model's own (cleaned) string.
        return label
    return None
def few_shot_prompt(input_sample, base_prompt, examples):
    """Build an Arabic few-shot prompt: instructions, examples, then the target tweet.

    Args:
        input_sample: The tweet to classify (label left blank for the model).
        base_prompt: Instruction text placed at the top of the prompt.
        examples: Iterable of ``{"input": ..., "label": ...}`` demonstrations.

    Returns:
        str: the assembled prompt.
    """
    out_prompt = base_prompt + "\n"
    for example in examples:
        # "\n" before the label line: the original "\التصنيف" was a literal
        # backslash (\ + Arabic letter is not an escape), which glued the
        # label onto the tweet text on a single line.
        out_prompt += (
            "التغريدة: " + example["input"] + "\nالتصنيف: " + example["label"] + "\n\n"
        )

    # Append the target tweet with the label left blank for the model to fill.
    out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n"

    return out_prompt
def post_process(response):
    """Map the Jais Arabic few-shot answer to a canonical CT22 label.

    Args:
        response: FastChat/OpenAI-style response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical English class label, or ``None`` when the
        output cannot be mapped.
    """
    label = response["choices"][0]["message"]["content"]

    # Remove echoed prompt prefixes, lowercase and trim before matching.
    # (Dropped: debug print and the no-op .replace("", "") calls.)
    label = label.replace("label:", "").replace("التصنيف:", "").lower().strip()

    # Ordered keyword checks; masculine Arabic label forms as in the prompt.
    if label.startswith("لا"):
        return "no_not_interesting"
    if "يناقش_العلاج" in label:
        return "yes_discusses_cure"
    if "ضار" in label:
        return "harmful"
    if "يلوم_السلطات" in label:
        # Fixed typo: previously returned "yes_blame_authoritie", which is
        # not in class_labels.
        return "yes_blame_authorities"
    if "يدعو_لإجراء" in label:
        return "yes_calls_for_action"
    if "يناقش_الإجراء_المتخذ" in label:
        return "yes_discusses_action_taken"
    if "علاج" in label:
        return "yes_discusses_cure"
    if "يطرح سؤال" in label:
        return "yes_asks_question"
    if "غير_مثير_للاهتمام" in label:
        return "no_not_interesting"
    if "آخر" in label:
        return "yes_other"
    if "السلطات" in label:
        return "yes_blame_authorities"
    if "نصيحة" in label:
        return "yes_contains_advice"
    if "يدعو لإجراء" in label:
        return "yes_calls_for_action"
    return None
llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AttentionworthyTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 100, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + out_prompt = ( + out_prompt + + "tweet: " + + example["input"] + + "\nlabel: " + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'Annotate the following "tweet" into one of the following categories: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action. 
def post_process(response):
    """Map the Jais English few-shot answer to a canonical CT22 label.

    Args:
        response: FastChat/OpenAI-style response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical class label, the model's own ``yes...``
        string when it is affirmative but unrecognized, or ``None``.
    """
    label = response["choices"][0]["message"]["content"]

    # Remove the echoed prefix, lowercase and trim before matching.
    # (Dropped: the no-op .replace("", "") calls.)
    label = label.replace("label:", "").lower().strip()

    if label.startswith("no"):
        return "no_not_interesting"
    if "yes_discusses_covid-19_vaccine_side_effects" in label:
        return "yes_discusses_cure"
    if "yes_harmful" in label:
        return "harmful"
    # Specific label checks run before the generic startswith("yes")
    # fallback; previously they were unreachable for "yes..." answers.
    if "yes_discusses_action_taken" in label:
        return "yes_discusses_action_taken"
    if "yes_discusses_cure" in label:
        return "yes_discusses_cure"
    if "yes_asks_question" in label:
        return "yes_asks_question"
    if "yes_other" in label:
        return "yes_other"
    if "yes_blame_authoritie" in label:
        # Substring also covers the correct spelling; always return the
        # canonical plural label from class_labels.
        return "yes_blame_authorities"
    if "yes_contains_advice" in label:
        return "yes_contains_advice"
    if "yes_calls_for_action" in label:
        return "yes_calls_for_action"
    if "harmful" in label:
        return "harmful"
    if "no_not_interesting" in label:
        return "no_not_interesting"
    if label.startswith("yes"):
        # Affirmative but unrecognized: keep the model's own (cleaned) string.
        return label
    return None
00000000..5abdd7a0 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,148 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AttentionworthyTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + +# def few_shot_prompt(input_sample, examples): +# base_prompt = ( +# 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' +# 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' +# 'yes_contains_advice، yes_calls_for_action. 
def few_shot_prompt(input_sample, base_prompt, examples):
    """Build an Arabic few-shot prompt: instructions, examples, then the target tweet.

    Args:
        input_sample: The tweet to classify (label left blank for the model).
        base_prompt: Instruction text placed at the top of the prompt.
        examples: Iterable of ``{"input": ..., "label": ...}`` demonstrations.

    Returns:
        str: the assembled prompt.
    """
    out_prompt = base_prompt + "\n"
    for example in examples:
        # "\n" before the label line: the original "\التصنيف" was a literal
        # backslash (\ + Arabic letter is not an escape), which glued the
        # label onto the tweet text on a single line.
        out_prompt += (
            "التغريدة: " + example["input"] + "\nالتصنيف: " + example["label"] + "\n\n"
        )

    # Append the target tweet with the label left blank for the model to fill.
    out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n"

    return out_prompt
def post_process(response):
    """Map the Jais mixed-prompt few-shot answer (English labels) to a canonical CT22 label.

    Args:
        response: FastChat/OpenAI-style response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical class label, the model's own ``yes...``
        string when it is affirmative but unrecognized, or ``None``.
    """
    label = response["choices"][0]["message"]["content"]

    # Clean once and match against the cleaned text. The original computed a
    # lowercased/stripped label_fixed but then compared the RAW string, so
    # the normalization had no effect. (Also dropped: debug print and the
    # dead `label in class_labels` branch — every class label is already
    # matched by the checks below, so the config() lookup was unnecessary.)
    label = label.replace("label:", "").replace("التصنيف:", "").lower().strip()

    if label.startswith("no"):
        return "no_not_interesting"
    if "yes_discusses_covid-19_vaccine_side_effects" in label:
        return "yes_discusses_cure"
    if "yes_harmful" in label:
        return "harmful"
    if "yes_discusses_action_taken" in label:
        return "yes_discusses_action_taken"
    if "yes_discusses_cure" in label:
        return "yes_discusses_cure"
    if "yes_asks_question" in label:
        return "yes_asks_question"
    if "yes_other" in label:
        return "yes_other"
    if "yes_blame_authoritie" in label:
        # Covers both the canonical spelling and the truncated typo; always
        # return the canonical plural label from class_labels.
        return "yes_blame_authorities"
    if "yes_contains_advice" in label:
        return "yes_contains_advice"
    if "yes_calls_for_action" in label:
        return "yes_calls_for_action"
    if "harmful" in label:
        return "harmful"
    if label.startswith("yes"):
        # Affirmative but unrecognized: keep the model's own (cleaned) string.
        return label
    return None
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,101 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AttentionworthyTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: يناقش_الإجراء_المتخذ، ضار، يناقش_العلاج، يطرح_سؤال، غير_مثير_للاهتمام، آخر، يلوم_السلطات، يحتوي_على_نصيحة، يدعو_لإجراء. 
def post_process(response):
    """Map the Jais Arabic zero-shot answer to a canonical CT22 label.

    Args:
        response: FastChat/OpenAI-style response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical English class label, or ``None`` when the
        output cannot be mapped.
    """
    label = response["choices"][0]["message"]["content"]

    # Remove echoed prompt prefixes, lowercase and trim before matching.
    # (Dropped: the no-op .replace("", "") calls.)
    label = label.replace("label:", "").replace("التصنيف:", "").lower().strip()

    # Ordered keyword checks; masculine Arabic label forms as in the prompt.
    if label.startswith("لا"):
        return "no_not_interesting"
    if "يناقش_العلاج" in label:
        return "yes_discusses_cure"
    if "ضار" in label:
        return "harmful"
    if "يلوم_السلطات" in label:
        # Fixed typo: previously returned "yes_blame_authoritie", which is
        # not in class_labels.
        return "yes_blame_authorities"
    if "يدعو_لإجراء" in label:
        return "yes_calls_for_action"
    if "يناقش_الإجراء_المتخذ" in label:
        return "yes_discusses_action_taken"
    if "علاج" in label:
        return "yes_discusses_cure"
    if "يطرح سؤال" in label:
        return "yes_asks_question"
    if "غير_مثير_للاهتمام" in label:
        return "no_not_interesting"
    if "آخر" in label:
        return "yes_other"
    if "السلطات" in label:
        return "yes_blame_authorities"
    if "نصيحة" in label:
        return "yes_contains_advice"
    if "يدعو" in label:
        return "yes_calls_for_action"
    return None
import CT22AttentionworthyDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AttentionworthyTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'"You are social media expert. You can annotate important tweets and require attention from journalists, fact-checker, and government entities.' 
def post_process(response):
    """Map the Jais English zero-shot answer to a canonical CT22 label.

    Args:
        response: FastChat/OpenAI-style response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical class label, the model's own ``yes...``
        string when it is affirmative but unrecognized, or ``None``.
    """
    label = response["choices"][0]["message"]["content"]

    # Remove the echoed prefix, lowercase and trim before matching.
    # (Dropped: debug print and the no-op .replace("", "") calls.)
    label = label.replace("label:", "").lower().strip()

    if label.startswith("no"):
        return "no_not_interesting"
    if "yes_discusses_covid-19_vaccine_side_effects" in label:
        return "yes_discusses_cure"
    if "yes_harmful" in label:
        return "harmful"
    # Specific label checks run before the generic startswith("yes")
    # fallback; previously they were unreachable for "yes..." answers.
    if "yes_discusses_action_taken" in label:
        return "yes_discusses_action_taken"
    if "yes_discusses_cure" in label:
        return "yes_discusses_cure"
    if "yes_asks_question" in label:
        return "yes_asks_question"
    if "yes_other" in label:
        return "yes_other"
    if "yes_blame_authoritie" in label:
        # Substring also covers the correct spelling; always return the
        # canonical plural label from class_labels.
        return "yes_blame_authorities"
    if "yes_contains_advice" in label:
        return "yes_contains_advice"
    if "yes_calls_for_action" in label:
        return "yes_calls_for_action"
    if "harmful" in label:
        return "harmful"
    if "no_not_interesting" in label:
        return "no_not_interesting"
    if label.startswith("yes"):
        # Affirmative but unrecognized: keep the model's own (cleaned) string.
        return label
    return None
a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..b81c916e --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,104 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AttentionworthyTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 3, + }, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + base_prompt = ( + 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' + 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' + 'yes_contains_advice، yes_calls_for_action. 
def post_process(response):
    """Map the Jais mixed-prompt zero-shot answer (English labels) to a canonical CT22 label.

    Args:
        response: FastChat/OpenAI-style response dict; the model text is
            read from ``response["choices"][0]["message"]["content"]``.

    Returns:
        str | None: a canonical class label, the model's own ``yes...``
        string when it is affirmative but unrecognized, or ``None``.
    """
    label = response["choices"][0]["message"]["content"]

    # Clean once and match against the cleaned text. The original computed a
    # lowercased/stripped label_fixed but then compared the RAW string, so
    # the normalization had no effect. (Also dropped: debug print, the
    # duplicated `import re`, and the dead `label in class_labels` branch —
    # every class label is already matched by the checks below.)
    label = label.replace("label:", "").replace("التصنيف:", "").lower().strip()

    if label.startswith("no"):
        return "no_not_interesting"
    if "yes_discusses_covid-19_vaccine_side_effects" in label:
        return "yes_discusses_cure"
    if "yes_harmful" in label:
        return "harmful"
    if "yes_discusses_action_taken" in label:
        return "yes_discusses_action_taken"
    if "yes_discusses_cure" in label:
        return "yes_discusses_cure"
    if "yes_asks_question" in label:
        return "yes_asks_question"
    if "yes_other" in label:
        return "yes_other"
    if "yes_blame_authoritie" in label:
        # Covers both the canonical spelling and the truncated typo; always
        # return the canonical plural label from class_labels.
        return "yes_blame_authorities"
    if "yes_contains_advice" in label:
        return "yes_contains_advice"
    if "yes_calls_for_action" in label:
        return "yes_calls_for_action"
    if "harmful" in label:
        return "harmful"
    if label.startswith("yes"):
        # Affirmative but unrecognized: keep the model's own (cleaned) string.
        return label
    return None
def few_shot_prompt(input_sample, base_prompt, examples):
    """Build an Arabic few-shot prompt: instructions, examples, then the target tweet.

    Args:
        input_sample: The tweet to classify (label left blank for the model).
        base_prompt: Instruction text placed at the top of the prompt.
        examples: Iterable of ``{"input": ..., "label": ...}`` demonstrations.

    Returns:
        str: the assembled prompt.
    """
    out_prompt = base_prompt + "\n"
    for example in examples:
        # "\n" before the label line: the original "\التصنيف" was a literal
        # backslash (\ + Arabic letter is not an escape), which glued the
        # label onto the tweet text on a single line.
        out_prompt += (
            "التغريدة: " + example["input"] + "\nالتصنيف: " + example["label"] + "\n\n"
        )

    # Append the target tweet with the label left blank for the model to fill.
    out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n"

    return out_prompt
يدعو_لإجراء. قدم التصنيف فقط.\n\n' + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + + + + + + +import re + +def post_process(response): + print(response) + if "output" in response: + # إذا كان "المحتوى" في استجابة "الرسائل" + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("استجابة .. " + str(response)) + label = "" + + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة + + label_fixed = label_fixed.replace("التصنيف:", "") + if label_fixed.startswith("لا"): + label_fixed = "no_not_interesting" + elif "يناقش_العلاج" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "ضار" in label_fixed: + label_fixed = "harmful" + elif "يلوم_السلطات" in label_fixed: + label_fixed = "yes_blame_authoritie" + elif "يدعو_لإجراء" in label_fixed: + label_fixed = "yes_calls_for_action" + elif "يناقش_الإجراء_المتخذ" in label_fixed: + label_fixed = "yes_discusses_action_taken" + elif "ضار" in label_fixed: + label_fixed = "harmful" + elif "علاج" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "يطرح سؤال" in label_fixed: + label_fixed = "yes_asks_question" + elif "غير_مثير_للاهتمام" in label_fixed: + label_fixed = "no_not_interesting" + elif "آخر" in label_fixed: + label_fixed = "yes_other" + elif "السلطات" in label_fixed: + label_fixed = "yes_blame_authorities" + elif "نصيحة" in label_fixed: + label_fixed = "yes_contains_advice" + elif "يدعو لإجراء" in label_fixed: + label_fixed = "yes_calls_for_action" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..138a755a --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py @@ -0,0 +1,118 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import AzureModel +from llmebench.tasks import AttentionworthyTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 100, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + out_prompt = ( + out_prompt + + "tweet: " + + example["input"] + + "\nlabel: " + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'Annotate the following "tweet" into one of the following categories: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, 
yes_blame_authorities, yes_contains_advice, yes_calls_for_action. Provide only label.\n\n' + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +import re + +def post_process(response): + print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + + label_fixed = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace + + if label.startswith("no"): + label_fixed = "no_not_interesting" + elif "yes_discusses_covid-19_vaccine_side_effects" in label: + label_fixed = "yes_discusses_cure" + elif "yes_harmful" in label: + label_fixed = "harmful" + elif label.startswith("yes"): + label_fixed = label + elif "yes_blame_authoritie" in label: + label_fixed = "yes_blame_authoritie" + elif "yes_discusses_action_taken" in label: + label_fixed = "yes_discusses_action_taken" + elif "harmful" in label: + label_fixed = "harmful" + elif "yes_discusses_cure" in label: + label_fixed = "yes_discusses_cure" + elif "yes_asks_question" in label: + label_fixed = "yes_asks_question" + elif "no_not_interesting" in label: + label_fixed = "no_not_interesting" + elif "yes_other" in label: + label_fixed = "yes_other" + elif "yes_blame_authorities" in label: + label_fixed = "yes_blame_authorities" + elif "yes_contains_advice" in label: + label_fixed = "yes_contains_advice" + elif "yes_calls_for_action" in label: + label_fixed = "yes_calls_for_action" + else : + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..1effd23d --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,125 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import AzureModel +from llmebench.tasks import AttentionworthyTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 100, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' + 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' + 'yes_contains_advice، yes_calls_for_action. 
قدم التصنيف فقط.\n\n' + "إليك بعض الأمثلة:\n\n" + ) + for index, example in enumerate(examples): + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {example['label']}\n\n" + ) + base_prompt += ( + f"الآن، قم بتقييم التغريدة الجديدة التالية:\n" + f"التغريدة: '{input_sample}'\n" + f"التصنيف: " + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + }, + ] + +import re + +def post_process(response): + print(response) + + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + + label_fixed = label.lower().strip() + label_list = config()["model_args"]["class_labels"] + + label = label.replace("label:", "").strip() + + + label = label.replace("", "").replace("", "") + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة + + label_fixed = label_fixed.replace("التصنيف:", "") + + if label.startswith("no"): + label_fixed = "no_not_interesting" + elif "yes_discusses_covid-19_vaccine_side_effects" in label: + label_fixed = "yes_discusses_cure" + elif "yes_harmful" in label: + label_fixed = "harmful" + elif label.startswith("yes"): + label_fixed = label + elif "yes_blame_authoritie" in label: + label_fixed = "yes_blame_authoritie" + elif "yes_discusses_action_taken" in label: + label_fixed = "yes_discusses_action_taken" + elif "harmful" in label: + label_fixed = "harmful" + elif "yes_discusses_cure" in label: + label_fixed = "yes_discusses_cure" + elif "yes_asks_question" in label: + label_fixed = "yes_asks_question" + elif "no_not_interesting" in label: + label_fixed = "no_not_interesting" + elif "yes_other" in label: + label_fixed = "yes_other" + elif "yes_blame_authorities" in label: + label_fixed = "yes_blame_authorities" + elif 
"yes_contains_advice" in label: + label_fixed = "yes_contains_advice" + elif "yes_calls_for_action" in label: + label_fixed = "yes_calls_for_action" + elif label in label_list: + label_fixed = label + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..12e7fd5c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,104 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import AzureModel +from llmebench.tasks import AttentionworthyTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 100, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: يناقش_الإجراء_المتخذ، ضار، يناقش_العلاج، يطرح_سؤال، غير_مثير_للاهتمام، آخر، يلوم_السلطات، يحتوي_على_نصيحة، يدعو_لإجراء. قدم التصنيف فقط.\n\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + + { + "role": "user", + "content": prompt_string, + }, + ] + +import re + +def post_process(response): + print(response) + if "output" in response: + # إذا كان "المحتوى" في استجابة "الرسائل" + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("استجابة .. 
" + str(response)) + label = "" + + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة + + label_fixed = label_fixed.replace("التصنيف:", "") + if label_fixed.startswith("لا"): + label_fixed = "no_not_interesting" + elif "يناقش_العلاج" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "ضار" in label_fixed: + label_fixed = "harmful" + elif "يلوم_السلطات" in label_fixed: + label_fixed = "yes_blame_authoritie" + elif "يدعو_لإجراء" in label_fixed: + label_fixed = "yes_calls_for_action" + elif "يناقش_الإجراء_المتخذ" in label_fixed: + label_fixed = "yes_discusses_action_taken" + elif "ضار" in label_fixed: + label_fixed = "harmful" + elif "علاج" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "يطرح سؤال" in label_fixed: + label_fixed = "yes_asks_question" + elif "غير_مثير_للاهتمام" in label_fixed: + label_fixed = "no_not_interesting" + elif "آخر" in label_fixed: + label_fixed = "yes_other" + elif "السلطات" in label_fixed: + label_fixed = "yes_blame_authorities" + elif "نصيحة" in label_fixed: + label_fixed = "yes_contains_advice" + elif "يدعو لإجراء" in label_fixed: + label_fixed = "yes_calls_for_action" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..f3e1c213 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,105 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import AzureModel +from llmebench.tasks import AttentionworthyTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic 
Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 100, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'"You are social media expert. You can annotate important tweets and require attention from journalists, fact-checker, and government entities.' + f'Annotate "tweet" into one of the following categories: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + + { + "role": "user", + "content": prompt_string, + }, + ] + +import re + +def post_process(response): + print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. 
" + str(response)) + label = "" + + label_fixed = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace + + # Update conditions to match labels without surrounding whitespace + if label_fixed.startswith("no"): + label_fixed = "no_not_interesting" + elif "yes_discusses_covid-19_vaccine_side_effects" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "yes_harmful" in label_fixed: + label_fixed = "harmful" + elif label_fixed.startswith("yes"): + label_fixed = label_fixed.strip() # Keep the original label if it starts with "yes" + elif "yes_blame_authoritie" in label_fixed: + label_fixed = "yes_blame_authoritie" + elif "yes_discusses_action_taken" in label_fixed: + label_fixed = "yes_discusses_action_taken" + elif "harmful" in label_fixed: + label_fixed = "harmful" + elif "yes_discusses_cure" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "yes_asks_question" in label_fixed: + label_fixed = "yes_asks_question" + elif "no_not_interesting" in label_fixed: + label_fixed = "no_not_interesting" + elif "yes_other" in label_fixed: + label_fixed = "yes_other" + elif "yes_blame_authorities" in label_fixed: + label_fixed = "yes_blame_authorities" + elif "yes_contains_advice" in label_fixed: + label_fixed = "yes_contains_advice" + elif "yes_calls_for_action" in label_fixed: + label_fixed = "yes_calls_for_action" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..655a230c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,103 @@ +from llmebench.datasets import CT22AttentionworthyDataset +from llmebench.models import AzureModel +from llmebench.tasks import 
AttentionworthyTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22AttentionworthyDataset, + "task": AttentionworthyTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "yes_discusses_action_taken", + "harmful", + "yes_discusses_cure", + "yes_asks_question", + "no_not_interesting", + "yes_other", + "yes_blame_authorities", + "yes_contains_advice", + "yes_calls_for_action", + ], + "max_tries": 100, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' + 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' + 'yes_contains_advice، yes_calls_for_action. قدم التصنيف فقط.\n\n' + f"التغريدة: '{input_sample}'\n" + "التصنيف: " + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +import re + +def post_process(response): + print(response) + if "output" in response: + # إذا كان "المحتوى" في استجابة "الرسائل" + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("استجابة .. 
" + str(response)) + label = "" + + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة + + label_fixed = label_fixed.replace("التصنيف:", "") + if label_fixed.startswith("لا"): + label_fixed = "no_not_interesting" + elif "يناقش_العلاج" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "ضار" in label_fixed: + label_fixed = "harmful" + elif "يلوم_السلطات" in label_fixed: + label_fixed = "yes_blame_authorities" + elif "يدعو_لإجراء" in label_fixed: + label_fixed = "yes_calls_for_action" + elif "يناقش_الإجراء_المتخذ" in label_fixed: + label_fixed = "yes_discusses_action_taken" + elif "ضار" in label_fixed: + label_fixed = "harmful" + elif "علاج" in label_fixed: + label_fixed = "yes_discusses_cure" + elif "يطرح سؤال" in label_fixed: + label_fixed = "yes_asks_question" + elif "غير_مثير_للاهتمام" in label_fixed: + label_fixed = "no_not_interesting" + elif "آخر" in label_fixed: + label_fixed = "yes_other" + elif "السلطات" in label_fixed: + label_fixed = "yes_blame_authorities" + elif "نصيحة" in label_fixed: + label_fixed = "yes_contains_advice" + elif "يدعو لإجراء" in label_fixed: + label_fixed = "yes_calls_for_action" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..10cef400 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py @@ -0,0 +1,85 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import CheckworthinessTask + + + + + 
Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "اليك بعض الامثلة:\n\n" + for index, example in enumerate(examples): + label = "لا" if example["label"] == "0" else "نعم" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + label + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'هل تحتوي هذه "التغريدة" على ادعاء يستحق التحقق منه؟ أجب بـ نعم أو لا. قدم التصنيف فقط دون شرح.' 
+ return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip().lower() + + if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + return "0" + elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py new file mode 100755 index 00000000..61ec297b --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py @@ -0,0 +1,86 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + label = "no" if example["label"] == "0" else "yes" + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + label + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy. Provide only label.' 
+ return [ + { + "role": "system", + "content": "You can analyze and classify tweets.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip().lower() + + if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + return "0" + elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + return "1" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..f88a3f6c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py @@ -0,0 +1,82 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import CheckworthinessTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "اليك بعض الامثلة:\n\n" + for index, example in enumerate(examples): + label = "not_checkworthy" if example["label"] == "0" else "checkworthy" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\التصنيف: " + + label + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = 'هل تحتوي هذه "التغريدة" على ادعاء يستحق التحقق منه؟ أجب بـ checkworthy أو not_checkworthy' + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("label:", "").strip().lower() + + if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + return "0" + elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..e6992844 --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,65 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'هل تحتوي هذه "التغريدة" على ادعاء يستحق التحقق منه؟ أجب بـ نعم أو لا. 
قدم التصنيف فقط دون شرح.\n\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip().lower() + + if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + return "0" + elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + return "1" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..2a69cf50 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py @@ -0,0 +1,65 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+        f'Does the following "tweet" contain a factual claim that is worth fact-checking? return checkworthy if it does or not_checkworthy otherwise. Return only label.\n\n'
University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'هل تحتوي هذه "التغريدة" على ادعاء يستحق التحقق منه؟ أجب بـ checkworthy أو not_checkworthy\n\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + + + + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip().lower() + + if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + return "0" + elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + return "1" + else: + return None + + + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..84f8dff8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,83 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import FastChatModel +from llmebench.tasks import CheckworthinessTask + + + + + 
+            + "\nالتصنيف: "
+            + label
+            + "\n\n"
+        )
+
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n"
قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + + label = label.lower() + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..e127207a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py @@ -0,0 +1,79 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import FastChatModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 100, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "These are some examples:\n\n" + for index, example in enumerate(examples): + label = "not checkworthy" if example["label"] == "0" else "checkworthy" + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":\n" + + "Tweet: " + + example["input"] + + "\n" + + "Classification: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "Tweet: " + input_sample + "\nClassification: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = ( + 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' + ) + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + + label = label.lower() + if "لا" in label or "not" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..3d50b9cb --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,81 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import 
FastChatModel +from llmebench.tasks import CheckworthinessTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "not checkworthy" if example["label"] == "0" else "checkworthy" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = ( + ' صنف التغريدة الى "checkworthy" أو "not checkworthy". 
قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + + + label = label.lower() + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..293a46e3 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,59 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import FastChatModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + + { + "role": "user", + "content": base_prompt+ input_sample, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + + + label = label.lower() + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..ed20dfeb --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py @@ -0,0 +1,68 @@ +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import FastChatModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+    }
+
+
+
+
+def config():
Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + { + "role": "user", + "content": base_prompt + input_sample, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + + + + label = label.lower() + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py new file mode 100755 index 00000000..9a55e26b --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py @@ -0,0 +1,89 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import AzureModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa 
+            + "\nالتصنيف: "
+            + label
+            + "\n\n"
+        )
+
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n"
" + str(response)) + label = "" + + label = label.lower() + + if "لا أستطيع" in label: + return random.choice(["0","1"]) + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..2a3fa2fa --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py @@ -0,0 +1,88 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import AzureModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 100, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "These are some examples:\n\n" + for index, example in enumerate(examples): + label = "not checkworthy" if example["label"] == "0" else "checkworthy" + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":\n" + + "Tweet: " + + example["input"] + + "\n" + + "Classification: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "Tweet: " + input_sample + "\nClassification: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = ( + 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' + ) + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + +import random +def post_process(response): + print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. 
" + str(response)) + label = "" + label = label.lower() + + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + if "not" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..82ce1f00 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,86 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import AzureModel +from llmebench.tasks import CheckworthinessTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "not checkworthy" if example["label"] == "0" else "checkworthy" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = ( + ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + +import random +def post_process(response): + print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. 
" + str(response)) + label = "" + label = label.lower() + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label or "not" in label: + return "0" + return "1" + return label + diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..4f70e746 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,63 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import AzureModel +from llmebench.tasks import CheckworthinessTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + ' صنف التغريدة الى "checkworthy" أو "not checkworthy". 
قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + { + "role": "user", + "content": base_prompt + input_sample, + }, + ] + +import random +def post_process(response): + print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + label = label.lower() + + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py new file mode 100755 index 00000000..c6b8248b --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py @@ -0,0 +1,65 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import AzureModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + + { + "role": "user", + "content": base_prompt+ input_sample, + }, + ] + +import random +def post_process(response): + print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + label = label.lower() + if "لا أستطيع" in label: + return random.choice(["0","1"]) + if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py new file mode 100755 index 00000000..c5671140 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py @@ -0,0 +1,63 @@ +import re + +from llmebench.datasets import CT22CheckworthinessDataset +from llmebench.models import AzureModel +from llmebench.tasks import CheckworthinessTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive 
analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22CheckworthinessDataset, + "task": CheckworthinessTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' + ) + return [ + { + "role": "user", + "content": base_prompt + input_sample, + }, + ] +import random +def post_process(response): + print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. 
" + str(response)) + label = "" + label = label.lower() + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + if "لا" in label or "not" in label or "no" in label or "ليس" in label or "ليست" in label: + return "0" + return "1" + return label diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..4290717c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py @@ -0,0 +1,94 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. 
قدم التصنيف فقط.\n" + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "لا" if example["label"] == "0" else "نعم" + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "لا" in input_label + or "لا تحتوي" in input_label + or "ليست" in input_label or "not" in input_label + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + return "0" + + elif ( + "نعم" in input_label + or "تحتوي" in input_label + or "yes" in input_label + or "contains" in input_label + or "label: 1" in input_label + ): + return "1" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py new file mode 100755 index 00000000..6bb020a6 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py @@ -0,0 +1,90 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + + + + +def 
metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = "Does the following tweet contain a factual claim? If it does, return 'yes', if it does not, return 'no'. Provide only label.\n" + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "You are expert in text analysis and classification.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "no" if example["label"] == "0" else "yes" + out_prompt = ( + out_prompt + "tweet: " + example["input"] + "\nlabel: " + label + "\n\n" + ) + + # Append the tweet we want the model to predict for but leave the label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + pred_label = "" + + if ( + "yes" in input_label + or 
"contains a factual claim" in input_label + or "label: 1" in input_label + ): + pred_label = "1" + if ( + input_label == "no" + or "label: 0" in input_label + or "label: no" in input_label + or "not contain a factual claim" in input_label + or "doesn't contain a factual claim" in input_label + ): + pred_label = "0" + + if pred_label == "": + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..63efaa29 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py @@ -0,0 +1,94 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. 
قدم التصنيف فقط.\n"
+    prompt = few_shot_prompt(input_sample, base_prompt, examples)
+
+    return [
+        {
+            "role": "system",
+            "content": "أنت خبير في تحليل و تصنيف التغريدات.",
+        },
+        {
+            "role": "user",
+            "content": prompt,
+        },
+    ]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot prompt: labeled examples, then the target tweet."""
+    out_prompt = base_prompt + "\n"
+    for example in examples:
+        # Found chatgpt confused when using 0 and 1 in the prompt
+        label = "no" if example["label"] == "0" else "yes"
+        # Fixed: was "\التصنيف: " — an invalid escape leaving a literal
+        # backslash instead of the newline used by the English sibling asset.
+        out_prompt = (
+            out_prompt + "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n"
+        )
+
+    # Append the sentence we want the model to predict for but leave the Label blank
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n"
+
+    return out_prompt
+
+
+def post_process(response):
+    """Map the model's free-text answer onto the "0"/"1" class labels."""
+    input_label = response["choices"][0]["message"]["content"]
+    input_label = input_label.replace(".", "").strip().lower()
+
+    if (
+        "لا" in input_label
+        or "لا تحتوي" in input_label
+        or "ليست" in input_label
+        or "not" in input_label
+        or "no" in input_label
+        or "label: 0" in input_label
+        or "label: no" in input_label
+        or "not contain" in input_label
+        or "doesn't contain" in input_label
+    ):
+        return "0"
+
+    elif (
+        "نعم" in input_label
+        or "تحتوي" in input_label
+        or "yes" in input_label
+        or "contains" in input_label
+        or "label: 1" in input_label
+    ):
+        return "1"
+    else:
+        return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py
new file mode 100755
index 00000000..9bc989bb
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py
@@ -0,0 +1,81 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import 
ClaimDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. قدم التصنيف فقط.\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + + + + + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "لا" in input_label + or "لا تحتوي" in input_label + or "ليست" in input_label or "not" in input_label + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + return "0" + + elif ( + "نعم" in input_label + or "تحتوي" in input_label + or "yes" in input_label + or "contains" in input_label + or "label: 1" in input_label + ): + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py 
b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..fbb6ff60 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py @@ -0,0 +1,75 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"Does the following tweet contain a factual claim? If it does, return 'yes', if it does not, return 'no'. 
Provide only label.\n\n" + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "system", + "content": "You are expert in text analysis and classification.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + pred_label = "" + + if ( + "yes" in input_label + or "contains a factual claim" in input_label + or "label: 1" in input_label + ): + pred_label = "1" + if ( + input_label == "no" + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + pred_label = "0" + + if pred_label == "": + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..681f91c7 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,81 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. قدم التصنيف فقط.\n\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + + + + + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "لا" in input_label + or "لا تحتوي" in input_label + or "ليست" in input_label + or "not" in input_label + or "no" in input_label + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + return "0" + + elif ( + "نعم" in input_label + or "تحتوي" in input_label + or "yes" in input_label + or "contains" in input_label + or "label: 1" in input_label + ): + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..272d4c44 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,70 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing 
Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + +def prompt(input_sample, few_shot_examples): + few_shot_text = "" + for example in few_shot_examples: + few_shot_text += ( + "التغريدة: " + example["input"] + "\n" + + "الإجابة: " + ("yes" if example["label"] == "1" else "no") + "\n\n" + ) + + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'. قدم فقط الإجابة.\n\n" + + few_shot_text + + "التغريدة: " + input_sample + "\n" + + "الإجابة: " + ) + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.lower() + + if "لا يمكنني" in label: + return None + if "التصنيف: " in label: + arr = label.split("التصنيف: ") + label = arr[1].strip() + + if "نعم" in label: + label_fixed = "1" + elif "لا" in label: + label_fixed = "0" + else: + label_fixed = None + + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..93409450 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py @@ -0,0 +1,84 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel 
+from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + +def prompt(input_sample, examples=None): + base_prompt = "Does this sentence contain a factual claim? Answer with 'yes' or 'no' only. Provide only the label.\n" + if examples: + user_message_content = few_shot_prompt(input_sample, base_prompt, examples) + else: + user_message_content = base_prompt + f"Sentence: {input_sample}\nLabel: " + + return [ + { + "role": "user", + "content": user_message_content + } + ] + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + label = "no" if example['label'] == "0" else "yes" + out_prompt += ( + "Sentence: " + example['input'] + "\nLabel: " + label + "\n\n" + ) + out_prompt += "Sentence: " + input_sample + "\nLabel: " + + return out_prompt + +def post_process(response): + + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + label = label.lower() + + if "لا يمكنني" in label or "I cannot" in label or "sorry" in label or "هذه المحادثة غير مناسبة" in label: + return None + if "هذه التغريدة تحتوي" in label: + return "1" + + if "not a factual claim" in label or "لا يوجد" in 
label or "not" in label or "لا" in label:
+        return "0"
+    # Anything not caught above is treated as a claim.
+    # (Removed an unreachable block that followed this unconditional
+    # return — it could never execute and its fallback logic was dead.)
+    return "1"
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py
new file mode 100755
index 00000000..e082dd3d
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py
@@ -0,0 +1,75 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "jais-13b-chat",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+ } + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } +def prompt(input_sample, few_shot_examples): + few_shot_text = "" + for example in few_shot_examples: + few_shot_text += ( + "التغريدة: " + example["input"] + "\n" + + "الإجابة: " + ("yes" if example["label"] == "1" else "no") + "\n\n" + ) + + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. قدم فقط الإجابة.\n\n" + + few_shot_text + + "التغريدة: " + input_sample + "\n" + + "الإجابة: " + ) + } + ] + +def post_process(response): + try: + label = response["choices"][0]["message"]["content"] + + label = label.replace("الإجابة:", "").strip() + label = label.lower() + if "هذه التغريدة تحتوي" in label: + return "1" + + if "لا يمكنني" in label or "I cannot" in label or "sorry" in label or "هذه المحادثة غير مناسبة" in label: + return None + if "not a factual claim" in label or "لا يوجد" in label or "not" in label or "لا" in label: + return "0" + + + + if "نعم" in label or 'yes' in label: + pred_label = "1" + elif "لا" in label or 'no' in label: + pred_label = "0" + else: + pred_label = "" + + print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + return "0" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..c04a54b1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,68 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import 
ClaimDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'.\n\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.lower() + + if "لا يمكنني" in label: + return None + if "التصنيف: " in label: + arr = label.split("التصنيف: ") + label = arr[1].strip() + + if "نعم" in label: + label_fixed = "1" + elif "لا" in label: + label_fixed = "0" + else: + label_fixed = None + + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..d934916e --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py @@ -0,0 +1,64 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models 
import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "jais-13b-chat",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    }
+
+
+def config():
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    base_prompt = (
+        f"Does this sentence contain a factual claim? Please answer with 'yes' or 'no' only\n\n"
+        f"tweet: {input_sample}\n"
+        f"label: \n"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    """Map the model's free-text answer onto the "0"/"1" class labels."""
+    label = response["choices"][0]["message"]["content"]
+
+    label = label.replace("label:", "").strip()
+    label = label.lower()
+
+    if "label: " in label:
+        arr = label.split("label: ")
+        label = arr[1].strip()
+
+    # Initialize so an unrecognized answer yields None instead of
+    # raising UnboundLocalError (matches the Arabic zero-shot asset);
+    # elif keeps a "yes" match from being overridden by the "no" check.
+    label_fixed = None
+    if "yes" in label or "نعم" in label or "the sentence contains a factual claim" in label:
+        label_fixed = "1"
+    elif "no" in label or "لا" in label:
+        label_fixed = "0"
+
+    return label_fixed
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..bb3b9961
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py
@@ -0,0 +1,75 @@
+from 
llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. 
قدم فقط الإجابة.\n\n" + f"تغريدة: {input_sample}\n" + f"التسمية: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + + +def post_process(response): + try: + label = response["choices"][0]["message"]["content"] + + label = label.replace("الإجابة:", "").strip() + label = label.lower() + if "هذه التغريدة تحتوي" in label: + return "1" + + if "لا يمكنني" in label or "I cannot" in label or "sorry" in label or "هذه المحادثة غير مناسبة" in label: + return None + if "not a factual claim" in label or "لا يوجد" in label or "not" in label or "لا" in label: + return "0" + + + + if "نعم" in label or 'yes' in label: + pred_label = "1" + elif "لا" in label or 'no' in label: + pred_label = "0" + else: + pred_label = "" + + print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + return "0" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py new file mode 100755 index 00000000..78ecee0b --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py @@ -0,0 +1,73 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import AzureModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on 
[arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": AzureModel, + "model_args": { + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + +def prompt(input_sample, few_shot_examples): + few_shot_text = "" + for example in few_shot_examples: + few_shot_text += ( + "الجملة: " + example["input"] + "\n" + + "الإجابة: " + ("نعم" if example["label"] == "1" else "لا") + "\n\n" + ) + + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه الجملة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'. قدم فقط الإجابة.\n\n" + + few_shot_text + + "الجملة: " + input_sample + "\n" + + "الإجابة: " + ) + } + ] +import random +def post_process(response): + try: + label = "" + + if "output" in response: + label = response["output"].strip().lower() + + print(f"Extracted Label: {label}") + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + + if "نعم" in label: + pred_label = "1" + elif "لا" in label: + pred_label = "0" + else: + pred_label = "" + + print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + return "0" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..a192d1bf --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py @@ -0,0 +1,78 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import AzureModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + 
"affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": AzureModel, + "model_args": { + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + +def prompt(input_sample, examples=None): + base_prompt = "Does this sentence contain a factual claim? Answer with 'yes' or 'no' only. Provide only the label.\n" + if examples: + user_message_content = few_shot_prompt(input_sample, base_prompt, examples) + else: + user_message_content = base_prompt + f"Sentence: {input_sample}\nLabel: " + + return [ + { + "role": "user", + "content": user_message_content + } + ] + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + label = "no" if example['label'] == "0" else "yes" + out_prompt += ( + "Sentence: " + example['input'] + "\nLabel: " + label + "\n\n" + ) + out_prompt += "Sentence: " + input_sample + "\nLabel: " + + return out_prompt +import random +def post_process(response): + try: + label = "" + + if "output" in response: + label = response["output"].strip().lower() + + print(f"Extracted Label: {label}") + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + + if "yes" in label: + pred_label = "1" + elif "no" in label: + pred_label = "0" + else: + pred_label = "0" + + print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + return "No Response " diff --git 
a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..db002968 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,69 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import AzureModel +from llmebench.tasks import ClaimDetectionTask +import random + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": AzureModel, + "model_args": { + "max_tries": 30, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } +def prompt(input_sample, few_shot_examples): + few_shot_text = "" + for example in few_shot_examples: + few_shot_text += ( + "التغريدة: " + example["input"] + "\n" + + "الإجابة: " + ("yes" if example["label"] == "1" else "no") + "\n\n" + ) + + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. 
قدم فقط الإجابة.\n\n" + + few_shot_text + + "التغريدة: " + input_sample + "\n" + + "الإجابة: " + ) + } + ] + +def post_process(response): + try: + label = "" + + if "output" in response: + label = response["output"].strip().lower() + + print(f"Extracted Label: {label}") + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + if "نعم" in label or 'yes' in label: + pred_label = "1" + elif "لا" in label or 'no' in label: + pred_label = "0" + else: + pred_label = "" + + print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + return "0" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..498b6d36 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,69 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import AzureModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": AzureModel, + "model_args": {"max_tries": 30}, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'.\n\n" + + f"التغريدة: {input_sample}\n" + + "التصنيف: " + ), + } + ] +import random +def post_process(response): + try: + label = "" + + # Assuming 'response' contains an 'output' directly. Adjust if structure differs. + if "output" in response: + label = response["output"].strip().lower() + + # Debug print to check the extracted label + print(f"Extracted Label: {label}") + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + + # Determining the prediction label based on the response content + if "نعم" in label or "contains a factual claim" in label or "label: 1" in label: + pred_label = "1" + elif "لا" in label or "label: 0" in label or "does not contain a factual claim" in label or "label: no" in label: + pred_label = "0" + else: + # If none of the expected labels are found, default to a negative claim (most conservative approach) + pred_label = "0" + + # Debug print to check the final predicted label + #print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + # Return a default negative label in case of error to prevent unknown targets + return "" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..eec5a9b1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,69 @@ +from llmebench.datasets import 
CT22ClaimDataset +from llmebench.models import AzureModel +from llmebench.tasks import ClaimDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": AzureModel, + "model_args": {"max_tries": 30}, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "Does this sentence contain a factual claim? Please answer with 'yes' or 'no' only.\n\n" + + f"Sentence: {input_sample}\n" + + "Label: " + ), + } + ] +import random +def post_process(response): + try: + label = "" + + # Assuming 'response' contains an 'output' directly. Adjust if structure differs. 
+ if "output" in response: + label = response["output"].strip().lower() + + # Debug print to check the extracted label + print(f"Extracted Label: {label}") + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + + # Determining the prediction label based on the response content + if "yes" in label or "contains a factual claim" in label or "label: 1" in label: + pred_label = "1" + elif "no" in label or "label: 0" in label or "does not contain a factual claim" in label or "label: no" in label: + pred_label = "0" + else: + # If none of the expected labels are found, default to a negative claim (most conservative approach) + pred_label = "0" + + # Debug print to check the final predicted label + print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + # Return a default negative label in case of error to prevent unknown targets + return "0" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..103090b0 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,67 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import AzureModel +from llmebench.tasks import ClaimDetectionTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) 
or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": AzureModel, + "model_args": {"max_tries": 30}, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'.\n\n" + + f"التغريدة: {input_sample}\n" + + "التصنيف: " + ), + } + ] +import random +def post_process(response): + try: + label = "" + + # Assuming 'response' contains an 'output' directly. Adjust if structure differs. + if "output" in response: + label = response["output"].strip().lower() + + # Debug print to check the extracted label + print(f"Extracted Label: {label}") + if "لا أستطيع" in label or "I cannot" in label: + return random.choice(["0","1"]) + + # Determining the prediction label based on the response content + if "yes" in label or "contains a factual claim" in label or "label: 1" in label: + pred_label = "1" + elif "no" in label or "label: 0" in label or "does not contain a factual claim" in label or "label: no" in label: + pred_label = "0" + else: + # If none of the expected labels are found, default to a negative claim (most conservative approach) + pred_label = "0" + + # Debug print to check the final predicted label + #print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + # Return a default negative label in case of error to prevent unknown targets + return "" diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..8cd97af0 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py @@ -0,0 
+1,95 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "حدد ما إذا كانت المعلومات في الجملة صحيحة أم خاطئة. أجب فقط بـ 'صحيحة' أو 'خاطئة'.\n\n" + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "أنت خبير في التحقق من الحقائق.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + label = "صحيحة" if example["label"] == "true" else "خاطئة" + + + out_prompt = ( + out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "الجملة: " + input_sample + "\nالتصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "yes" in
input_label + or "right" in input_label + or "صحيح" in input_label + or "صحيحة" in input_label + or "نعم" in input_label + ): + return "true" + + elif ( + "false" in input_label + or "wrong" in input_label + or "خطأ" in input_label + or "لا" in input_label + or "not" in input_label + or "خاطئة" in input_label + or "خاطئ" in input_label + ): + return "false" + + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py new file mode 100755 index 00000000..212c28de --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py @@ -0,0 +1,89 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import FactualityTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "Detect whether the information in the sentence is factually true or false. 
Answer only by true or false.\n\n" + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "You are an expert fact checker.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + label = example["label"] + + out_prompt = ( + out_prompt + "sentence: " + sent + "\n" + "label: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "sentence: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "yes" in input_label + or "right" in input_label + or "صحيح" in input_label + or "صحيحة" in input_label + or "نعم" in input_label + ): + return "true" + + elif ( + "false" in input_label + or "wrong" in input_label + or "خطأ" in input_label + or "لا" in input_label + or "not" in input_label + or "خاطئة" in input_label + or "خاطئ" in input_label + ): + return "false" + + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..879e77c6 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py @@ -0,0 +1,93 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import FactualityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, 
Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "حدد ما إذا كانت المعلومات في الجملة صحيحة أم خاطئة. أجب فقط بـ 'true' أو 'false'.\n\n" + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "أنت خبير في التحقق من الحقائق.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + label = example["label"] + + + out_prompt = ( + out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "الجملة: " + input_sample + "\nالتصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "yes" in input_label + or "right" in input_label + or "صحيح" in input_label + or "صحيحة" in input_label + or "نعم" in input_label + ): + return "true" + + elif ( + "false" in input_label + or "wrong" in input_label + or "خطأ" in input_label + or "لا" in input_label + or "not" in
input_label + or "خاطئة" in input_label + or "خاطئ" in input_label + ): + return "false" + + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..18a2d945 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,79 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "حدد ما إذا كانت المعلومات في الجملة صحيحة أم خاطئة. 
" + "أجب فقط بـ 'صحيحة' أو 'خاطئة'.\n\n" + + "الجملة: " + + input_sample + + "\nالتصنيف: \n" + ) + + return [ + { + "role": "system", + "content": "أنت خبير في التحقق من الحقائق.", + }, + { + "role": "user", + "content": prompt_text, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "yes" in input_label + or "right" in input_label + or "صحيح" in input_label + or "صحيحة" in input_label + or "نعم" in input_label + ): + return "true" + + elif ( + "false" in input_label + or "wrong" in input_label + or "خطأ" in input_label + or "لا" in input_label + or "not" in input_label + or "خاطئة" in input_label + or "خاطئ" in input_label + ): + return "false" + + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..a37d2325 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py @@ -0,0 +1,78 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "Detect whether the information in the sentence is factually true or false. Answer only by true or false.\n\n" + + "sentence: " + + input_sample + + "\nlabel: \n" + ) + + return [ + { + "role": "system", + "content": "You are an expert fact checker.", + }, + { + "role": "user", + "content": prompt_text, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "yes" in input_label + or "right" in input_label + or "صحيح" in input_label + or "صحيحة" in input_label + or "نعم" in input_label + ): + return "true" + + elif ( + "false" in input_label + or "wrong" in input_label + or "خطأ" in input_label + or "لا" in input_label + or "not" in input_label + or "خاطئة" in input_label + or "خاطئ" in input_label + ): + return "false" + + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..6100b220 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,78 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import FactualityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": 
"GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "حدد ما إذا كانت المعلومات في الجملة صحيحة أم خاطئة. " + "أجب فقط بـ 'true' أو 'false'.\n\n" + + "الجملة: " + + input_sample + + "\nالتصنيف: \n" + ) + + return [ + { + "role": "system", + "content": "أنت خبير في التحقق من الحقائق.", + }, + { + "role": "user", + "content": prompt_text, + }, + ] + + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "yes" in input_label + or "right" in input_label + or "صحيح" in input_label + or "صحيحة" in input_label + or "نعم" in input_label + ): + return "true" + + elif ( + "false" in input_label + or "wrong" in input_label + or "خطأ" in input_label + or "لا" in input_label + or "not" in input_label + or "خاطئة" in input_label + or "خاطئ" in input_label + ): + return "false" + + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..7119e69c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,99 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif 
Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + + +def prompt(input_sample, examples): + prompt_text = "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'نعم' إذا كانت صحيحة و'لا' إذا لم تكن صحيحة. " + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + if example["label"] == "true": + label = "نعم" + elif example["label"] == "false": + label = "لا" + out_prompt = ( + out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "الجملة: " + input_sample + "\nالتصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace(".", "").strip().lower() + if "آسف" in label or "لا أستطيع" in label: + return None + + + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + or "غير صحيح" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label
+ or "label: 1" in label + or "label: yes" in label + or "صحيح" in label + or "صح" in label + ): + pred_label = "true" + else: + print("label problem!! " + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..420744a9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py @@ -0,0 +1,91 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "Detect whether the information in the sentence is factually true or false. 
Answer only by true or false.\n\n" + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + label = example["label"] + + out_prompt = ( + out_prompt + "Sentence: " + sent + "\n" + "label: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "Sentence: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace(".", "").strip().lower() + + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "yes" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! 
" + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..90ca31d1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,89 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'true' إذا كانت صحيحة و'false' إذا لم تكن صحيحة. 
" + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "user", + "content": fs_prompt, + }, + ] + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + if example["label"] == "true": + label = "true" + elif example["label"] == "false": + label = "false" + out_prompt = ( + out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "الجملة: " + input_sample + "\التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace(".", "").strip().lower() + + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! 
" + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..f4c82ee7 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,76 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'نعم' إذا كانت صحيحة و'لا' إذا لم تكن صحيحة. 
" + + "الجملة: " + + input_sample + + "\التصنيف: \n" + ) + + return [ + { + "role": "user", + "content": prompt_text, + }, + ] + + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace(".", "").strip().lower() + + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! " + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..a5edf196 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py @@ -0,0 +1,70 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = ( + "Detect whether the information in the sentence is factually true or false. " + "Answer only by true or false.\n\n" + + "Sentence: " + + input_sample + + "\nlabel: \n" + ) + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "yes" in input_label + or "label: 1" in input_label + or "label: yes" in input_label + ): + pred_label = "true" + elif ( + "false" in input_label + or "label: 0" in input_label + or "label: no" in input_label + ): + pred_label = "false" + else: + print("label problem!! " + input_label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..5b7affe9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,73 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import FactualityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'true' إذا كانت صحيحة و'false' إذا لم تكن صحيحة. " + + "الجملة: " + + input_sample + + "\nالتصنيف: \n" + ) + + return [ + { + "role": "user", + "content": prompt_text, + + }] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace(".", "").strip().lower() + + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! 
" + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py new file mode 100755 index 00000000..3c506d13 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py @@ -0,0 +1,99 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import AzureModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": AzureModel, + "model_args": { + "max_tries": 100, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'نعم' إذا كانت صحيحة و'لا' إذا لم تكن صحيحة." 
+ + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + if example["label"] == "true": + label = "نعم" + elif example["label"] == "false": + label = "لا" + out_prompt = ( + out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "الجملة: " + input_sample + "\nالتصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + + label = label.replace(".", "").strip().lower() + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! 
" + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..0492381c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py @@ -0,0 +1,92 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import AzureModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": AzureModel, + "model_args": { + "max_tries": 100, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "Detect whether the information in the sentence is factually true or false. 
Answer only by true or false.\n\n" + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + label = example["label"] + + out_prompt = ( + out_prompt + "Sentence: " + sent + "\n" + "label: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "Sentence: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + + label = label.replace(".", "").strip().lower() + + if ( + "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + elif ( + "false" in label + or "label: 0" in label + or "label: no" in label + ): + pred_label = "false" + else: + print("label problem!! 
" + label) + pred_label = None + + return pred_label + diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..0cbcfb96 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,96 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import AzureModel +from llmebench.tasks import FactualityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": AzureModel, + "model_args": { + "max_tries": 100, + }, + } + + +def prompt(input_sample, examples): + prompt_text = "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'true' إذا كانت صحيحة و'false' إذا لم تكن صحيحة. 
" + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "user", + "content": fs_prompt, + }, + ] + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + if example["label"] == "true": + label = "true" + elif example["label"] == "false": + label = "false" + out_prompt = ( + out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "الجملة: " + input_sample + "\التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + + label = label.replace(".", "").strip().lower() + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! 
" + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..76bf39e1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,83 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import AzureModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": AzureModel, + "model_args": { + "max_tries": 100, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'نعم' إذا كانت صحيحة و'لا' إذا لم تكن صحيحة. " + + "الجملة: " + + input_sample + + "\التصنيف: \n" + ) + + return [ + { + "role": "user", + "content": prompt_text, + }, + ] + + + +def post_process(response): + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. 
" + str(response)) + label = "" + + label = label.replace(".", "").strip().lower() + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "خطا" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "صح" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! " + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..e58a2325 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,77 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import AzureModel +from llmebench.tasks import FactualityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": AzureModel, + "model_args": { + "max_tries": 100, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "Detect whether the information in the sentence is factually true or false. 
" + "Answer only by 'true' or 'false'.\n\n" + + "Sentence: " + + input_sample + + "\nlabel: \n" + ) + + return [ + { + "role": "user", + "content": prompt_text, + }, + ] + + + +def post_process(response): + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + + label = label.replace(".", "").strip().lower() + if ( + "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + elif ( + "false" in label + or "label: 0" in label + or "label: no" in label + ): + pred_label = "false" + else: + print("label problem!! " + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..3cad1d69 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,81 @@ +from llmebench.datasets import ANSFactualityDataset +from llmebench.models import AzureModel +from llmebench.tasks import FactualityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": ANSFactualityDataset, + "task": FactualityTask, + "model": AzureModel, + "model_args": { + "max_tries": 100, + }, + } + + +def prompt(input_sample): + prompt_text = ( + "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'true' إذا كانت صحيحة و'false' إذا لم تكن صحيحة. " + + "الجملة: " + + input_sample + + "\nالتصنيف: \n" + ) + + return [ + { + "role": "user", + "content": prompt_text, + + }] + + + +def post_process(response): + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + else: + print("Response .. " + str(response)) + label = "" + + label = label.replace(".", "").strip().lower() + + if ( + "لا" in label + or "خطأ" in label + or "ليست" in label + or "false" in label + or "label: 0" in label + or "label: no" in label + + ): + pred_label = "false" + elif ( + "نعم" in label + or "صحيحة" in label + or "true" in label + or "label: 1" in label + or "label: yes" in label + ): + pred_label = "true" + else: + print("label problem!! 
" + label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..82567e94 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py @@ -0,0 +1,93 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HarmfulDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = "هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'ضارة'. أجب بـ 'غير ضارة' إن لم تكن كذلك. 
قدم فقط التصنيف.\n" + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "غير ضارة" if example["label"] == "0" else "ضارة" + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label_fixed = None + label = label.replace("label:","") + label = label.replace("التصنيف:","") + label = label.strip().lower() + + if ("غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label + ): + label_fixed = "0" + elif ("ضارة" in label + or "harmful" in label + or "نعم" in label + or "yes" in label): + label_fixed = "1" + + return label_fixed \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py new file mode 100755 index 00000000..de688645 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py @@ -0,0 +1,93 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models 
import OpenAIModel +from llmebench.tasks import HarmfulDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = "Can this tweet cause harm to any person, organization or entity? Answer only by 'harmful' or 'not harmful'. 
Provide only label.\n" + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "You are an expert in tweet analysis and classification.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "harmful" if example["label"] == "1" else "not harmful" + out_prompt = ( + out_prompt + "tweet: " + example["input"] + "\nlabel: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("label:","") + label = label.replace("التصنيف:","") + label = label.strip().lower() + + if ("غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label + ): + return "0" + elif ("ضارة" in label + or "harmful" in label + or "نعم" in label + or "yes" in label): + return "1" + + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..9045b1fe --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py @@ -0,0 +1,92 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import 
HarmfulDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = "هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'harmful'. أجب بـ 'not harmful' إن لم تكن كذلك. 
قدم فقط التصنيف.\n" + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "not harmful" if example["label"] == "0" else "harmful" + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label_fixed = None + label = label.replace("label:","") + label = label.replace("التصنيف:","") + label = label.strip().lower() + + if ("غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label + ): + return "0" + elif ("ضارة" in label + or "harmful" in label + or "نعم" in label + or "yes" in label): + return "1" + + return label_fixed + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..0535b298 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,80 @@ +import re + +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import OpenAIModel +from 
llmebench.tasks import HarmfulDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'ضارة'. أجب بـ 'غير ضارة' إن لم تكن كذلك. 
قدم فقط التصنيف.\n\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("label:","") + label = label.replace("التصنيف:","") + label = label.strip().lower() + + if ("غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label + ): + return "0" + elif ("ضارة" in label + or "harmful" in label + or "نعم" in label + or "yes" in label): + return "1" + + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..e06226f9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py @@ -0,0 +1,80 @@ +import re + +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HarmfulDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"Can this tweet cause harm to any person, organization or entity? Answer only by 'harmful' or 'not harmful'. Provide only label.\n\n" + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "system", + "content": "You are an expert in tweet analysis and classification.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("label:","") + label = label.replace("التصنيف:","") + label = label.strip().lower() + + if ("غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label + ): + return "0" + elif ("ضارة" in label + or "harmful" in label + or "نعم" in label + or "yes" in label): + return "1" + + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..b0404ded --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,79 @@ +import re + +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HarmfulDetectionTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + 
"affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'harmful'. أجب بـ 'not harmful' إن لم تكن كذلك. قدم فقط التصنيف.\n\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label_fixed = None + label = label.replace("label:","") + label = label.replace("التصنيف:","") + label = label.strip().lower() + + if ("غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label + ): + return "0" + elif ("ضارة" in label + or "harmful" in label + or "نعم" in label + or "yes" in label): + return "1" + + return label_fixed + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..0397fe0f --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,69 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Translate labels to Arabic + label = "غير ضارة" if example["label"] == "0" else "ضارة" + out_prompt += "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط." 
+ return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + print("label: ",label) + if "لا يمكنني" in label or "ai" in label: + return None + if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label: + return "0" + + + else: + return "1" \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..e9828a87 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py @@ -0,0 +1,77 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "not_harmful" if example["label"] == "0" else "harmful" + out_prompt = ( + out_prompt + "tweet: " + example["input"] + "\nlabel: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label." 
+ return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + + + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + print("label: ",label) + if "label: harmful" in label or "is harmful" in label: + return "1" + if "لا يمكنني" in label or "ai" in label or "لا يمكن" in label: + return None + if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label or "not" in label: + return "0" + + + else: + return "1" \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..1b0bdadc --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,74 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Translate labels to Arabic + label = "not_harmful" if example["label"] == "0" else "harmful" + out_prompt += "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n" + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + print("label: ",label) + if "لا يمكنني" in label or "ai" in label or "لا يمكن" in label or "ما هي المعايير" in label: + return None + if "التغريدة الأخيرة هي 'harmful'" in label: + return "1" + if "التغريدة الثالثة مصنفة على أنها 'غير مؤذية'" in label: + return "0" + if "التغريدة الثالثة مصنفة على أنها 'مؤذية'" in label or "غير آمن" in label or "لأنها لا تحتوي على محتوى ضار" in label: + return "1" + + if "label: harmful" in label or "is harmful" in label: + return "1" + + if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليس" in label or "no" in label: + return "0" + + + else: + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 
00000000..25942750 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,56 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. 
قدم التصنيف فقط.\n\n" + + "تغريدة: " + + input_sample + + "\التصنيف: " + ), + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + print("label: ",label) + if "لا يمكنني" in label or "ai" in label: + return None + if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label: + return "0" + + + else: + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..af387715 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py @@ -0,0 +1,60 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HarmfulDetectionTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"Classify the following tweet as 'harmful' or 'not_harmful'. 
Provide only label.\n\n" + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + print("label: ",label) + if "label: harmful" in label or "is harmful" in label: + return "1" + if "لا يمكنني" in label or "ai" in label or "لا يمكن" in label: + return None + if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label or "not" in label: + return "0" + + + else: + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..9dd5fc01 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,54 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n" + + "تغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + + if "غير" in label or "ليس" in label or "not" in label: + return "0" + elif "ضار" in label or "مس" in label or "مؤ" in label or "harm" in label or "مض" in label: + return "1" + + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py new file mode 100755 index 00000000..1ef503b5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py @@ -0,0 +1,72 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import AzureModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Translate labels to Arabic + label = "غير ضارة" if example["label"] == "0" else "ضارة" + out_prompt += "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط." + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + + + +def post_process(response): + # Extract the label from the response + if "output" in response: + label = response["output"].strip().lower() + + print("label: ",label) + + if "غير ضارة" in label or "لا" in label: + return "0" + elif label== "ضارة" or "ضارة" in label or "نعم" in label: + return "1" + + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..6c393e41 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py @@ -0,0 +1,79 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import AzureModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + + +def metadata(): + return { + "author": "Mohamed 
Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "not_harmful" if example["label"] == "0" else "harmful" + out_prompt = ( + out_prompt + "tweet: " + example["input"] + "\nlabel: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label." 
+ return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + + + + +def post_process(response): + # Extract the label from the response + if "output" in response: + label = response["output"].strip().lower() + + print("label: ",label) + + + if "not_harmful" in label: + return "0" + elif label== "harmful": + return "1" + + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..7099d265 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,65 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import AzureModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Translate labels to Arabic + label = "not_harmful" if example["label"] == "0" else "harmful" + out_prompt += "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n" + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + +def post_process(response): + # Extract the label from the response + if "output" in response: + label = response["output"].strip().lower() + + print("label: ",label) + if "لا أستطيع تقديم هذا النوع من الإجابات." 
in label: + return None + elif "غير" in label or "ليس" in label or "not" in label: + return "0" + else: + return "1" + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..999ba0ff --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,59 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import AzureModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. 
قدم التصنيف فقط.\n\n" + + "تغريدة: " + + input_sample + + "\التصنيف: " + ), + } + ] + + +def post_process(response): + # Extract the label from the response + if "output" in response: + label = response["output"].strip().lower() + + print("label: ",label) + + if "غير ضارة" in label or "لا" in label: + return "0" + elif label== "ضارة" or "ضارة" in label or "نعم" in label: + return "1" + + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..3a664ad5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,60 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import AzureModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "Classify the following tweet as 'harmful' or 'not_harmful'. 
Provide only label.\n\n" + + "tweet: " + + input_sample + + "\nlabel: " + ), + } + ] + +def post_process(response): + # Extract the label from the response + if "output" in response: + label = response["output"].strip().lower() + + print("label: ",label) + + + if "not_harmful" in label: + return "0" + elif label== "harmful": + return "1" + + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..8bf062c3 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,56 @@ +from llmebench.datasets import CT22HarmfulDataset +from llmebench.models import AzureModel +from llmebench.tasks import HarmfulDetectionTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": CT22HarmfulDataset, + "task": HarmfulDetectionTask, + "model": AzureModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. 
قدم التصنيف فقط.\n\n" + + "تغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + + +def post_process(response): + # Extract the label from the response + if "output" in response: + label = response["output"].strip().lower() + + print("label: ",label) + if "لا أستطيع تقديم هذا النوع من الإجابات." in label: + return None + elif "غير" in label or "ليس" in label or "not" in label: + return "0" + else: + return "1" + diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..2d795bdc --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py @@ -0,0 +1,83 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HateSpeechTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + base_prompt = ( + 'هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "كراهية" إن احتوت على خطاب كراهية، و أجب بـ "ليس كراهية" إن لم تكن كذلك.' 
+ " هذه بعض الأمثلة:\n" + ) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + + + + + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + # label = "no" if example["label"] == "0" else "yes" + label = "ليس كراهية" if example["label"] == "NOT_HS" else "كراهية" + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف:\n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.lower().strip() + + if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + return "NOT_HS" + elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + return "HS" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py new file mode 100755 index 00000000..b9c76e51 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py @@ -0,0 +1,78 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import 
HateSpeechTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + base_prompt = ( + "Does the tweet contain hate speech? Answer by " + " 'hate speech' if it does, and 'not hate speech' otherwise. Here are some examples:\n" + ) + + return [ + { + "role": "system", + "content": "You are an expert in tweets analysis and classification.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + # label = "no" if example["label"] == "0" else "yes" + label = "not hate speech" if example["label"] == "NOT_HS" else "hate speech" + out_prompt = ( + out_prompt + "tweet: " + example["input"] + "\nlabel: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel:\n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.lower().strip() + + if "ليس" in label or "ليس كراهية" in label or 
"لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + return "NOT_HS" + elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + return "HS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..f42b50f5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py @@ -0,0 +1,77 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HateSpeechTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + base_prompt = ( + 'هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "hate speech" إن احتوت على خطاب كراهية، و أجب بـ "not hate speech" إن لم تكن كذلك.\n' + ) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + + + + + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + # label = "no" if example["label"] == "0" else "yes" + label = "not hate speech" if example["label"] == "NOT_HS" else "hate speech" + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف:\n" + + return out_prompt + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.lower().strip() + + if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + return "NOT_HS" + elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + return "HS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..de484631 --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,60 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HateSpeechTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": f' هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "كراهية" إن احتوت على خطاب كراهية، و أجب بـ "ليس كراهية" إن لم تكن كذلك. 
\n' + "التغريدة: " + input_sample + "\n" + "التصنيف: " + }, + ] + + + + + + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.lower().strip() + + if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + return "NOT_HS" + elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + return "HS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..a813f726 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py @@ -0,0 +1,55 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HateSpeechTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "system", + "content": "You are an expert in tweets analysis and classification.", + }, + { + "role": "user", + "content": f'Does the tweet contain hate speech? Answer by "hate speech" if it does, and "not hate speech" otherwise. \n\ntweet: {input_sample}\nlabel: ', + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.lower().strip() + + if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + return "NOT_HS" + elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + return "HS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..95135ae7 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,58 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import HateSpeechTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, 
refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": f'هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "hate speech" إن احتوت على خطاب كراهية، و أجب بـ "not hate speech" إن لم تكن كذلك. \n {input_sample}', + }, + ] + + + + + + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.lower().strip() + + if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + return "NOT_HS" + elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + return "HS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..7701e603 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,61 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HateSpeechTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic 
Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "صنف التغريدة التالية حسب احتوائها على خطاب كراهية. أجب ب 'كراهية' أو 'لا كراهية' وقدم التصنيف فقط." + "إليك بعض الأمثلة لتوجيهك:\n\n" + ) + for index, example in enumerate(examples): + label = "كراهية" if example["label"] == "HS" else "ليست كراهية" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\n" + f"التصنيف (يرجى الرد فقط بـ 'كراهية' أو 'ليست كراهية'):" + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] +def post_process(response): + print(response) + label = response["choices"][0]["message"]["content"].lower() + + if "لا" in label or "ليست" in label or "ليس" in label or "not" in label: + return "NOT_HS" + return "HS" diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..0001013d --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py @@ -0,0 +1,64 @@ +from llmebench.datasets import 
OSACT4SubtaskBDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HateSpeechTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + # label = "no" if example["label"] == "0" else "yes" + label = "not_hate_speech" if example["label"] == "NOT_HS" else "hate_speech" + out_prompt = ( + out_prompt + "Tweet: " + example["input"] + "\nLabel: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "Tweet: " + input_sample + "\nLabel:\n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = ("Respond only with 'Hate speech' if it is hate speech and 'Not hate speech' if it is not hate speech from the following tweets. 
" + "Here are some examples to guide you:\n") + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + +def post_process(response): + print(response) + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + ) + print("label", label) + + if "NOT" in label or "NO" in label or "لا" in label or "ليس" in label: + return "NOT_HS" + return "HS" diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..0e238ce8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,60 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HateSpeechTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "أجب فقط بـ 'hate speech' إذا كانت خطاب كراهية و'not hate speech' إذا لم تكن خطاب كراهية من التغريدات التالية. 
" + "إليك بعض الأمثلة لتوجيهك:\n\n" + ) + for index, example in enumerate(examples): + label = "hate speech" if example["label"] == "HS" else "not hate speech" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\n" + f"التصنيف (يرجى الرد فقط بـ 'hate speech' أو 'not hate speech'):" + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + print(response) + label = response["choices"][0]["message"]["content"].lower() + + if "لا" in label or "ليست" in label or "ليس" in label or "not" in label: + return "NOT_HS" + return "HS" diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..1db408d5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,60 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HateSpeechTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية جسب احتوائها على خطاب كراهية أجب ب 'كراهية' أو 'لا كراهية' وقدم التصنيف فقط.\n\n" + + "التغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + +def post_process(response): + print(response) + label = ( + response["choices"][0]["message"]["content"].lower() + ) + if "لا يمكنني" in label: + return None + if "لا كراهية" in label or "لا تحتوي" in label or "ليست كراهية" in label or "ليس" in label or "ليست" in label or "not" in label or "لا" in label: + return "NOT_HS" + if "تحتوي على خطاب كراهية" in label: + return "HS" + if "not" in label or "not_hs" in label or "لا كراهية" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label or "كراهية" in label: + return "HS" + else: + print("No clear label found.") + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..de4147b7 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py @@ -0,0 +1,54 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HateSpeechTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive 
analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + base_prompt = f"Classify the following tweet into 'Hate Speech' or 'Not Hate Speech'. Provide only label \n {input_sample}" + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + + +def post_process(response): + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + ) + + if "no" in label or "not hate speech" in label or "not_hs" in label or "not" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label: + return "HS" + else: + print("No clear label found.") + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..9e91b81b --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,48 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import FastChatModel +from llmebench.tasks import HateSpeechTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية حسب احتوائها على خطاب كراهية. أجب بـ 'HS' أو 'NOT_HS' وقدم التصنيف فقط.\n\n" + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + + +def post_process(response): + print(response) + label = response["choices"][0]["message"]["content"].lower() + + if "لا" in label or "ليست" in label or "ليس" in label: + return "NOT_HS" + return "HS" diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py new file mode 100755 index 00000000..5d28b966 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py @@ -0,0 +1,71 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import AzureModel +from llmebench.tasks import HateSpeechTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": AzureModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "صنف التغريدة التالية حسب احتوائها على خطاب كراهية. أجب ب 'كراهية' أو 'لا كراهية' وقدم التصنيف فقط." + "إليك بعض الأمثلة لتوجيهك:\n\n" + ) + for index, example in enumerate(examples): + label = "كراهية" if example["label"] == "HS" else "لا كراهية" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\n" + f" التصنيف:" + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + print(response) + if "output" in response: + label = response["output"].strip() + label = label.replace("", "").replace("", "").lower() + else: + print("Response .. 
" + str(response)) + return "NOT_HS" # Default to "NOT_HS" when unsure + + if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech": + return "HS" + else: + + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..3fb3263d --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py @@ -0,0 +1,71 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import AzureModel +from llmebench.tasks import HateSpeechTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": AzureModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "Respond only with 'Hate speech' if it is hate speech and 'Not hate speech' if it is not hate speech from the following tweets. 
" + "Here are some examples to guide you:\n\n" + ) + for index, example in enumerate(examples): + label = "Hate_speech" if example["label"] == "HS" else "not_hate_speech" + base_prompt += ( + f"Example {index + 1}:\n" + f"Tweet: '{example['input']}'\n" + f"Classification: {label}\n\n" + ) + base_prompt += ( + f"Now, evaluate the following new tweet:\nTweet: '{input_sample}'\n" + f"Classification (please respond only with 'Hate speech' or 'Not hate speech'):" + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] +import random +def post_process(response): + print(response) + if "output" in response: + label = response["output"].strip() + label = label.replace("", "").replace("", "").lower() + else: + print("Response .. " + str(response)) + return "NOT_HS" # Default to "NOT_HS" when unsure + + if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech" in label: + return "HS" + else: + print("No clear label found.") + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..74a314d8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,70 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import AzureModel +from llmebench.tasks import HateSpeechTask +import random + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": 
"Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": AzureModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "أجب فقط بـ 'hate_speech' إذا كانت خطاب كراهية و'not_hate_speech' إذا لم تكن خطاب كراهية من التغريدات التالية. " + "إليك بعض الأمثلة لتوجيهك:\n\n" +) + + for index, example in enumerate(examples): + label = "hate_speech" if example["label"] == "HS" else "not_hate_speech" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\n" + f"التصنيف (يرجى الرد فقط بـ 'كراهية' أو 'ليست كراهية'):" + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + print(response) + if "output" in response: + label = response["output"].strip() + label = label.replace("", "").replace("", "").lower() + else: + print("Response .. 
" + str(response)) + return "NOT_HS" # Default to "NOT_HS" when unsure + + if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech" in label: + return "HS" + else: + print("No clear label found.") + return random.choice(["HS","NOT_HS"]) \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..e505d147 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,58 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import AzureModel +from llmebench.tasks import HateSpeechTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": AzureModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية حسب احتوائها على خطاب كراهية. 
أجب ب 'كراهية' أو 'لا كراهية' وقدم التصنيف فقط.\n\n" + + "التغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + +def post_process(response): + print(response) + if "output" in response: + label = response["output"].strip() + label = label.replace("", "").replace("", "").lower() + else: + print("Response .. " + str(response)) + return "NOT_HS" # Default to "NOT_HS" when unsure + + if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech" in label: + return "HS" + else: + print("No clear label found.") + return random.choice(["HS","NOT_HS"]) \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..e8ed7817 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,58 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import AzureModel +from llmebench.tasks import HateSpeechTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": AzureModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "Classify the following tweet into 'Hate Speech' or 'Not Hate Speech'. Provide only label.\n\n" + + "tweet: " + + input_sample + + "label: " + ), + } + ] + +def post_process(response): + print(response) + if "output" in response: + label = response["output"].strip() + label = label.replace("", "").replace("", "").lower() + else: + print("Response .. " + str(response)) + return "NOT_HS" # Default to "NOT_HS" when unsure + + if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech" in label: + return "HS" + else: + print("No clear label found.") + return random.choice(["HS","NOT_HS"]) diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..0ba553cd --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,57 @@ +from llmebench.datasets import OSACT4SubtaskBDataset +from llmebench.models import AzureModel +from llmebench.tasks import HateSpeechTask +import random + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and 
results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": OSACT4SubtaskBDataset, + "task": HateSpeechTask, + "model": AzureModel, + "model_args": { + "class_labels": ["HS", "NOT_HS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "صنف التغريدة التالية حسب احتوائها على خطاب كراهية. أجب بـ 'hate_speech' أو 'not_hate_speech' وقدم التصنيف فقط.\n\n" + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + +def post_process(response): + print(response) + if "output" in response: + label = response["output"].strip() + label = label.replace("", "").replace("", "").lower() + else: + print("Response .. " + str(response)) + return "NOT_HS" # Default to "NOT_HS" when unsure + + if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label or "not_hate_speech" in label: + return "NOT_HS" + elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech" in label: + return "HS" + else: + print("No clear label found.") + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..ca8bd5a1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py @@ -0,0 +1,78 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import OpenAIModel +from llmebench.tasks import OffensiveTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, 
Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + base_prompt = ( + "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'. هذه بعض الأمثلة: " + "\n" + ) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + # label = "no" if example["label"] == "0" else "yes" + label = "غير مسيء" if example["label"] == "NOT_OFF" else "مسيء" + + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.strip().lower() + if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: + return "NOT_OFF" + elif "مسيء" in label or "offensive" in label 
or "المساءاة" in label or "off" in label: + return "OFF" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py new file mode 100755 index 00000000..8c27391b --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py @@ -0,0 +1,78 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import OpenAIModel +from llmebench.tasks import OffensiveTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + base_prompt = ( + "If the following tweet is offensive, label it as 'offensive'. Return 'not offensive' otherwise. 
Here are some examples:\n" + ) + + return [ + { + "role": "system", + "content": "You are an expert in tweet analysis and classification.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + # label = "no" if example["label"] == "0" else "yes" + label = "not offensive" if example["label"] == "NOT_OFF" else "offensive" + + out_prompt = ( + out_prompt + "tweet: " + example["input"] + "\nlabel: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.strip().lower() + if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: + return "NOT_OFF" + elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + return "OFF" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..335c4e78 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py @@ -0,0 +1,77 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import OpenAIModel +from llmebench.tasks import OffensiveTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + 
"affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + base_prompt = ( + "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'offensive' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'not offensive'. هذه بعض الأمثلة: " + "\n" + ) + + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + # label = "no" if example["label"] == "0" else "yes" + label = "not offensive" if example["label"] == "NOT_OFF" else "offensive" + + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.strip().lower() + if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: + return "NOT_OFF" + elif "مسيء" in label or "offensive" in label or "المساءاة" 
in label or "off" in label: + return "OFF" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..7d54e5f9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,55 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import OpenAIModel +from llmebench.tasks import OffensiveTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات", + }, + { + "role": "user", + "content": ("إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'. 
" + "\n\nالتغريدة" + input_sample + "\nالتصنيف: ") + }, + ] + + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.strip().lower() + if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: + return "NOT_OFF" + elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + return "OFF" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..13db2e4f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py @@ -0,0 +1,54 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import OpenAIModel +from llmebench.tasks import OffensiveTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "system", + "content": "You are an expert in tweet analysis and classification.", + }, + { + "role": "user", + "content": ("If the following tweet is offensive, label it as 'offensive'. 
Return 'not offensive' otherwise." + "\n\ntweet: " + input_sample + "\nlabel: ") + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.strip().lower() + if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: + return "NOT_OFF" + elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + return "OFF" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..2f6c7c6f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,52 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import OpenAIModel +from llmebench.tasks import OffensiveTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وتصنيف التغريدات", + }, + { + "role": "user", + "content": ("إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'offensive' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'not offensive'. " + "\n\nالتغريدة" + input_sample + "\nالتصنيف: ") + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.strip().lower() + if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: + return "NOT_OFF" + elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + return "OFF" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..339ca0b8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,68 @@ +import re +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import FastChatModel +from llmebench.tasks import OffensiveTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the 
preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + f"يرجى تقييم الجملة التالية لتحديد ما إذا كانت تحتوي على محتوى مسيء. إذا كانت الجملة تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء'. إذا لم تحتوي الجملة على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'.\n" + ) + for index, example in enumerate(examples): + label = "غير مسيء" if example["label"] == "NOT_OFF" else "مسيء" + base_prompt += ( + f"مثال {index + 1}:\n" + f"الجملة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"الآن، قم بتقييم الجملة الجديدة التالية:\nالجملة: '{input_sample}'\n" + f" التصنيف:" + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip().lower() + label = re.sub(r"|", "", label) + + if "not" in label or "غير" in label or "ليس" in label: + return "NOT_OFF" + elif "is" in label or "مسيء" in label: + return "OFF" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..f486f68a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py @@ -0,0 +1,60 @@ +import re +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import FastChatModel +from llmebench.tasks import OffensiveTask + + + + +def metadata(): + 
return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = "Review the content provided and classify it as 'OFF' for Offensive or 'NOT_OFF' for Not Offensive. Here are some examples to guide your classification:\n\n" + for index, example in enumerate(examples): + label = "NOT_OFF" if example["label"] == "NOT_OFF" else "OFF" + base_prompt += f"Example {index + 1}: Content - '{example['input']}' -> Classification: {label}\n" + base_prompt += "\nNow classify the new content:\nContent: '" + input_sample + "'\nClassification:\n" + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip().lower() + label = re.sub(r"|", "", label) + #print("label", label) + + # Ensure only the expected labels are returned + if "not" in label or "غير" in label or "ليس" in label: + return "NOT_OFF" + elif "is" in label or "مسيء" in label or "off" in label: + return "OFF" + else: + return None \ No newline at end of file diff --git 
a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..4dbe7e63 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,67 @@ +import re +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import FastChatModel +from llmebench.tasks import OffensiveTask + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": OSACT4SubtaskADataset, + "task": OffensiveTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["OFF", "NOT_OFF"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "يرجى تقييم المحتوى التالي وتحديد ما إذا كان يحتوي على لغة قد تعتبر مسيئة أو غير لائقة. استخدم التصنيفات 'OFF' للمحتوى المسيء أو 'NOT_OFF' للمحتوى غير المسيء. 
إليك بعض الأمثلة لتوجيهك:\n\n" + ) + for index, example in enumerate(examples): + label = "NOT_OFF" if example["label"] == "NOT_OFF" else "OFF" + base_prompt += ( + f"مثال {index + 1}:\n" + f"المحتوى: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"الآن، قم بتقييم المحتوى الجديد التالي:\nالمحتوى: '{input_sample}'\n" + f"التصنيف (يرجى الرد فقط بـ 'OFF' أو 'NOT_OFF'):" + ) + return base_prompt + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + +def post_process(response): + print("Received response:", response) + if not response or 'output' not in response: + print("Error or missing output in response:", response) + return "NOT_OFF" # Default to "NOT_OFF" when unsure + + label = response["output"].strip().upper() + label = re.sub(r"|", "", label) + label = label.lower() + + if "not" in label or "غير" in label or "no" in label : + return "NOT_OFF" + return "OFF" + diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..a799210a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,52 @@ +from llmebench.datasets import OSACT4SubtaskADataset +from llmebench.models import FastChatModel +from llmebench.tasks import OffensiveTask +import re + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
def post_process(response):
    """Extract the OFF / NOT_OFF label from a chat-completion response.

    Args:
        response: OpenAI-style dict with the answer text at
            response["choices"][0]["message"]["content"].

    Returns:
        "NOT_OFF", "OFF", or None when the answer matches neither class.
    """
    label = response["choices"][0]["message"]["content"]

    label = label.replace("label:", "").strip().lower()
    # Remove literal pipe characters. The original pattern r"|" was an empty
    # alternation matching the empty string — a silent no-op; r"\|" matches '|'.
    label = re.sub(r"\|", "", label)

    if "not" in label or "غير" in label or "ليس" in label:
        return "NOT_OFF"
    elif "is" in label or "مسيء" in label:
        return "OFF"
    else:
        return None
def post_process(response):
    """Extract the OFF / NOT_OFF label from a chat-completion response.

    Args:
        response: OpenAI-style dict with the answer text at
            response["choices"][0]["message"]["content"].

    Returns:
        "NOT_OFF", "OFF", or None when the answer matches neither class.
    """
    label = response["choices"][0]["message"]["content"]

    label = label.replace("label:", "").strip().lower()
    # Remove literal pipe characters. The original pattern r"|" was an empty
    # alternation matching the empty string — a silent no-op; r"\|" matches '|'.
    label = re.sub(r"\|", "", label)

    # Ensure only the expected labels are returned
    if "not" in label or "غير" in label or "ليس" in label:
        return "NOT_OFF"
    elif "is" in label or "مسيء" in label or "off" in label:
        return "OFF"
    else:
        return None
def post_process(response):
    """Extract the OFF / NOT_OFF label from a chat-completion response.

    Args:
        response: OpenAI-style dict with the answer text at
            response["choices"][0]["message"]["content"].

    Returns:
        "NOT_OFF", "OFF", or None when the answer matches neither class.
    """
    label = response["choices"][0]["message"]["content"]
    label = label.strip().lower()

    # NOTE: the original condition tested `"not" in label` twice and also
    # checked "'not_off'", which is subsumed by the "not" substring test;
    # the duplicates are removed here with identical behavior.
    if "not" in label or "غير" in label or "ليس" in label or "لا تحتوي" in label:
        return "NOT_OFF"
    elif "off" in label or "مس" in label or "ضار" in label:
        return "OFF"
    else:
        return None
def post_process(response):
    """Map a raw model response onto the OFF / NOT_OFF label space.

    Args:
        response: dict carrying the model text under the "output" key.

    Returns:
        "NOT_OFF", "OFF", or None when no class keyword is found.
    """
    label = response["output"].strip().lower()

    # "ليس" is a substring of "ليست", and "off" is a substring of
    # "offensive", so the original's extra checks for those longer forms
    # were redundant and are dropped with identical behavior.
    if "غير" in label or "not" in label or "ليس" in label:
        return "NOT_OFF"
    elif "مسيء" in label or "المساءاة" in label or "off" in label:
        return "OFF"
    else:
        print("No clear label found:", label)
        return None
def config():
    """Benchmark wiring: dataset, task, model backend and model arguments."""
    model_args = {
        "class_labels": ["OFF", "NOT_OFF"],
        "max_tries": 3,
    }
    return {
        "dataset": OSACT4SubtaskADataset,
        "task": OffensiveTask,
        "model": AzureModel,
        "model_args": model_args,
    }
def post_process(response):
    """Map a raw model response onto the OFF / NOT_OFF label space.

    Args:
        response: dict carrying the model text under the "output" key.

    Returns:
        "NOT_OFF", "OFF", or None when no class keyword is found.
    """
    label = response["output"].strip().lower()
    # Remove literal pipe characters. The original pattern r"|" was an empty
    # alternation matching the empty string — a silent no-op; r"\|" matches '|'.
    # (The original also lower-cased twice; once is enough.)
    label = re.sub(r"\|", "", label)

    if "not" in label or "غير" in label or "ليس" in label:
        return "NOT_OFF"
    elif "is" in label or "مسيء" in label or "off" in label:
        return "OFF"
    else:
        return None
def post_process(response):
    """Map a raw model response onto the OFF / NOT_OFF label space.

    Args:
        response: dict carrying the model text under the "output" key.

    Returns:
        "NOT_OFF", "OFF", or None when no class keyword is found.
    """
    label = response["output"].strip().lower()
    # Remove literal pipe characters. The original pattern r"|" was an empty
    # alternation matching the empty string — a silent no-op; r"\|" matches '|'.
    # (The original also lower-cased twice; once is enough.)
    label = re.sub(r"\|", "", label)

    if "not" in label or "غير" in label or "ليس" in label:
        return "NOT_OFF"
    elif "is" in label or "مسيء" in label or "off" in label:
        return "OFF"
    else:
        return None
def metadata():
    """Asset provenance: authors, affiliation, evaluated model, and paper links."""
    authors = (
        "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, "
        "Boushra Bendou, Maram Hasanain, and Firoj Alam"
    )
    lab = (
        "Arabic Language Technologies, Qatar Computing Research Institute "
        "(QCRI), Hamad Bin Khalifa University (HBKU)"
    )
    paper_note = (
        "For a comprehensive analysis and results, refer to our peer-reviewed "
        "publication available at "
        "[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore "
        "the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
    )
    return {
        "author": authors,
        "affiliation": lab,
        "model": "Llama-3.1-8B-Instruct",
        "description": paper_note,
    }
def post_process(response):
    """Map a raw model response onto the OFF / NOT_OFF label space.

    Args:
        response: dict carrying the model text under the "output" key.

    Returns:
        "NOT_OFF", "OFF", or None when no class keyword is found.
    """
    label = response["output"].strip().lower()

    # "ليس" is a substring of "ليست", and "off" is a substring of
    # "offensive", so the original's extra checks for those longer forms
    # were redundant and are dropped with identical behavior.
    if "غير" in label or "not" in label or "ليس" in label:
        return "NOT_OFF"
    elif "مسيء" in label or "المساءاة" in label or "off" in label:
        return "OFF"
    else:
        print("No clear label found:", label)
        return None
def post_process(response):
    """Map a raw model response onto the OFF / NOT_OFF label space.

    Args:
        response: dict carrying the model text under the "output" key.

    Returns:
        "NOT_OFF", "OFF", or None when no class keyword is found.
    """
    label = response["output"].strip().lower()
    # Remove literal pipe characters. The original pattern r"|" was an empty
    # alternation matching the empty string — a silent no-op; r"\|" matches '|'.
    # (The original also lower-cased twice; once is enough.)
    label = re.sub(r"\|", "", label)

    if "not" in label or "غير" in label or "ليس" in label:
        return "NOT_OFF"
    elif "is" in label or "مسيء" in label or "off" in label:
        return "OFF"
    else:
        return None
def post_process(response):
    """Map a raw model response onto the OFF / NOT_OFF label space.

    Args:
        response: dict expected to carry the model text under the "output" key.

    Returns:
        "NOT_OFF", "OFF", or None when no class keyword is found. Defaults to
        "NOT_OFF" when the response is empty or has no "output" field.
    """
    if not response or "output" not in response:
        print("Error or missing output in response:", response)
        return "NOT_OFF"  # Safely default to "NOT_OFF" when unsure

    label = response["output"].lower()

    # NOTE: after .lower() the original's `"NOT" in label` test could never
    # match, and "'not_off'" is subsumed by the "not" substring test; both
    # dead checks are removed here with identical behavior.
    if "not" in label or "غير" in label or "ليس" in label or "لا تحتوي" in label:
        return "NOT_OFF"
    elif "off" in label:
        return "OFF"
    else:
        return None
def config():
    """Benchmark wiring: propaganda dataset, multilabel task, model and args."""
    techniques = [
        "no technique",
        "Smears",
        "Exaggeration/Minimisation",
        "Loaded Language",
        "Appeal to fear/prejudice",
        "Name calling/Labeling",
        "Slogans",
        "Repetition",
        "Doubt",
        "Obfuscation, Intentional vagueness, Confusion",
        "Flag-waving",
        "Glittering generalities (Virtue)",
        "Misrepresentation of Someone's Position (Straw Man)",
        "Presenting Irrelevant Data (Red Herring)",
        "Appeal to authority",
        "Whataboutism",
        "Black-and-white Fallacy/Dictatorship",
        "Thought-terminating cliché",
        "Causal Oversimplification",
    ]
    return {
        "dataset": WANLP22T3PropagandaDataset,
        "dataset_args": {"techniques_path": "classes.txt"},
        "task": MultilabelPropagandaTask,
        "model": OpenAIModel,
        "model_args": {
            "class_labels": techniques,
            "max_tries": 30,
        },
    }
def few_shot_prompt(input_sample, base_prompt, examples):
    """Assemble an Arabic few-shot prompt: instructions, labeled examples,
    then the unlabeled target tweet."""
    segments = [base_prompt, "\nاليك بعض الأمثلة:\n\n"]
    for idx, ex in enumerate(examples):
        # Each gold technique is translated to Arabic and single-quoted.
        quoted = ", ".join("'" + translate_labels(t) + "'" for t in ex["label"])
        segments.append(
            f"مثال {idx}:\nالتغريدة: {ex['input']}\nالتصنيف: {quoted}\n\n"
        )
    # Target tweet goes last, with the label slot left blank for the model.
    segments.append(f"التغريدة: {input_sample}\nالتصنيف: \n")
    return "".join(segments)
(Virtue)", + "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if + arabic_label in label or english_label.lower() in label] + + final_labels = [l for l in detected_labels if "no technique" not in l] + + return list(set(final_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py new file mode 100755 index 00000000..2a7c1f0f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py @@ -0,0 +1,199 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
def few_shot_prompt(input_sample, base_prompt, examples):
    """Assemble an English few-shot prompt: instructions, labeled examples,
    then the unlabeled target tweet."""
    pieces = [base_prompt, "\n", "Here are some examples:\n\n"]
    for idx, ex in enumerate(examples):
        # Gold techniques are single-quoted, each followed by ", ".
        quoted = "".join("'" + t + "', " for t in ex["label"])
        pieces.append(
            "Example " + str(idx) + ":\n"
            "tweet: " + ex["input"] + "\n"
            "label: " + quoted + "\n\n"
        )
    # Target tweet goes last, with the label slot left blank for the model.
    pieces.append("tweet: " + input_sample + "\nlabel: \n")
    return "".join(pieces)
def fix_label(pred_label):
    """Normalise a raw model answer into the canonical propaganda-technique
    labels.

    The answer is split on the "', '" list separator, each piece is cleaned
    (dots stripped, dashes spaced, lower-cased) and matched against keyword
    rules; later rules override earlier ones. When several techniques are
    detected, "no technique" entries are filtered out.
    """
    if "used in this text" in pred_label:
        return ["no technique"]

    # Unify quote style, then split the quoted, comma-separated list.
    raw_parts = pred_label.replace('"', "'").split("', '")

    normalised = []
    for part in raw_parts:
        token = part.replace(".", "").strip()
        token = re.sub("-", " ", token)
        token = token.strip().lower()

        # Fallback for single-word labels such as "smears".
        canon = token.capitalize()

        if "slogan" in token:
            canon = "Slogans"
        if "loaded" in token:
            canon = "Loaded Language"
        if "prejudice" in token or "fear" in token or "mongering" in token:
            canon = "Appeal to fear/prejudice"
        if "terminating" in token or "thought" in token:
            canon = "Thought-terminating cliché"
        if "calling" in token or token == "name c":
            canon = "Name calling/Labeling"
        if "minimisation" in token or token == "exaggeration minim":
            canon = "Exaggeration/Minimisation"
        if "glittering" in token:
            canon = "Glittering generalities (Virtue)"
        if "flag" in token:
            canon = "Flag-waving"
        if "obfuscation" in token:
            canon = "Obfuscation, Intentional vagueness, Confusion"
        if "oversimplification" in token or "causal" in token:
            canon = "Causal Oversimplification"
        if "authority" in token:
            canon = "Appeal to authority"
        if "dictatorship" in token or "black" in token or "white" in token:
            canon = "Black-and-white Fallacy/Dictatorship"
        if "herring" in token or "irrelevant" in token:
            canon = "Presenting Irrelevant Data (Red Herring)"
        if "straw" in token or "misrepresentation" in token:
            canon = "Misrepresentation of Someone's Position (Straw Man)"
        if "whataboutism" in token:
            canon = "Whataboutism"

        # Catch-all for refusals, empty pieces and out-of-taxonomy answers.
        if (
            "no propaganda" in token
            or "technique" in token
            or token == ""
            or token == "no"
            or token == "appeal to history"
            or token == "appeal to emotion"
            or token == "appeal to"
            or token == "appeal"
            or token == "appeal to author"
            or token == "emotional appeal"
            or "no techn" in token
            or "hashtag" in token
            or "theory" in token
            or "specific mention" in token
            or "religious" in token
            or "gratitude" in token
        ):
            canon = "no technique"

        normalised.append(canon)

    # With multiple detections, "no technique" carries no information.
    if len(normalised) > 1:
        return [c for c in normalised if "no technique" not in c]
    return normalised
Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 30, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + tech_str = "" + for t in example["label"]: + tech_str += "'" + t + "', " + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\n" + + "التصنيف: " + + tech_str + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = ( + f'قم بتصنيف هذه 
"التغريدة" بناءً على تقنيات الدعاية التالية:\n\n' + f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'" + f"قدم التصنيفات فقط كقائمة من النصوص.\n" + ) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + label = label.strip().lower() + if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + return [] + + label_mapping = { + "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة بالمشاعر": "Loaded Language", "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", + "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع 
للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if + arabic_label in label or english_label.lower() in label] + final_labels = [l for l in detected_labels if "no technique" not in l] + + return list(set(final_labels)) + diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..6393fb8c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,106 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = """ + "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة بالمشاعر"، "الاحتكام إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، + "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (مغالطة رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "الاحتكام إلى السلطة"، + "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." + """ + + prompt_string = base_prompt + "التغريدة: \n\n" + input_sample + "التصنيف: \n" + + + return [ + { + "role": "system", + "content": ( + "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي." 
+ ), + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + label = label.strip().lower() + if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + return [] + label_mapping = { + "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة بالمشاعر": "Loaded Language", "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", + "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if + arabic_label in label or english_label.lower() in label] + final_labels = [l for l in detected_labels if "no technique" not in l] + + return list(set(final_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..833d5bd8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py @@ -0,0 +1,177 @@ +import random 
+import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + prompt_string = ( + f'Label this "tweet" based on the following propaganda techniques:\n\n' + f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's 
Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'" + f"Provide only labels as a list of strings.\n" + f"tweet: {input_sample}\n\n" + f"label: \n" + ) + + return [ + { + "role": "system", + "content": ( + "You are an expert social media content analyst." + ), + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + + +def fix_label(pred_label): + if "used in this text" in pred_label: + return ["no technique"] + + labels_fixed = [] + pred_label = pred_label.replace('"', "'").split("\n") + pred_labels = [] + + for l in pred_label: + pred_labels.append(l) + + if len(pred_labels) == 0: + return ["no technique"] + + for label in pred_labels: + label = label.replace(".", "").strip() + label = re.sub("-", " ", label) + label = label.strip().lower() + + # Handle case of single word labels like "Smears" so we just capitalize it + label_fixed = label.capitalize() + + # print(label) + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal to fear/prejudice" + if "terminating" in label or "thought" in label: + label_fixed = "Thought-terminating cliché" + if "calling" in label or label == "name c": + label_fixed = "Name calling/Labeling" + if "minimisation" in label or label == "exaggeration minim": + label_fixed = "Exaggeration/Minimisation" + if "glittering" in label: + label_fixed = "Glittering generalities (Virtue)" + if "flag" in label: + label_fixed = "Flag-waving" + if "obfuscation" in label: + label_fixed = "Obfuscation, Intentional vagueness, Confusion" + if "oversimplification" in label or "causal" in label: + label_fixed = "Causal Oversimplification" + if "authority" in label: + label_fixed = "Appeal to authority" + if "dictatorship" in label or "black" in 
label or "white" in label: + label_fixed = "Black-and-white Fallacy/Dictatorship" + if "herring" in label or "irrelevant" in label: + label_fixed = "Presenting Irrelevant Data (Red Herring)" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Misrepresentation of Someone's Position (Straw Man)" + if "whataboutism" in label: + label_fixed = "Whataboutism" + + if ( + "no propaganda" in label + or "technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "religious" in label + or "gratitude" in label + ): + label_fixed = "no technique" + + labels_fixed.append(label_fixed) + + out_put_labels = [] + # Remove no technique label when we have other techniques for the same text + if len(labels_fixed) > 1: + for flabel in labels_fixed: + if "no technique" not in flabel: + out_put_labels.append(flabel) + return out_put_labels + + return labels_fixed + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + return [] + pred_label = fix_label(label) + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..ed833d4f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,108 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import OpenAIModel 
+from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'قم بتصنيف هذه "التغريدة" بناءً على تقنيات الدعاية التالية:\n\n' + f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white 
Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'" + f"قدم التصنيفات فقط كقائمة من النصوص.\n" + ) + + prompt_string = ( + base_prompt + + f"التغريدة: {input_sample}\n\n" + f"التصنيف: \n" + ) + + return [ + { + "role": "system", + "content": ( + "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي." + ), + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + label = label.strip().lower() + if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + return [] + label_mapping = { + "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة بالمشاعر": "Loaded Language", "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", + "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if + arabic_label in label or english_label.lower() in label] + final_labels = [l for l in detected_labels if "no technique" not in l] + + return list(set(final_labels)) diff --git 
a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..792e0d3f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,147 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import FastChatModel +from llmebench.tasks import MultilabelPropagandaTask + +random.seed(1333) + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + instruction = """ + "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، + "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، + "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." 
+ """ + label_mapping = { + "no technique": "بدون تقنية", + "Smears": "تشويه", + "Exaggeration/Minimisation": "مبالغة/تقليل", + "Loaded Language": "لغة محملة", + "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز", + "Name calling/Labeling": "التسمية/الملصقات", + "Slogans": "الشعارات", + "Repetition": "التكرار", + "Doubt": "الشك", + "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك", + "Flag-waving": "التلويح بالعلم", + "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)", + "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)", + "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)", + "Appeal to authority": "النداء إلى السلطة", + "Whataboutism": "ماذا عن", + "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية", + "Thought-terminating cliché": "الكليشيه القاطع للفكر", + "Causal Oversimplification": "التبسيط السببي" + } + + few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + print(f"Processing example {index + 1}") + print(f"Example label: {example['label']}") + + try: + labels = ", ".join(label_mapping[l] for l in example["label"] if example["label"]) + print("Labels in few_shot:", labels) + except KeyError as e: + print(f"KeyError: {e} in example {index + 1}") + labels = "Unknown Label" + + few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: " + return few_shot_text + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples) + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + label = label.lower() + label = re.sub(r'<[^>]+>', '', label) # Remove any HTML-like tags + label = label.lower() + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": 
"Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [] + for arabic_label, english_label in label_mapping.items(): + if arabic_label in label: + detected_labels.append(english_label) + elif english_label.lower() in label: + detected_labels.append(english_label) + + print("Detected labels:", detected_labels) + + # this is for duplicates values + detected_labels = list(set(detected_labels)) + + return detected_labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..94add2d8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py @@ -0,0 +1,178 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import FastChatModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic 
Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + tech_str = "" + for t in example["label"]: + tech_str += "'" + t + "', " + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "Tweet: " + + example["input"] + + "\nLabel: " + + tech_str[:-2] # Remove the trailing comma and space + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "Tweet: " + input_sample + "\nLabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + instruction = """ +Review the following tweets and analyze the propaganda techniques used. 
Choose only the applicable categories from: "no technique," "Smears," + "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt," + "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue)," + "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority," +"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification." + """ + base_prompt = instruction.strip() + + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + +def post_process(response): + + label = response["choices"][0]["message"]["content"].lower() + + + label = label.replace("", "").replace("", "") + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + print("label: ", label) + detected_labels = [] + + if "no technique" in label: + detected_labels.append(label_mapping["بدون تقنية"]) + if "Smears" in label: + 
detected_labels.append(label_mapping["تشويه"]) + if "Exaggeration/Minimisation" in label or "مبالغة" in label: + detected_labels.append(label_mapping["مبالغة/تقليل"]) + if "Loaded Language" in label: + detected_labels.append(label_mapping["لغة محملة"]) + if "Appeal to fear/prejudice" in label or "الخوف" in label or "fear" in label: + detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"]) + if "Name calling/Labeling" in label or "التسمية" or "name" in label: + detected_labels.append(label_mapping["التسمية/الملصقات"]) + if "Slogans" in label: + detected_labels.append(label_mapping["الشعارات"]) + if "Repetition" in label: + detected_labels.append(label_mapping["التكرار"]) + if "Doubt" in label: + detected_labels.append(label_mapping["الشك"]) + if "Obfuscation, Intentional vagueness, Confusion" in label or "Obfuscation" in label or "Intentional vagueness" in label or "Confusion" in label: + detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"]) + if "Flag-waving" in label or "flag": + detected_labels.append(label_mapping["التلويح بالعلم"]) + if "Glittering generalities (Virtue)" in label or "الفضيلة" in label or "Glittering": + detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"]) + if "Misrepresentation of Someone's Position (Straw Man)" in label or "تحريف موقف شخص" in label: + detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"]) + if "Presenting Irrelevant Data (Red Herring)" in label or "عرض بيانات غير ذات صلة" in label: + detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"]) + if "Appeal to authority" in label: + detected_labels.append(label_mapping["النداء إلى السلطة"]) + if "Whataboutism" in label: + detected_labels.append(label_mapping["ماذا عن"]) + if "Black-and-white Fallacy/Dictatorship" in label or "الديكتاتورية" in label: + detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"]) + if "Thought-terminating cliché" in label or "الكليشيه" in label: + 
detected_labels.append(label_mapping["الكليشيه القاطع للفكر"]) + if "Causal Oversimplification" in label or "التبسيط" in label: + detected_labels.append(label_mapping["التبسيط السببي"]) + + + return detected_labels + + diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..01c1c3c8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,120 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import FastChatModel +from llmebench.tasks import MultilabelPropagandaTask + +random.seed(1333) + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + instruction = """ + "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"، + "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، + "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification". 
+ """ + + few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + labels = ", ".join(example["label"]) + few_shot_text += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {labels}\n\n" + ) + + few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: " + return few_shot_text + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples) + } + ] + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + label = re.sub(r'<[^>]+>', '', label) # Remove any HTML-like tags + label = label.lower() + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [] + for arabic_label, english_label in label_mapping.items(): + if arabic_label in label: + detected_labels.append(english_label) + elif english_label.lower() in label: + detected_labels.append(english_label) + + print("Detected labels:", detected_labels) + + # Remove duplicates + detected_labels = 
list(set(detected_labels)) + + return detected_labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..8e63b704 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,152 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import FastChatModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + instruction = """ + "من القائمة، اختر التقنية الدعائية التي تلتقط جوهر التغريدة بشكل أفضل، "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، + "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، +"ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." 
+ + """ + return [ + { + "role": "user", + "content": ( + f' \n{instruction}\n' + + "التغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + label = label.lower() + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + print("label: ", label) + detected_labels = [] + + if "بدون تقنية" in label: + detected_labels.append(label_mapping["بدون تقنية"]) + if "تشويه" in label: + detected_labels.append(label_mapping["تشويه"]) + if "تقليل" in label or "مبالغة" in label: + detected_labels.append(label_mapping["مبالغة/تقليل"]) + if "لغة محملة" in label: + detected_labels.append(label_mapping["لغة محملة"]) + if "التحيز" in label or "الخوف" in label: + detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"]) + if "الملصقات" in label or "التسمية" in label: + detected_labels.append(label_mapping["التسمية/الملصقات"]) + if "الشعارات" in label: + detected_labels.append(label_mapping["الشعارات"]) + if "التكرار" in label: + detected_labels.append(label_mapping["التكرار"]) + if "الشك" in 
label: + detected_labels.append(label_mapping["الشك"]) + if "الارتباك" in label or "الغموض المتعمد" in label or "التعمية" in label: + detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"]) + if "التلويح بالعلم" in label: + detected_labels.append(label_mapping["التلويح بالعلم"]) + if "التعميمات البراقة" in label or "الفضيلة" in label: + detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"]) + if "رجل القش" in label or "تحريف موقف شخص" in label: + detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"]) + if "السمكة الحمراء" in label or "عرض بيانات غير ذات صلة" in label: + detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"]) + if "النداء إلى السلطة" in label: + detected_labels.append(label_mapping["النداء إلى السلطة"]) + if "ماذا عن" in label: + detected_labels.append(label_mapping["ماذا عن"]) + if "الأبيض والأسود" in label or "الديكتاتورية" in label: + detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"]) + if "القاطع للفكر" in label or "الكليشيه" in label: + detected_labels.append(label_mapping["الكليشيه القاطع للفكر"]) + if "السببي" in label or "التبسيط" in label: + detected_labels.append(label_mapping["التبسيط السببي"]) + + # this is for duplicates values + detected_labels = list(set(detected_labels)) + + return detected_labels + + diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..e1d7cd36 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py @@ -0,0 +1,170 @@ +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import FastChatModel +from llmebench.tasks import MultilabelPropagandaTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif 
Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": FastChatModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 30, + }, + } + + +def prompt(input_sample): + base_prompt = ( + f'Review the following tweets and analyze the propaganda techniques used. 
Choose only the applicable categories from:\n\n' + f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'" + f"\nAnswer (only yes/no) in the following format: \n" + f"'Doubt': 'yes', " + f"'Smears': 'no', \n\n" + f"tweet: {input_sample}\n\n" + f"label: \n" + ) + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def fix_label(pred_label): + if "used in this text" in pred_label: + return ["no technique"] + + labels_fixed = [] + pred_label = pred_label.replace('"', "'").split("', '") + pred_labels = [] + + for l in pred_label: + splits = l.replace(",", "").split(":") + if len(splits) > 1 and "no" in splits[1]: + continue + pred_labels.append(splits[0].replace("'", "")) + + if len(pred_labels) == 0: + return ["no technique"] + + for label in pred_labels: + label = label.replace(".", "").strip() + label = re.sub("-", " ", label) + label = label.strip().lower() + + # Handle case of single word labels like "Smears" so we just capitalize it + label_fixed = label.capitalize() + + # print(label) + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal to fear/prejudice" + if "terminating" in label or "thought" in label: + label_fixed = "Thought-terminating cliché" + if "calling" in label or label == "name c": + label_fixed = "Name calling/Labeling" + if "minimisation" in label or label == "exaggeration minim": + label_fixed = "Exaggeration/Minimisation" + 
if "glittering" in label: + label_fixed = "Glittering generalities (Virtue)" + if "flag" in label: + label_fixed = "Flag-waving" + if "obfuscation" in label: + label_fixed = "Obfuscation, Intentional vagueness, Confusion" + if "oversimplification" in label or "causal" in label: + label_fixed = "Causal Oversimplification" + if "authority" in label: + label_fixed = "Appeal to authority" + if "dictatorship" in label or "black" in label or "white" in label: + label_fixed = "Black-and-white Fallacy/Dictatorship" + if "herring" in label or "irrelevant" in label: + label_fixed = "Presenting Irrelevant Data (Red Herring)" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Misrepresentation of Someone's Position (Straw Man)" + if "whataboutism" in label: + label_fixed = "Whataboutism" + + if ( + "no propaganda" in label + or "technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "religious" in label + or "gratitude" in label + ): + label_fixed = "no technique" + + labels_fixed.append(label_fixed) + + out_put_labels = [] + # Remove no technique label when we have other techniques for the same text + if len(labels_fixed) > 1: + for flabel in labels_fixed: + if flabel != "no technique": + out_put_labels.append(flabel) + return out_put_labels + + return labels_fixed + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + pred_label = fix_label(label) + print(pred_label) + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 
# 00000000..7c75471e --- /dev/null
# +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py
# @@ -0,0 +1,147 @@
import random
import re

from llmebench.datasets import WANLP22T3PropagandaDataset
from llmebench.models import FastChatModel
from llmebench.tasks import MultilabelPropagandaTask


random.seed(1333)


def metadata():
    """Asset metadata: authors, affiliation, evaluated model, and paper links."""
    return {
        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
        "model": "jais-13b-chat",
        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
    }


def config():
    """Benchmark configuration: dataset, task, chat model, and class labels."""
    return {
        "dataset": WANLP22T3PropagandaDataset,
        "dataset_args": {"techniques_path": "classes.txt"},
        "task": MultilabelPropagandaTask,
        "model": FastChatModel,
        "model_args": {
            "class_labels": [
                "no technique",
                "Smears",
                "Exaggeration/Minimisation",
                "Loaded Language",
                "Appeal to fear/prejudice",
                "Name calling/Labeling",
                "Slogans",
                "Repetition",
                "Doubt",
                "Obfuscation, Intentional vagueness, Confusion",
                "Flag-waving",
                "Glittering generalities (Virtue)",
                "Misrepresentation of Someone's Position (Straw Man)",
                "Presenting Irrelevant Data (Red Herring)",
                "Appeal to authority",
                "Whataboutism",
                "Black-and-white Fallacy/Dictatorship",
                "Thought-terminating cliché",
                "Causal Oversimplification",
            ],
            "max_tries": 3,
        },
    }


def prompt(input_sample):
    """Build a zero-shot mixed prompt (Arabic instruction, English label names)."""
    instruction = """
    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"،
    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
    """
    return [
        {
            "role": "user",
            # NOTE(review): there is no separator between the tweet text and
            # "التصنيف: " — presumably intentional, but confirm against siblings.
            "content": (
                f' \n{instruction}\n'
                + "التغريدة: "
                + input_sample
                + "التصنيف: "
            ),
        }
    ]


def post_process(response):
    """Map the model's Arabic free-text answer to the English class-label list.

    Each technique is detected at most once via characteristic Arabic keywords
    in the lower-cased response, so the result contains no duplicates.
    """
    # FIX: the original called .lower() twice in a row; once is enough.
    label = response["choices"][0]["message"]["content"].lower()
    label_mapping = {
        "بدون تقنية": "no technique",
        "تشويه": "Smears",
        "مبالغة/تقليل": "Exaggeration/Minimisation",
        "لغة محملة": "Loaded Language",
        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
        "التسمية/الملصقات": "Name calling/Labeling",
        "الشعارات": "Slogans",
        "التكرار": "Repetition",
        "الشك": "Doubt",
        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
        "التلويح بالعلم": "Flag-waving",
        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
        "النداء إلى السلطة": "Appeal to authority",
        "ماذا عن": "Whataboutism",
        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
        "التبسيط السببي": "Causal Oversimplification"
    }
    print("label: ", label)
    detected_labels = []

    if "بدون تقنية" in label:
        detected_labels.append(label_mapping["بدون تقنية"])
    if "تشويه" in label:
        detected_labels.append(label_mapping["تشويه"])
    if "تقليل" in label or "مبالغة" in label:
        detected_labels.append(label_mapping["مبالغة/تقليل"])
    if "لغة محملة" in label:
        detected_labels.append(label_mapping["لغة محملة"])
    if "التحيز" in label or "الخوف" in label:
        detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"])
    if "الملصقات" in label or "التسمية" in label:
        detected_labels.append(label_mapping["التسمية/الملصقات"])
    if "الشعارات" in label:
        detected_labels.append(label_mapping["الشعارات"])
    if "التكرار" in label:
        detected_labels.append(label_mapping["التكرار"])
    if "الشك" in label:
        detected_labels.append(label_mapping["الشك"])
    if "الارتباك" in label or "الغموض المتعمد" in label or "التعمية" in label:
        detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"])
    if "التلويح بالعلم" in label:
        detected_labels.append(label_mapping["التلويح بالعلم"])
    if "التعميمات البراقة" in label or "الفضيلة" in label:
        detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"])
    if "رجل القش" in label or "تحريف موقف شخص" in label:
        detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"])
    if "السمكة الحمراء" in label or "عرض بيانات غير ذات صلة" in label:
        detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"])
    if "النداء إلى السلطة" in label:
        detected_labels.append(label_mapping["النداء إلى السلطة"])
    if "ماذا عن" in label:
        detected_labels.append(label_mapping["ماذا عن"])
    if "الأبيض والأسود" in label or "الديكتاتورية" in label:
        detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"])
    if "القاطع للفكر" in label or "الكليشيه" in label:
        detected_labels.append(label_mapping["الكليشيه القاطع للفكر"])
    if "السببي" in label or "التبسيط" in label:
        detected_labels.append(label_mapping["التبسيط السببي"])

    return detected_labels


# diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py
# new file mode 100755 index 00000000..61926954 --- /dev/null
# +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py @@ -0,0
+1,289 @@ +# import random +# import re + +# from llmebench.datasets import WANLP22T3PropagandaDataset +# from llmebench.models import AzureModel +# from llmebench.tasks import MultilabelPropagandaTask + +# random.seed(1333) + +# def metadata(): +# return { +# "author": "Arabic Language Technologies, QCRI, HBKU", +# "model": "LLama 3 8b", +# "description": "Deployed on Azure.", +# "scores": {"Micro-F1": "0.52864"}, # 1 - 0.3631 , 3- 0.48027 , 5- 0.52864 +# } + +# def config(): +# return { +# "dataset": WANLP22T3PropagandaDataset, +# "dataset_args": {"techniques_path": "classes.txt"}, +# "task": MultilabelPropagandaTask, +# "model": AzureModel, +# "model_args": { +# "class_labels": [ +# "no technique", +# "Smears", +# "Exaggeration/Minimisation", +# "Loaded Language", +# "Appeal to fear/prejudice", +# "Name calling/Labeling", +# "Slogans", +# "Repetition", +# "Doubt", +# "Obfuscation, Intentional vagueness, Confusion", +# "Flag-waving", +# "Glittering generalities (Virtue)", +# "Misrepresentation of Someone's Position (Straw Man)", +# "Presenting Irrelevant Data (Red Herring)", +# "Appeal to authority", +# "Whataboutism", +# "Black-and-white Fallacy/Dictatorship", +# "Thought-terminating cliché", +# "Causal Oversimplification", +# ], +# "max_tries": 3, +# }, +# } + +# def few_shot_prompt(input_sample, examples): +# instruction = """ +# "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، +# "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، +# "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." 
+# """ +# label_mapping = { +# "no technique": "بدون تقنية", +# "Smears": "تشويه", +# "Exaggeration/Minimisation": "مبالغة/تقليل", +# "Loaded Language": "لغة محملة", +# "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز", +# "Name calling/Labeling": "التسمية/الملصقات", +# "Slogans": "الشعارات", +# "Repetition": "التكرار", +# "Doubt": "الشك", +# "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك", +# "Flag-waving": "التلويح بالعلم", +# "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)", +# "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)", +# "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)", +# "Appeal to authority": "النداء إلى السلطة", +# "Whataboutism": "ماذا عن", +# "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية", +# "Thought-terminating cliché": "الكليشيه القاطع للفكر", +# "Causal Oversimplification": "التبسيط السببي" +# } + +# few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n" +# for index, example in enumerate(examples): +# labels_list = [label_mapping.get(label,"") for label in example["label"]] +# labels = ", ".join(labels_list) +# few_shot_text += ( +# f"مثال {index + 1}:\n" +# f"التغريدة: '{example['input']}'\n" +# f"التصنيف: {labels}\n\n" +# ) + +# few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: " +# return few_shot_text + +# def few_shot_prompt(input_sample, base_prompt, examples): +# label_mapping = { +# "no technique": "بدون تقنية", +# "Smears": "تشويه", +# "Exaggeration/Minimisation": "مبالغة/تقليل", +# "Loaded Language": "لغة محملة", +# "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز", +# "Name calling/Labeling": "التسمية/الملصقات", +# "Slogans": "الشعارات", +# "Repetition": "التكرار", +# "Doubt": "الشك", +# "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك", +# "Flag-waving": "التلويح بالعلم", +# "Glittering 
generalities (Virtue)": "التعميمات البراقة (الفضيلة)", +# "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)", +# "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)", +# "Appeal to authority": "النداء إلى السلطة", +# "Whataboutism": "ماذا عن", +# "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية", +# "Thought-terminating cliché": "الكليشيه القاطع للفكر", +# "Causal Oversimplification": "التبسيط السببي" +# } + +# out_prompt = base_prompt + "\n" +# out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n" +# for index, example in enumerate(examples): +# tech_str = "" +# for t in example["label"]: +# tech_str += "'" + label_mapping[t] + "', " + +# out_prompt = ( +# out_prompt +# + "مثال " +# + str(index) +# + ":" +# + "\n" +# + "التغريدة: " +# + example["input"] +# + "\التصنيف: " +# + tech_str +# + "\n\n" +# ) + +# # Append the sentence we want the model to predict for but leave the Label blank +# out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" + +# return out_prompt + +# def prompt(input_sample, examples): +# return [ +# { +# "role": "user", +# "content": few_shot_prompt(input_sample, examples) +# } +# ] + +# def post_process(response): +# if not response or 'error' in response or 'output' not in response: +# print("Error or missing output in response:", response) +# return None + +# label = response["output"].strip().lower() +# label = re.sub(r'<[^>]+>', '', label) # Remove any HTML-like tags +# label = label.lower() + +# label_mapping = { +# "بدون تقنية": "no technique", +# "تشويه": "Smears", +# "مبالغة/تقليل": "Exaggeration/Minimisation", +# "لغة محملة": "Loaded Language", +# "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", +# "التسمية/الملصقات": "Name calling/Labeling", +# "الشعارات": "Slogans", +# "التكرار": "Repetition", +# "الشك": "Doubt", +# "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", +# "التلويح بالعلم": 
"Flag-waving", +# "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", +# "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", +# "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", +# "النداء إلى السلطة": "Appeal to authority", +# "ماذا عن": "Whataboutism", +# "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", +# "الكليشيه القاطع للفكر": "Thought-terminating cliché", +# "التبسيط السببي": "Causal Oversimplification" +# } + +# detected_labels = [] +# for arabic_label, english_label in label_mapping.items(): +# if arabic_label in label: +# detected_labels.append(english_label) +# elif english_label.lower() in label: +# detected_labels.append(english_label) + +# print("Detected labels:", detected_labels) + +# # this is for duplicates values +# detected_labels = list(set(detected_labels)) + +# return detected_labels +import random +import re +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultilabelPropagandaTask + +random.seed(1333) + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "no technique", "Smears", "Exaggeration/Minimisation", "Loaded Language", + "Appeal to fear/prejudice", "Name calling/Labeling", "Slogans", "Repetition", + "Doubt", "Obfuscation, Intentional vagueness, Confusion", "Flag-waving", + "Glittering generalities (Virtue)", "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", "Appeal to authority", "Whataboutism", + "Black-and-white Fallacy/Dictatorship", "Thought-terminating cliché", "Causal Oversimplification" + ], + "max_tries": 3, + }, + } + +def translate_labels(label): + label_mapping = { + "no technique": "بدون تقنية", "Smears": "تشويه", "Exaggeration/Minimisation": "مبالغة/تقليل", + "Loaded Language": "لغة محملة", "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز", + "Name calling/Labeling": "التسمية/الملصقات", "Slogans": "الشعارات", "Repetition": "التكرار", + "Doubt": "الشك", "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك", + "Flag-waving": "التلويح بالعلم", "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)", + "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)", + "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)", + "Appeal to authority": "النداء إلى السلطة", "Whataboutism": "ماذا عن", + "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية", + "Thought-terminating cliché": "الكليشيه القاطع للفكر", "Causal Oversimplification": "التبسيط السببي" + } + return label_mapping.get(label, label) + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\nاليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + tech_str = ", ".join([f"'{translate_labels(t)}'" for 
t in example["label"]]) + out_prompt += f"مثال {index}:\nالتغريدة: {example['input']}\nالتصنيف: {tech_str}\n\n" + out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n" + return out_prompt + +def prompt(input_sample, examples): + base_prompt = """ + "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، + "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، + "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." + """ + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples) + } + ] + +def post_process(response): + if not response or 'error' in response or 'output' not in response: + print("Error or missing output in response:", response) + return None + + label = re.sub(r'<[^>]+>', '', response["output"].strip().lower()) + label_mapping = { + "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", + "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal 
Oversimplification" + } + + detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if arabic_label in label or english_label.lower() in label] + print("Detected labels:", list(set(detected_labels))) + return list(set(detected_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..856aff3b --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py @@ -0,0 +1,148 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + tech_str = "" + for t in example["label"]: + tech_str += "'" + t + "', " + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "Tweet: " + + example["input"] + + "\nLabel: " + + tech_str[:-2] # Remove the trailing comma and space + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "Tweet: " + input_sample + "\nLabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + instruction = """ +Review the following tweets and analyze the propaganda techniques used. 
Choose only the applicable categories from: "no technique," "Smears," + "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt," + "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue)," + "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority," +"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification." + """ + base_prompt = instruction.strip() + + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples) + ), + } + ] + +def post_process(response): + if not response or 'error' in response or 'output' not in response: + print("Error or missing output in response:", response) + return "No respose" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("", "").replace("", "") + label = label.lower() + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal 
Oversimplification" + } + + detected_labels = [] + for arabic_label, english_label in label_mapping.items(): + if arabic_label in label: + detected_labels.append(english_label) + elif english_label.lower() in label: + detected_labels.append(english_label) + + print("Detected labels:", detected_labels) + + # Remove duplicates + detected_labels = list(set(detected_labels)) + + return detected_labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..4f68be80 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,124 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultilabelPropagandaTask + +random.seed(1333) + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, examples): + instruction = """ + "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"، + "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، + "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification". 
+ """ + + few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + labels = ", ".join(example["label"]) + few_shot_text += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {labels}\n\n" + ) + + few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: " + return few_shot_text + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples) + } + ] + +def post_process(response): + if not response or 'error' in response or 'output' not in response: + print("Error or missing output in response:", response) + return "No respose" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("", "").replace("", "") + label = label.lower() + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [] + for arabic_label, english_label in label_mapping.items(): + if arabic_label in label: + detected_labels.append(english_label) + elif 
english_label.lower() in label: + detected_labels.append(english_label) + + print("Detected labels:", detected_labels) + + # Remove duplicates + detected_labels = list(set(detected_labels)) + + return detected_labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..bbdd2ca5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,127 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + instruction = """ + "من القائمة، اختر التقنية الدعائية التي تلتقط جوهر التغريدة بشكل أفضل، "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، + "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، +"ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." 
+ + """ + return [ + { + "role": "user", + "content": ( + f' \n{instruction}\n' + + "التغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + + + +def post_process(response): + if not response or 'error' in response or 'output' not in response: + print("Error or missing output in response:", response) + return "No respose" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("", "").replace("", "") + label = label.lower() + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [] + for arabic_label, english_label in label_mapping.items(): + if arabic_label in label: + detected_labels.append(english_label) + elif english_label.lower() in label: + detected_labels.append(english_label) + + print("Detected labels:", detected_labels) + + # Remove duplicates + detected_labels = list(set(detected_labels)) + + return detected_labels + + + diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py 
b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..0c473744 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,197 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + +def prompt(input_sample): + instruction = """ +Review the following tweets and analyze the propaganda techniques used. 
Choose only the applicable categories from: "no technique," "Smears," + "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt," + "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue)," + "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority," +"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification." + """ + return [ + { + "role": "user", + "content": ( + f'{instruction}\n' + + "Tweet: " + + input_sample + + "\nLabel: " + ), + } + ] + +# def post_process(response): +# if not response or 'error' in response or 'output' not in response: +# print("Error or missing output in response:", response) +# return "No respose" # Safely default to NOT_ADULT when unsure + +# label = response["output"].strip().lower() +# label = label.replace("", "").replace("", "") +# label = label.lower() +# label_mapping = { +# "بدون تقنية": "no technique", +# "تشويه": "Smears", +# "مبالغة/تقليل": "Exaggeration/Minimisation", +# "لغة محملة": "Loaded Language", +# "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", +# "التسمية/الملصقات": "Name calling/Labeling", +# "الشعارات": "Slogans", +# "التكرار": "Repetition", +# "الشك": "Doubt", +# "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", +# "التلويح بالعلم": "Flag-waving", +# "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", +# "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", +# "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", +# "النداء إلى السلطة": "Appeal to authority", +# "ماذا عن": "Whataboutism", +# "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", +# "الكليشيه القاطع للفكر": "Thought-terminating cliché", +# "التبسيط السببي": "Causal 
Oversimplification" +# } +# print("label: ", label) +# detected_labels = [] + +# if "no technique" in label: +# detected_labels.append(label_mapping["بدون تقنية"]) +# if "Smears" in label: +# detected_labels.append(label_mapping["تشويه"]) +# if "Exaggeration/Minimisation" in label or "مبالغة" in label: +# detected_labels.append(label_mapping["مبالغة/تقليل"]) +# if "Loaded Language" in label: +# detected_labels.append(label_mapping["لغة محملة"]) +# if "Appeal to fear/prejudice" in label or "الخوف" in label or "fear" in label: +# detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"]) +# if "Name calling/Labeling" in label or "التسمية" or "name" in label: +# detected_labels.append(label_mapping["التسمية/الملصقات"]) +# if "Slogans" in label: +# detected_labels.append(label_mapping["الشعارات"]) +# if "Repetition" in label: +# detected_labels.append(label_mapping["التكرار"]) +# if "Doubt" in label: +# detected_labels.append(label_mapping["الشك"]) +# if "Obfuscation, Intentional vagueness, Confusion" in label or "Obfuscation" in label or "Intentional vagueness" in label or "Confusion" in label: +# detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"]) +# if "Flag-waving" in label or "flag": +# detected_labels.append(label_mapping["التلويح بالعلم"]) +# if "Glittering generalities (Virtue)" in label or "الفضيلة" in label or "Glittering": +# detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"]) +# if "Misrepresentation of Someone's Position (Straw Man)" in label or "تحريف موقف شخص" in label: +# detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"]) +# if "Presenting Irrelevant Data (Red Herring)" in label or "عرض بيانات غير ذات صلة" in label: +# detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"]) +# if "Appeal to authority" in label: +# detected_labels.append(label_mapping["النداء إلى السلطة"]) +# if "Whataboutism" in label: +# detected_labels.append(label_mapping["ماذا عن"]) +# if 
"Black-and-white Fallacy/Dictatorship" in label or "الديكتاتورية" in label: +# detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"]) +# if "Thought-terminating cliché" in label or "الكليشيه" in label: +# detected_labels.append(label_mapping["الكليشيه القاطع للفكر"]) +# if "Causal Oversimplification" in label or "التبسيط" in label: +# detected_labels.append(label_mapping["التبسيط السببي"]) + + +# return detected_labels + + +def post_process(response): + if not response or 'error' in response or 'output' not in response: + print("Error or missing output in response:", response) + return "No respose" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("", "").replace("", "") + label = label.lower() + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [] + for arabic_label, english_label in label_mapping.items(): + if arabic_label in label: + detected_labels.append(english_label) + elif english_label.lower() in label: + detected_labels.append(english_label) + + print("Detected 
labels:", detected_labels) + + # Remove duplicates + detected_labels = list(set(detected_labels)) + + return detected_labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..e7cf5a1a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,122 @@ +import random +import re + +from llmebench.datasets import WANLP22T3PropagandaDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultilabelPropagandaTask + + +random.seed(1333) + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": WANLP22T3PropagandaDataset, + "dataset_args": {"techniques_path": "classes.txt"}, + "task": MultilabelPropagandaTask, + "model": AzureModel, + "model_args": { + "class_labels": [ + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", + ], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + instruction = """ + "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"، + "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، + "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification". 
+ """ + return [ + { + "role": "user", + "content": ( + f' \n{instruction}\n' + + "التغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + + + +def post_process(response): + if not response or 'error' in response or 'output' not in response: + print("Error or missing output in response:", response) + return "No respose" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("", "").replace("", "") + label = label.lower() + + label_mapping = { + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", + "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", + "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification" + } + + detected_labels = [] + for arabic_label, english_label in label_mapping.items(): + if arabic_label in label: + detected_labels.append(english_label) + elif english_label.lower() in label: + detected_labels.append(english_label) + + print("Detected labels:", detected_labels) + + # Remove duplicates + detected_labels = list(set(detected_labels)) + + return detected_labels + diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py 
b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..d0f5dea4 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py @@ -0,0 +1,96 @@ +import random +import re + +from llmebench.datasets import SpamDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SpamTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "إعلان" if example["label"] == "__label__ADS" else "ليس إعلان" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\n" + + "التصنيف: " + + label + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + + + + + +def prompt(input_sample, examples): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم 
التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + return "__label__ADS" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py new file mode 100755 index 00000000..c57c2568 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py @@ -0,0 +1,98 @@ +import random +import re + +from llmebench.datasets import SpamDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SpamTask + + +random.seed(1333) + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + label = "spam" if example["label"] == "__label__ADS" else "not spam" + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + label + + "\n\n" + ) + + # Append the tweet we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + + + + + +def prompt(input_sample, examples): + base_prompt = ( + f"If the following tweet can be classified as spam or contains an advertisemnt, write 'spam' without explnanation, otherwise write 'not spam' without explanantion.\n\n" + f"Provide only labels as a list of string.\n" + ) + + return [ + { + "role": "system", + "content": "You are an expert social media content analyst.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + return "__label__ADS" + else: + return None \ No newline at end of file diff --git 
a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..eb0320cf --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py @@ -0,0 +1,94 @@ +import random +import re + +from llmebench.datasets import SpamDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SpamTask + + +random.seed(1333) + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "spam" if example["label"] == "__label__ADS" else "not spam" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\n" + + "التصنيف: " + + label + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + + + + + +def prompt(input_sample, examples): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + return "__label__ADS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py 
b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..7c0424c6 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,64 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + base_prompt += "\n" + "التغريدة: " + input_sample + "\n\nالتصنيف: " + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": base_prompt + }, + ] + + + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + return "__label__NOTADS" + 
elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + return "__label__ADS" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..cac11026 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py @@ -0,0 +1,59 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "system", + "content": "You are an AI assistant that helps people find information.", + }, + { + "role": "user", + "content": f"If the following tweet can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n\ntweet: {input_sample}\nlabel: ", + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + return "__label__ADS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..63654173 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,61 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SpamTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": 
"GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + base_prompt += "\n" + "التغريدة: " + input_sample + "\n\nالتصنيف: " + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": base_prompt + }, + ] + + + + +def post_process(response): + out = response["choices"][0]["message"]["content"] + label = out.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + return "__label__ADS" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..dadc5685 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,72 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import FastChatModel 
+from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt += "إليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "نعم" if example["label"] == "__label__ADS" else "لا" + out_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt += f"التغريدة: '{input_sample}'\nالتصنيف: \n" + + return out_prompt +def prompt(input_sample, examples): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n" + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples) + } + ] +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip().lower() + if "لا يمكن تحديد" in label: + return None + if "هي محتوى غير مرغوب فيه" in label or "التغريدة الأخيرة تحتوي على محتوى غير مرغوب فيه" in label or "والرابعة تحتويان على محتوى" in label or "هذه التغريدة تصنف ضمن الرسائل غير المرغوب 
فيها" in label or "هي على الأرجح إعلان" in label: + return "__label__ADS" + if "هي إعلان" in label or "قدمتها تحتوي على محتوى غير مرغوب" in label: + return "__label__ADS" + + #print("label", label) + if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..e3cc83f7 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py @@ -0,0 +1,78 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + example["label"] + + "\n\n" + ) + + # Append the tweet we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt="If the following tweet can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion." + return [ + { + "role": "user", + "content": ( + few_shot_prompt(input_sample, base_prompt, examples ) + ), + } + ] + +def post_process(response): + out = response["choices"][0]["message"]["content"] + j = out.find(".") + if j > 0: + out = out[0:j] + + label = out.replace("label:", "").strip().lower() + if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label or "notads" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label or "ads" in label: + return "__label__ADS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..d8ef1cf4 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,68 @@ +from 
llmebench.datasets import SpamDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SpamTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + + + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt += "إليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "__label__ADS" if example["label"] == "__label__ADS" else "__label__NOTADS" + out_prompt += ( + f"مثال {index + 1}:\n" + f"الجملة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt += f"الجملة: '{input_sample}'\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = "صنف الجملة التالية كـ '__label__ADS' أو '__label__NOTADS'، أعد التسمية فقط بدون الحاجة إلى وصف أو تحليل.\n" + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples) + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + + label = label.replace("label:", "").strip() + print("label", label) + if "ليس" in label or "ليست" in label or "not" in label: + return "__label__NOTADS" + return "__label__ADS" diff --git 
a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..2bf036df --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,59 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + + + + +def prompt(input_sample): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n" + return [ + { + "role": "user", + "content": base_prompt + + "التغريدة: " + + input_sample + + "التصنيف: " + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip().lower() + #print("label", label) + if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..1afb2932 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py @@ -0,0 +1,59 @@ + + + +from llmebench.datasets import SpamDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on 
[arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + + + + +def prompt(input_sample): + base_prompt = "If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n tweet: {input_sample}\nlabel: \n" + return [ + { + "role": "user", + "content": base_prompt + + "Tweet: " + + input_sample + + "Classification: " + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + + label = label.replace("label:", "").strip() + print("label", label) + if "ليس" in label or "ليست" in label or "NOT" in label: + return "__label__NOTADS" + return "__label__ADS" diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..528c262c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,59 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SpamTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 3, + }, + } + + + + + +def prompt(input_sample): + base_prompt = "صنف الجملة التالية كـ '__label__ADS' أو '__label__NOTADS'، أعد التسمية فقط بدون الحاجة إلى وصف أو تحليل.\n" + return [ + { + "role": "user", + "content": base_prompt + + "التغريدة: " + + input_sample + + "التصنيف: " + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].lower() + + label = label.replace("label:", "").strip() + label = label.lower() + print("label", label) + + if "لا يمكنني" in label: + return None + + if "ليس" in label or "ليست" in label or "not" in label: + return "__label__NOTADS" + return "__label__ADS" diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py new file mode 100755 index 00000000..6bad23e8 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py @@ -0,0 +1,87 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import AzureModel +from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": AzureModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 100, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt += "إليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "إعلان" if example["label"] == "__label__ADS" else "ليس إعلان" + out_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + + # Append the sentence we want the model to predict for but leave the label blank + out_prompt += f"التغريدة: '{input_sample}'\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = ( + "هل تحتوي التغريدة التالية على محتوى إعلان؟\n" + "حدد التصنيف المناسب بناءً على المحتوى:\n" + " - إذا كانت التغريدة تحتوي على إعلان، أجب بـ 'إعلان'.\n" + " - إذا كانت التغريدة لا تحتوي على إعلان، أجب بـ 'ليس إعلان'.\n" + "قدم التصنيف فقط.\n" + ) + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples) + } + ] + +def post_process(response): + if "output" in response: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Error or missing output in response:", response) + return None + + label = label.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "ليست" in label or "not" in label or "غير" in label or "no" in label or "ليس" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None + diff --git 
a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..2870bd01 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py @@ -0,0 +1,93 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import AzureModel +from llmebench.tasks import SpamTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": AzureModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 100, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + + for index, example in enumerate(examples): + label = "'spam'" if example["label"] == "__label__ADS" else "'not spam'" + out_prompt += ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + label + + "\n\n" + ) + + # Append the tweet we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + +def prompt(input_sample, examples): + base_prompt = ( + "Classify the following tweet as either 'spam' or 'not spam'. " + "Consider it 'spam' if it contains advertising or promotional content. " + "Respond with 'spam' or 'not spam' only, and do not provide any explanation." 
+ ) + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples) + } + ] + + +def post_process(response): + if "output" in response: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Error or missing output in response:", response) + return None + + label = label.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + print("label: ", label) + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "notads" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..bd48b997 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py @@ -0,0 +1,78 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import AzureModel +from llmebench.tasks import SpamTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on 
[arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": AzureModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 100, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt += "إليك بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "spam" if example["label"] == "__label__ADS" else "not spam" + out_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt += f"التغريدة: '{input_sample}'\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples) + } + ] + +def post_process(response): + if "output" in response: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Error or missing output in response:", response) + return None + + label = label.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py 
b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..81f7346c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,72 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import AzureModel +from llmebench.tasks import SpamTask +import random + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": AzureModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 100, + }, + } + + + + + +def prompt(input_sample): + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n" + return [ + { + "role": "user", + "content": base_prompt + + "التغريدة: " + + input_sample + + "التصنيف: " + } + ] + + +def post_process(response): + #print(response) + if "output" in response: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Error or missing output in response:", response) + return None + + label = label.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "لا" in label or "ليست" in 
label or "not" in label or "ليس" in label or "no" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..44ced30c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,63 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import AzureModel +from llmebench.tasks import SpamTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": AzureModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 100, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": f"If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n tweet: {input_sample}\nlabel: \n", + }, + ] + +def post_process(response): + if "output" in response: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Error or missing output in response:", response) + return None + + label = label.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label: + return None + if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..e9096ed1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,74 @@ +from llmebench.datasets import SpamDataset +from llmebench.models import AzureModel +from llmebench.tasks import SpamTask +import random + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic 
Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": SpamDataset, + "task": SpamTask, + "model": AzureModel, + "model_args": { + "class_labels": ["__label__ADS", "__label__NOTADS"], + "max_tries": 100, + }, + } + + + + + +def prompt(input_sample): + base_prompt = ( + "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج / إعلان؟\n" + "أجب بـ 'yes' إذا كانت تحتوي على محتوى من هذا النوع، أو أجب بـ 'no' إذا لم تكن تحتوي على ذلك.\n" + "يرجى تقديم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n\n" + ) + return [ + { + "role": "user", + "content": base_prompt + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + } + ] + + +def post_process(response): + if "output" in response: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Error or missing output in response:", response) + return None + + label = label.replace("التصنيف:", "").strip().lower() + label = label.replace("label:", "").strip().lower() + if "لا يمكنني" in label or "لا توجد معلومات" in label: + return None + + #print("label", label) + if "لا أستطيع" in label or "I cannot" in label or "'ads' أو 'label__NOTADS'".lower() in label: + return None + if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + return "__label__NOTADS" + elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + return "__label__ADS" + else: + return None diff --git 
a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..ca21cb59 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py @@ -0,0 +1,74 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + 'صنف التغريدة الى "موضوعية" أو "غير موضوعية". 
قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.\n' + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt += "هنا بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "موضوعية" if example["label"] == "OBJ" else "غير موضوعية" + + out_prompt += ( + f"مثال {index}:\n" + f"التغريدة: {example['input']}\n" + f"التصنيف: {label}\n\n" + ) + + out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n" + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"].strip().lower() + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py new file mode 100755 index 00000000..6b9fe702 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py @@ -0,0 +1,80 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + "Classify the 'tweet' as subjective or objective. Provide only label.\n" + ) + return [ + { + "role": "system", + "content": "You are an expert in tweet classification and analysis.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + label = "objective" if example["label"] == "OBJ" else "subjective" + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"].strip().lower() + if "obj" in label or "موضوعي" in label: + return "OBJ" + elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + return "SUBJ" + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..4f40b6dd --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py @@ -0,0 +1,71 @@ +from 
llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SubjectivityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + 'صنف التغريدة الى "subjective" أو "objective". 
قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.\n' + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt += "هنا بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "objective" if example["label"] == "OBJ" else "subjective" + + out_prompt += ( + f"مثال {index}:\n" + f"التغريدة: {example['input']}\n" + f"التصنيف: {label}\n\n" + ) + + out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n" + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"].strip().lower() + if "obj" in label or "موضوعي" in label: + return "OBJ" + elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + return "SUBJ" + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..684b75fc --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,56 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": OpenAIModel, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط دون أي تبرير إضافي.\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].strip().lower() + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..d66e0eec --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py @@ -0,0 +1,58 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or 
explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + prompt_string = ( + f'Classify the tweet as "objective" or "subjective". Provide only the label.\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "system", + "content": "You are an expert in tweet classification and analysis.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].strip().lower() + if "obj" in label or "موضوعي" in label: + return "OBJ" + elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + return "SUBJ" + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..7eb4ba7e --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,52 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import SubjectivityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": OpenAIModel, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + prompt_string = ( + 'صنف التغريدة الى "objective" أو "subjective". قم بتقديم التصنيف دون أي تبرير إضافي.\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"].strip().lower() + if "obj" in label or "موضوعي" in label: + return "OBJ" + elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + return "SUBJ" + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..21224666 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,80 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or 
explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + ' صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "موضوعية" if example["label"] == "OBJ" else "غير موضوعية" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\التصنيف: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" + + return out_prompt + +def post_process(response): + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + ) + + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..71c384ed --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py @@ -0,0 +1,78 @@ +from llmebench.datasets import CT23SubjectivityDataset +from 
llmebench.models import FastChatModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + 'Classify the tweet as "Objective" or "Subjective". 
Provide the classification for the last tweet only, do not provide any additional justification:\n' + ) + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + label = "Objective" if example["label"] == "OBJ" else "Subjective" + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":\n" + + "Tweet: " + + example["input"] + + "\nClassification: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "Tweet: " + input_sample + "\nClassification: \n" + + return out_prompt + + +def post_process(response): + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + ) + + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None \ No newline at end of file diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..ddcb1821 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,75 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SubjectivityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and 
results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ("صنف الجملة على أنها subjective أو objective. قدم فقط التسمية.\n\nالنص: {input_sample}\التصنيف: ") + return [ + + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "objective" if example["label"] == "OBJ" else "subjective" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\التصنيف: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" + + return out_prompt + +def post_process(response): + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + ) + print(label) + label = label.lower() + if "subjective" in label or "sub" in label: + return "SUBJ" + return "OBJ" + return label diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..825527a6 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py @@ -0,0 +1,54 @@ +from llmebench.datasets import 
CT23SubjectivityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": FastChatModel, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + 'Classify the tweet as "Objective" or "Subjective". Provide only the classification.\n\n' + + "Tweet: " + + input_sample + + "Classification: " + ), + } + ] + +def post_process(response): + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + ) + #print(label) + + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..5031d68d --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,46 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SubjectivityTask + + + + +def 
metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": FastChatModel, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + base_prompt = f"صنف الجملة على أنها subjective أو objective. قدم فقط التسمية.\n\nالنص: {input_sample}\nالتصنيف: " + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + ) + + if "subjective" in label or "subj" "ليس" in label or "غير" in label: + return "SUBJ" + return "OBJ" diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..49ab149f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py @@ -0,0 +1,51 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import FastChatModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad 
Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": FastChatModel, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + base_prompt = f'صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط.\n\nالتغريدة: {input_sample}\nالتصنيف: ' + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = ( + response["choices"][0]["message"]["content"].lower().replace(".", "").strip() + + ) + + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py new file mode 100755 index 00000000..69cc2cbc --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py @@ -0,0 +1,88 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import AzureModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed 
publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": AzureModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + ' صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' + ) + return [ + + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "موضوعية" if example["label"] == "OBJ" else "غير موضوعية" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\التصنيف: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" + + return out_prompt + + +def post_process(response): + #print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Response .. 
" + str(response)) + return None + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py new file mode 100755 index 00000000..bbcedcb5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py @@ -0,0 +1,84 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import AzureModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": AzureModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + 'Classify the tweet as "Objective" or "Subjective". 
Provide the classification for the last tweet only, do not provide any additional justification:\n' + ) + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + label = "Objective" if example["label"] == "OBJ" else "Subjective" + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":\n" + + "Tweet: " + + example["input"] + + "\nClassification: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "Tweet: " + input_sample + "\nClassification: \n" + + return out_prompt + + +def post_process(response): + #print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Response .. 
" + str(response)) + return None + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py new file mode 100755 index 00000000..ec6b6fae --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py @@ -0,0 +1,84 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import AzureModel +from llmebench.tasks import SubjectivityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": AzureModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar/dev", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = 'صنف التغريدة الى "subjective" أو "objective". قم بتقديم التصنيف فقط.' 
+ return [ + + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" + for index, example in enumerate(examples): + label = "objective" if example["label"] == "OBJ" else "subjective" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "التصنيف: " + + label + + "\n\n" + ) + + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n" + + return out_prompt + +def post_process(response): + #print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Response .. " + str(response)) + return None + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None + + diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py new file mode 100755 index 00000000..9cb5163c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py @@ -0,0 +1,62 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import AzureModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": 
"Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": AzureModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + ' صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط.\n\n' + + "التغريدة: " + + input_sample + + "التصنيف: " + ), + } + ] + +def post_process(response): + #print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Response .. 
" + str(response)) + return None + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None + diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..386cbf1a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py @@ -0,0 +1,63 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import AzureModel +from llmebench.tasks import SubjectivityTask + + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + } + + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": AzureModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + 'Classify the tweet as "Objective" or "Subjective". 
Provide only the classification.\n\n' + + "Tweet: " + + input_sample + + "Classification: " + ), + } + ] + +def post_process(response): + #print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Response .. " + str(response)) + return None + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None + + diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..a5202e02 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,62 @@ +from llmebench.datasets import CT23SubjectivityDataset +from llmebench.models import AzureModel +from llmebench.tasks import SubjectivityTask + + + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ } + + +def config(): + return { + "dataset": CT23SubjectivityDataset, + "task": SubjectivityTask, + "model": AzureModel, + "model_args": { + "class_labels": ["SUBJ", "OBJ"], + "max_tries": 3, + }, + "general_args": {"test_split": "ar/dev"}, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + 'صنف الجملة الى "subjective" أو "objective". قم بتقديم التصنيف فقط.\n\n' + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + +def post_process(response): + #print(response) + if "output" in response: + # if "content" in response["messages"]: + label = response["output"].strip() + label = label.replace("", "") + label = label.replace("", "") + label = label.lower() + else: + print("Response .. " + str(response)) + return None + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: + return "SUBJ" + elif "موضوعية" in label or "obj" in label: + return "OBJ" + else: + return None + + + From 7bd7f5a6ad1dc99a66171e834159b995947e6a1e Mon Sep 17 00:00:00 2001 From: MohamedBayan Date: Wed, 20 Nov 2024 15:10:17 +0300 Subject: [PATCH 2/3] format files --- .../Adult_BLOOMZ_ZeroShot.py | 0 .../Adult_GPT35_ZeroShot.py | 0 .../Adult_GPT4_FewShot.py | 0 .../Adult_GPT4_FewShot_Arabic.py | 19 ++-- .../Adult_GPT4_FewShot_English.py | 21 ++-- .../Adult_GPT4_FewShot_Mixed.py | 21 ++-- .../Adult_GPT4_ZeroShot.py | 0 .../Adult_GPT4_ZeroShot_Arabic.py | 27 +++-- .../Adult_GPT4_ZeroShot_English.py | 19 ++-- .../Adult_GPT4_ZeroShot_Mixed.py | 18 ++- .../Adult_JAIS13b_FewShot_Arabic.py | 42 +++++-- .../Adult_JAIS13b_FewShot_Mixed.py | 48 +++++--- .../Adult_JAIS13b_ZeroShot.py | 0 .../Adult_Llama3-8b_FewShot_Arabic.py | 25 +++-- .../Adult_Llama3-8b_FewShot_English.py | 29 +++-- .../Adult_Llama3-8b_FewShot_Mixed.py | 27 +++-- .../Adult_Llama3-8b_ZeroShot_Arabic.py | 25 +++-- .../Adult_Llama3-8b_ZeroShot_English.py | 25 +++-- .../Adult_Llama3-8b_ZeroShot_Mixed.py | 17 ++- .../adult_content_detection/Adult_Random.py | 0 
.../CT22Attentionworthy_BLOOMZ_ZeroShot.py | 0 .../CT22Attentionworthy_GPT35_ZeroShot.py | 0 .../CT22Attentionworthy_GPT4_FewShot.py | 0 ...CT22Attentionworthy_GPT4_FewShot_Arabic.py | 32 ++---- ...T22Attentionworthy_GPT4_FewShot_English.py | 24 ++-- .../CT22Attentionworthy_GPT4_FewShot_Mixed.py | 15 ++- .../CT22Attentionworthy_GPT4_ZeroShot.py | 0 ...T22Attentionworthy_GPT4_ZeroShot_Arabic.py | 12 +- ...22Attentionworthy_GPT4_ZeroShot_English.py | 18 ++- ...CT22Attentionworthy_GPT4_ZeroShot_Mixed.py | 18 ++- ...2Attentionworthy_JAIS13b_FewShot_Arabic.py | 15 +-- ...Attentionworthy_JAIS13b_FewShot_English.py | 20 ++-- ...22Attentionworthy_JAIS13b_FewShot_Mixed.py | 17 ++- .../CT22Attentionworthy_JAIS13b_ZeroShot.py | 0 ...Attentionworthy_JAIS13b_ZeroShot_Arabic.py | 13 +-- ...ttentionworthy_JAIS13b_ZeroShot_English.py | 21 ++-- ...2Attentionworthy_JAIS13b_ZeroShot_Mixed.py | 14 +-- ...ttentionworthy_Llama3-8b_FewShot_Arabic.py | 18 +-- ...tentionworthy_Llama3-8b_FewShot_English.py | 19 ++-- ...Attentionworthy_Llama3-8b_FewShot_Mixed.py | 19 ++-- ...tentionworthy_Llama3-8b_ZeroShot_Arabic.py | 14 +-- ...entionworthy_Llama3-8b_ZeroShot_English.py | 22 ++-- ...ttentionworthy_Llama3-8b_ZeroShot_Mixed.py | 15 ++- .../CT22Attentionworthy_Random.py | 0 .../CT22Checkworthiness_BLOOMZ_ZeroShot.py | 0 .../CT22Checkworthiness_GPT35_ZeroShot.py | 0 .../CT22Checkworthiness_GPT4_FewShot.py | 0 ...CT22Checkworthiness_GPT4_FewShot_Arabic.py | 25 +++-- ...T22Checkworthiness_GPT4_FewShot_English.py | 26 +++-- .../CT22Checkworthiness_GPT4_FewShot_Mixed.py | 23 ++-- .../CT22Checkworthiness_GPT4_ZeroShot.py | 0 ...T22Checkworthiness_GPT4_ZeroShot_Arabic.py | 26 +++-- ...22Checkworthiness_GPT4_ZeroShot_English.py | 26 +++-- ...CT22Checkworthiness_GPT4_ZeroShot_Mixed.py | 30 ++--- ...2Checkworthiness_JAIS13b_FewShot_Arabic.py | 28 ++--- ...Checkworthiness_JAIS13b_FewShot_English.py | 26 ++--- ...22Checkworthiness_JAIS13b_FewShot_Mixed.py | 26 ++--- 
.../CT22Checkworthiness_JAIS13b_ZeroShot.py | 0 ...Checkworthiness_JAIS13b_ZeroShot_Arabic.py | 27 ++--- ...2Checkworthiness_JAIS13b_ZeroShot_Mixed.py | 22 ++-- ...heckworthiness_Llama3-8b_FewShot_Arabic.py | 32 +++--- ...eckworthiness_Llama3-8b_FewShot_English.py | 30 ++--- ...Checkworthiness_Llama3-8b_FewShot_Mixed.py | 28 +++-- ...heckworthiness_Llama3-8b_ZeroShot_Mixed.py | 25 +++-- ...eckworthiness_Llama3-8b_Zeroshot_Arabic.py | 28 ++--- ...ckworthiness_Llama3-8b_Zeroshot_English.py | 26 +++-- .../CT22Checkworthiness_Random.py | 0 .../CT22Claim_BLOOMZ_ZeroShot.py | 0 .../CT22Claim_GPT35_ZeroShot.py | 0 .../claim_detection/CT22Claim_GPT4_FewShot.py | 0 .../CT22Claim_GPT4_FewShot_Arabic.py | 18 +-- .../CT22Claim_GPT4_FewShot_English.py | 8 +- .../CT22Claim_GPT4_FewShot_Mixed.py | 13 +-- .../CT22Claim_GPT4_ZeroShot.py | 0 .../CT22Claim_GPT4_ZeroShot_Arabic.py | 16 +-- .../CT22Claim_GPT4_ZeroShot_English.py | 8 +- .../CT22Claim_GPT4_ZeroShot_Mixed.py | 11 +- .../CT22Claim_JAIS13b_FewShot_Arabic.py | 25 +++-- .../CT22Claim_JAIS13b_FewShot_English.py | 45 ++++---- .../CT22Claim_JAIS13b_FewShot_Mixed.py | 45 +++++--- .../CT22Claim_JAIS13b_ZeroShot.py | 0 .../CT22Claim_JAIS13b_ZeroShot_Arabic.py | 9 +- .../CT22Claim_JAIS13b_ZeroShot_English.py | 15 +-- .../CT22Claim_JAIS13b_ZeroShot_Mixed.py | 31 +++--- .../CT22Claim_Llama3-8b_FewShot_Arabic.py | 29 +++-- .../CT22Claim_Llama3-8b_FewShot_English.py | 29 ++--- .../CT22Claim_Llama3-8b_FewShot_Mixed.py | 31 ++++-- .../CT22Claim_Llama3-8b_ZeroShot_Arabic.py | 22 ++-- .../CT22Claim_Llama3-8b_ZeroShot_English.py | 20 ++-- .../CT22Claim_Llama3-8b_ZeroShot_Mixed.py | 20 +++- .../claim_detection/CT22Claim_Random.py | 0 .../ANSFactuality_BLOOMZ_ZeroShot.py | 0 .../ANSFactuality_GPT35_ZeroShot.py | 0 .../factuality/ANSFactuality_GPT4_FewShot.py | 0 .../ANSFactuality_GPT4_FewShot_Arabic.py | 12 +- .../ANSFactuality_GPT4_FewShot_English.py | 6 +- .../ANSFactuality_GPT4_FewShot_Mixed.py | 10 +- 
.../factuality/ANSFactuality_GPT4_ZeroShot.py | 0 .../ANSFactuality_GPT4_ZeroShot_Arabic.py | 8 +- .../ANSFactuality_GPT4_ZeroShot_English.py | 10 +- .../ANSFactuality_GPT4_ZeroShot_Mixed.py | 7 +- .../ANSFactuality_JAIS13b_FewShot_Arabic.py | 12 +- .../ANSFactuality_JAIS13b_FewShot_English.py | 10 +- .../ANSFactuality_JAIS13b_FewShot_Mixed.py | 9 +- .../ANSFactuality_JAIS13b_ZeroShot.py | 0 .../ANSFactuality_JAIS13b_ZeroShot_Arabic.py | 11 +- .../ANSFactuality_JAIS13b_ZeroShot_English.py | 8 +- .../ANSFactuality_JAIS13b_ZeroShot_Mixed.py | 12 +- .../ANSFactuality_Llama3-8b_FewShot_Arabic.py | 9 +- ...ANSFactuality_Llama3-8b_FewShot_English.py | 21 +--- .../ANSFactuality_Llama3-8b_FewShot_Mixed.py | 8 +- ...ANSFactuality_Llama3-8b_ZeroShot_Arabic.py | 10 +- ...NSFactuality_Llama3-8b_ZeroShot_English.py | 21 +--- .../ANSFactuality_Llama3-8b_ZeroShot_Mixed.py | 12 +- .../factuality/ANSFactuality_Random.py | 0 .../COVID19Factuality_BLOOMZ_ZeroShot.py | 0 .../COVID19Factuality_GPT35_ZeroShot.py | 0 .../COVID19Factuality_GPT4_FewShot.py | 0 .../COVID19Factuality_GPT4_ZeroShot.py | 0 .../COVID19Factuality_JAIS13b_ZeroShot.py | 0 .../factuality/COVID19Factuality_Random.py | 0 .../UnifiedFCFactuality_BLOOMZ_ZeroShot.py | 0 .../UnifiedFCFactuality_GPT35_ZeroShot.py | 0 .../UnifiedFCFactuality_GPT4_FewShot.py | 0 .../UnifiedFCFactuality_GPT4_ZeroShot.py | 0 .../UnifiedFCFactuality_JAIS13b_ZeroShot.py | 0 .../factuality/UnifiedFCFactuality_Random.py | 0 .../CT22Harmful_BLOOMZ_ZeroShot.py | 0 .../CT22Harmful_GPT35_ZeroShot.py | 0 .../CT22Harmful_GPT4_FewShot.py | 0 .../CT22Harmful_GPT4_FewShot_Arabic.py | 42 +++---- .../CT22Harmful_GPT4_FewShot_English.py | 41 +++---- .../CT22Harmful_GPT4_FewShot_Mixed.py | 39 +++---- .../CT22Harmful_GPT4_ZeroShot.py | 0 .../CT22Harmful_GPT4_ZeroShot_Arabic.py | 41 +++---- .../CT22Harmful_GPT4_ZeroShot_English.py | 41 +++---- .../CT22Harmful_GPT4_ZeroShot_Mixed.py | 39 +++---- .../CT22Harmful_JAIS13b_FewShot_Arabic.py | 30 +++-- 
.../CT22Harmful_JAIS13b_FewShot_English.py | 32 +++--- .../CT22Harmful_JAIS13b_FewShot_Mixed.py | 53 +++++---- .../CT22Harmful_JAIS13b_ZeroShot.py | 0 .../CT22Harmful_JAIS13b_ZeroShot_Arabic.py | 24 ++-- .../CT22Harmful_JAIS13b_ZeroShot_English.py | 21 ++-- .../CT22Harmful_JAIS13b_ZeroShot_Mixed.py | 21 ++-- .../CT22Harmful_Llama3-8b_FewShot_Arabic.py | 28 ++--- .../CT22Harmful_Llama3-8b_FewShot_English.py | 30 ++--- .../CT22Harmful_Llama3-8b_FewShot_Mixed.py | 25 +++-- .../CT22Harmful_Llama3-8b_ZeroShot_Arabic.py | 20 ++-- .../CT22Harmful_Llama3-8b_ZeroShot_English.py | 23 ++-- .../CT22Harmful_Llama3-8b_ZeroShot_Mixed.py | 15 ++- .../CT22Harmful_Random.py | 0 .../OSACT4SubtaskB_BLOOMZ_ZeroShot.py | 0 .../OSACT4SubtaskB_GPT35_ZeroShot.py | 0 .../OSACT4SubtaskB_GPT4_FewShot.py | 0 .../OSACT4SubtaskB_GPT4_FewShot_Arabic.py | 38 ++++--- .../OSACT4SubtaskB_GPT4_FewShot_English.py | 31 ++++-- .../OSACT4SubtaskB_GPT4_FewShot_Mixed.py | 38 ++++--- .../OSACT4SubtaskB_GPT4_ZeroShot.py | 0 .../OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py | 42 ++++--- .../OSACT4SubtaskB_GPT4_ZeroShot_English.py | 31 ++++-- .../OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py | 34 ++++-- .../OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py | 10 +- .../OSACT4SubtaskB_JAIS13b_FewShot_English.py | 18 +-- .../OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py | 7 +- .../OSACT4SubtaskB_JAIS13b_ZeroShot.py | 0 .../OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py | 24 ++-- ...OSACT4SubtaskB_JAIS13b_ZeroShot_English.py | 17 ++- .../OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py | 5 +- ...OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py | 25 +++-- ...SACT4SubtaskB_Llama3-8b_FewShot_English.py | 24 ++-- .../OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py | 27 +++-- ...SACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py | 22 ++-- ...ACT4SubtaskB_Llama3-8b_ZeroShot_English.py | 22 ++-- ...OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py | 20 +++- .../hate_speech/OSACT4SubtaskB_Random.py | 0 .../OSACT4SubtaskA_BLOOMZ_ZeroShot.py | 0 .../OSACT4SubtaskA_GPT35_ZeroShot.py | 0 
.../OSACT4SubtaskA_GPT4_FewShot.py | 0 .../OSACT4SubtaskA_GPT4_FewShot_Arabic.py | 22 ++-- .../OSACT4SubtaskA_GPT4_FewShot_English.py | 18 +-- .../OSACT4SubtaskA_GPT4_FewShot_Mixed.py | 21 ++-- .../OSACT4SubtaskA_GPT4_ZeroShot.py | 0 .../OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py | 20 ++-- .../OSACT4SubtaskA_GPT4_ZeroShot_English.py | 21 ++-- .../OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py | 17 +-- .../OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py | 15 +-- .../OSACT4SubtaskA_JAIS13b_FewShot_English.py | 20 ++-- .../OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py | 19 ++-- .../OSACT4SubtaskA_JAIS13b_ZeroShot.py | 0 .../OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py | 11 +- ...OSACT4SubtaskA_JAIS13b_ZeroShot_English.py | 14 +-- .../OSACT4SubtaskA_JAIS13b_ZeroShot_Mixed.py | 21 ++-- ...OSACT4SubtaskA_Llama3-8b_FewShot_Arabic.py | 19 ++-- ...SACT4SubtaskA_Llama3-8b_FewShot_English.py | 18 ++- .../OSACT4SubtaskA_Llama3-8b_FewShot_Mixed.py | 13 +-- ...SACT4SubtaskA_Llama3-8b_ZeroShot_Arabic.py | 12 +- ...ACT4SubtaskA_Llama3-8b_ZeroShot_English.py | 10 +- ...OSACT4SubtaskA_Llama3-8b_ZeroShot_Mixed.py | 19 ++-- .../OSACT4SubtaskA_Random.py | 0 .../propaganda/ArMemes_GPT4V_ZeroShot.py | 0 .../ArMemes_Image_GPT4V_ZeroShot.py | 0 .../propaganda/ArMemes_MM_GPT4V_ZeroShot.py | 0 .../propaganda/ArMemes_Text_GPT4V_ZeroShot.py | 0 .../propaganda/WANLP22T3_BLOOMZ_ZeroShot.py | 0 .../propaganda/WANLP22T3_GPT35_ZeroShot.py | 0 .../propaganda/WANLP22T3_GPT4_FewShot.py | 0 .../WANLP22T3_GPT4_FewShot_Arabic.py | 81 +++++++++----- .../WANLP22T3_GPT4_FewShot_English.py | 17 +-- .../WANLP22T3_GPT4_FewShot_Mixed.py | 48 +++++--- .../propaganda/WANLP22T3_GPT4_ZeroShot.py | 0 .../WANLP22T3_GPT4_ZeroShot_Arabic.py | 52 +++++---- .../WANLP22T3_GPT4_ZeroShot_English.py | 22 ++-- .../WANLP22T3_GPT4_ZeroShot_Mixed.py | 56 ++++++---- .../WANLP22T3_JAIS13b_FewShot_Arabic.py | 32 +++--- .../WANLP22T3_JAIS13b_FewShot_English.py | 49 +++++---- .../WANLP22T3_JAIS13b_FewShot_Mixed.py | 22 ++-- 
.../propaganda/WANLP22T3_JAIS13b_ZeroShot.py | 0 .../WANLP22T3_JAIS13b_ZeroShot_Arabic.py | 18 +-- .../WANLP22T3_JAIS13b_ZeroShot_English.py | 10 +- .../WANLP22T3_JAIS13b_ZeroShot_Mixed.py | 17 +-- .../WANLP22T3_Llama3-8b_FewShot_Arabic.py | 104 ++++++++++++------ .../WANLP22T3_Llama3-8b_FewShot_English.py | 24 ++-- .../WANLP22T3_Llama3-8b_FewShot_Mixed.py | 22 ++-- .../WANLP22T3_Llama3-8b_ZeroShot_Arabic.py | 25 +---- .../WANLP22T3_Llama3-8b_ZeroShot_English.py | 25 ++--- .../WANLP22T3_Llama3-8b_ZeroShot_Mixed.py | 21 +--- .../propaganda/WANLP22T3_Random.py | 0 .../spam/Spam_BLOOMZ_ZeroShot.py | 0 .../spam/Spam_GPT35_ZeroShot.py | 0 .../spam/Spam_GPT4_FewShot_Arabic.py | 38 ++++--- .../spam/Spam_GPT4_FewShot_English.py | 36 +++--- .../spam/Spam_GPT4_FewShot_Mixed.py | 34 +++--- .../spam/Spam_GPT4_ZeroShot.py | 0 .../spam/Spam_GPT4_ZeroShot_Arabic.py | 42 +++---- .../spam/Spam_GPT4_ZeroShot_English.py | 29 +++-- .../spam/Spam_GPT4_ZeroShot_Mixed.py | 38 ++++--- .../spam/Spam_JAIS13b_FewShot_Arabic.py | 45 +++++--- .../spam/Spam_JAIS13b_FewShot_English.py | 36 +++--- .../spam/Spam_JAIS13b_FewShot_Mixed.py | 15 +-- .../spam/Spam_JAIS13b_ZeroShot.py | 0 .../spam/Spam_JAIS13b_ZeroShot_Arabic.py | 36 +++--- .../spam/Spam_JAIS13b_ZeroShot_English.py | 19 +--- .../spam/Spam_JAIS13b_ZeroShot_Mixed.py | 16 +-- .../spam/Spam_Llama3-8b_FewShot_Arabic.py | 32 ++++-- .../spam/Spam_Llama3-8b_FewShot_English.py | 38 ++++--- .../spam/Spam_Llama3-8b_FewShot_Mixed.py | 33 ++++-- .../spam/Spam_Llama3-8b_ZeroShot_Arabic.py | 41 +++---- .../spam/Spam_Llama3-8b_ZeroShot_English.py | 32 ++++-- .../spam/Spam_Llama3-8b_ZeroShot_Mixed.py | 42 ++++--- .../spam/Spam_Random.py | 0 .../CT23Subjectivity_BLOOMZ_ZeroShot.py | 0 .../CT23Subjectivity_GPT35_ZeroShot.py | 0 .../CT23Subjectivity_GPT4_FewShot.py | 0 .../CT23Subjectivity_GPT4_FewShot_Arabic.py | 16 +-- .../CT23Subjectivity_GPT4_FewShot_English.py | 16 +-- .../CT23Subjectivity_GPT4_FewShot_Mixed.py | 22 ++-- 
.../CT23Subjectivity_GPT4_ZeroShot.py | 0 .../CT23Subjectivity_GPT4_ZeroShot_Arabic.py | 9 +- .../CT23Subjectivity_GPT4_ZeroShot_English.py | 16 +-- .../CT23Subjectivity_GPT4_ZeroShot_Mixed.py | 16 +-- ...CT23Subjectivity_JAIS13b_FewShot_Arabic.py | 16 +-- ...T23Subjectivity_JAIS13b_FewShot_English.py | 17 +-- .../CT23Subjectivity_JAIS13b_FewShot_Mixed.py | 10 +- ...23Subjectivity_JAIS13b_ZeroShot_English.py | 11 +- ...CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py | 6 +- .../CT23Subjectivity_Jais13b_ZeroShot.py | 0 ...T23Subjectivity_Jais13b_ZeroShot_Arabic.py | 9 +- ...23Subjectivity_LLama3-8b_FewShot_Arabic.py | 16 +-- ...3Subjectivity_LLama3-8b_FewShot_English.py | 15 +-- ...T23Subjectivity_LLama3-8b_FewShot_Mixed.py | 12 +- ...3Subjectivity_LLama3-8b_ZeroShot_Arabic.py | 10 +- ...Subjectivity_LLama3-8b_ZeroShot_English.py | 11 +- ...23Subjectivity_LLama3-8b_ZeroShot_Mixed.py | 10 +- .../subjectivity/CT23Subjectivity_Random.py | 0 .../ThatiARSubjectivity_GPT4_FewShot.py | 0 ...atiARSubjectivity_GPT4_FewShot_ENprompt.py | 0 .../ThatiARSubjectivity_GPT4_ZeroShot.py | 0 ...atiARSubjectivity_GPT4_ZeroShot_explain.py | 0 .../ThatiARSubjectivity_Llama3_8b_ZeroShot.py | 0 ...ThatiARSubjectivity_Mistral_7b_ZeroShot.py | 0 .../ThatiARSubjectivity_Sonnet_ZeroShot_en.py | 0 ...tiARSubjectivity_VLLMLlama3_8b_ZeroShot.py | 0 282 files changed, 2280 insertions(+), 2249 deletions(-) mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot.py mode change 100644 => 100755 
assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_BLOOMZ_ZeroShot.py mode change 100644 => 100755 
assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py mode change 100644 => 100755 
assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT35_ZeroShot.py mode change 100644 => 100755 
assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_GPT4V_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_Image_GPT4V_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_MM_GPT4V_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_Text_GPT4V_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py mode change 
100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Random.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_FewShot.py mode change 100644 => 100755 
assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_FewShot_ENprompt.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_ZeroShot_explain.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Llama3_8b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Mistral_7b_ZeroShot.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Sonnet_ZeroShot_en.py mode change 100644 => 100755 assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_VLLMLlama3_8b_ZeroShot.py diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py index 3661556f..98ef6638 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import AdultTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": AdultDataset, @@ -83,7 +77,16 @@ def post_process(response): label = label.replace("label:", "").strip() label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label: return "ADULT" diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py index f17885a9..38d00b5c 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import AdultTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": AdultDataset, @@ -80,9 +74,18 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label: return "ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py index 11fd3385..cf464ddd 
100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -55,6 +51,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): base_prompt = f'بالنظر إلى التغريدة التالية، صنفها على أنها "ADULT" أو "NOT_ADULT" بناء على محتوى التغريدة' return [ @@ -68,6 +65,7 @@ def prompt(input_sample, examples): }, ] + def post_process(response): if not response: return None @@ -80,9 +78,18 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): return "NOT_ADULT" elif "للبالغين" in label 
or "نعم" in label or "adult" in label: return "ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py index fff31f29..50446e5c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import AdultTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": AdultDataset, @@ -32,10 +26,10 @@ def config(): def prompt(input_sample): prompt_string = ( - "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n" - + "التغريدة: " - + input_sample - + "\nالتصنيف: " + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n" + + "التغريدة: " + + input_sample + + "\nالتصنيف: " ) return [ { @@ -57,7 +51,16 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label: return "ADULT" diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py index 461c56a9..89899524 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import AdultTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar 
Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": AdultDataset, @@ -57,7 +51,16 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label: return "ADULT" diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py index f9c9956e..80b05f79 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa 
University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,6 +23,7 @@ def config(): }, } + def prompt(input_sample): prompt_string = ( f'هل تحتوي التغريدة التالية على لغة للبالغين؟ صنفها كـ "ADULT" أو "NOT_ADULT" بناءً على محتوى التغريدة.\n\n' @@ -56,7 +53,16 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label or "not_adult" in label or "not adult" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label: return "ADULT" diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py index 74c96636..0542da23 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra 
Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,6 +23,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = ( "صنف التغريدة العربية التالية على أنها تحتوي على لغة للبالغين (اللغة المستخدمة في إعلانات البالغين والإعلانات الإباحية) أو لا تحتوي على لغة للبالغين بدون توضيح. 
" @@ -40,11 +37,11 @@ def few_shot_prompt(input_sample, examples): f"التصنيف: {label}\n\n" ) base_prompt += ( - f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" - f"التصنيف: " + f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" f"التصنيف: " ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -53,6 +50,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] @@ -62,17 +60,39 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): return None - elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): return "NOT_ADULT" elif "غير مناسب للنشر العام" in label: return "ADULT" elif "not_adult" in label or "not adult" in label: return "NOT_ADULT" - elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label or "للبالغين" in label or "نعم" in label or "adult" in label: + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): return "ADULT" - elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py index e4a333d8..06b4a500 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import AdultTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": AdultDataset, @@ -25,10 +23,9 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): - base_prompt = ( - "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" - ) + base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" for index, example in enumerate(examples): label = "adult" if example["label"] == "ADULT" else "not adult" base_prompt += ( @@ -36,12 +33,10 @@ def few_shot_prompt(input_sample, examples): f"التغريدة: '{example['input']}'\n" f"التصنيف: {label}\n\n" ) - base_prompt += ( - f"صنف الآن التغريدة الجديدة:\nجملة: '{input_sample}'\n" - f"التصنيف: " - ) + base_prompt += f"صنف الآن التغريدة الجديدة:\nجملة: '{input_sample}'\n" 
f"التصنيف: " return base_prompt + def prompt(input_sample, examples): return [ { @@ -50,6 +45,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("التصنيف:", "").strip() @@ -58,17 +54,39 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): return None - elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): return "NOT_ADULT" elif "غير مناسب للنشر العام" in label: return "ADULT" elif "not_adult" in label or "not adult" in label: return "NOT_ADULT" - elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label or "للبالغين" in label or "نعم" in label or "adult" in label: + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): return "ADULT" - elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git 
a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py index 1c8b080a..fd9a7cfd 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,6 +23,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = ( "صنف التغريدة العربية التالية على أنها تحتوي على لغة للبالغين (اللغة المستخدمة في إعلانات البالغين والإعلانات الإباحية) أو لا تحتوي على لغة للبالغين بدون توضيح. 
" @@ -40,8 +37,7 @@ def few_shot_prompt(input_sample, examples): f"التصنيف: {label}\n\n" ) base_prompt += ( - f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" - f"التصنيف: " + f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" f"التصنيف: " ) return base_prompt @@ -56,9 +52,9 @@ def prompt(input_sample, examples): def post_process(response): - #if not response or 'error' in response or 'output' not in response: - #print("Error or missing output in response:", response) - #return "NOT_ADULT" # Safely default to NOT_ADULT when unsure + # if not response or 'error' in response or 'output' not in response: + # print("Error or missing output in response:", response) + # return "NOT_ADULT" # Safely default to NOT_ADULT when unsure label = response["output"].strip().lower() label = label.replace("التصنيف:", "").strip() @@ -67,7 +63,14 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: return "ADULT" diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py index dbce9c74..89e945de 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py @@ -3,24 +3,17 @@ from llmebench.tasks import AdultTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram 
Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { - "dataset": AdultDataset, "task": AdultTask, "model": AzureModel, @@ -35,6 +28,7 @@ def config(): }, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n\n" out_prompt = out_prompt + "Here are some examples:\n\n" @@ -63,16 +57,14 @@ def prompt(input_sample, examples): return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] def post_process(response): - - if not response or 'error' in response or 'output' not in response: + + if not response or "error" in response or "output" not in response: print("Error or missing output in response:", response) return "NOT_ADULT" # Safely default to NOT_ADULT when unsure @@ -83,9 +75,16 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: return "ADULT" else: - return None \ No newline at 
end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py index 9cf7d89a..460cf13c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import AdultTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": AdultDataset, @@ -25,10 +23,9 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): - base_prompt = ( - "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" - ) + base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" for index, example in enumerate(examples): label = "adult" if example["label"] == "ADULT" else "not adult" base_prompt += ( @@ -36,12 +33,10 @@ def few_shot_prompt(input_sample, examples): f"جملة: '{example['input']}'\n" f"التصنيف: {label}\n\n" ) - base_prompt += ( - f"صنف الآن الجملة الجديدة:\nجملة: '{input_sample}'\n" - f"التصنيف: " - ) + base_prompt += f"صنف الآن الجملة الجديدة:\nجملة: '{input_sample}'\n" f"التصنيف: " return base_prompt + def prompt(input_sample, examples): return [ { @@ -50,6 +45,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["output"].strip().lower() @@ -59,9 +55,16 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: return "ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py index 65cc5771..8eddcdd7 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -40,10 +37,11 @@ def prompt(input_sample): } ] + def post_process(response): - #if not response or 'error' in response or 'output' not in response: - #print("Error or missing output in response:", response) - #return "No Response" + # if not response or 'error' in response or 'output' not in response: + # print("Error or missing output in response:", response) + # return "No Response" label = response["output"].strip().lower() label = label.replace("التصنيف:", "").strip() @@ -52,9 +50,16 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in 
label or "not" in label or "no" in label or "غير" in label or "لا" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: return "ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py index 45aa4d62..846965f5 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -41,10 +38,11 @@ def prompt(input_sample): } ] + def post_process(response): - #if not response or 'error' in response or 'output' not in response: - #print("Error or missing output in response:", response) - #return "NOT_ADULT" # Safely default to NOT_ADULT when unsure + # if not response or 'error' in response or 'output' not in response: + # print("Error or missing output in response:", response) + # return "NOT_ADULT" # Safely default to NOT_ADULT when unsure label = response["output"].strip().lower() label = label.replace("التصنيف:", "").strip() @@ -53,9 +51,16 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: return "ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py index 1409183c..e540e19a 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py @@ -3,17 +3,15 @@ from 
llmebench.tasks import AdultTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": AdultDataset, @@ -25,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -38,6 +37,7 @@ def prompt(input_sample): } ] + def post_process(response): label = response["output"].strip().lower() label = label.replace("", "").replace("", "").lower() @@ -46,9 +46,16 @@ def post_process(response): label = label.replace("label:", "").strip() label = label.lower() - if "ليس" in label or "ليست" in label or "not" in label or "no" in label or "غير" in label or "لا" in label: + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: return "ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Random.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Random.py old mode 100644 new mode 100755 diff --git 
a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py index 7742c116..ee13a4eb 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -69,23 +63,13 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt - - - - - - - - - - def prompt(input_sample, examples): - base_prompt = f'هل تتطلب "التغريدة" انتباه الصحفيين، ومدققي الحقائق، والجهات الحكومية؟ قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: تناقش_الإجراء_المتخذ، ضارة، تناقش_العلاج، تطرح_سؤال، غير_مثيرة_للاهتمام، آخر، تلوم_السلطات، تحتوي_على_نصيحة، تدعو_لإجراء. قدم التصنيف فقط.\n\n' + base_prompt = f'هل تتطلب "التغريدة" انتباه الصحفيين، ومدققي الحقائق، والجهات الحكومية؟ قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: تناقش_الإجراء_المتخذ، ضارة، تناقش_العلاج، تطرح_سؤال، غير_مثيرة_للاهتمام، آخر، تلوم_السلطات، تحتوي_على_نصيحة، تدعو_لإجراء. قدم التصنيف فقط.\n\n' return [ { "role": "system", - "content": "أنت خبير في وسائل التواصل الاجتماعي. يمكنك تحليل و تصنيف التغريدات.", + "content": "أنت خبير في وسائل التواصل الاجتماعي. 
يمكنك تحليل و تصنيف التغريدات.", }, { "role": "user", @@ -98,11 +82,11 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = ( - label.replace(" - ", ", ") - .replace(",", "") - .replace(".", "") - .replace("label:", "") - .replace("التصنيف: ", "") + label.replace(" - ", ", ") + .replace(",", "") + .replace(".", "") + .replace("label:", "") + .replace("التصنيف: ", "") ) label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py index 143ca5ca..6d28d7a7 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -45,12 +39,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: out_prompt = ( - out_prompt - + "tweet: " - + example["input"] - + "\n" - + example["label"] - + "\n\n" + out_prompt + "tweet: " + example["input"] + "\n" + example["label"] + "\n\n" ) # Append the sentence we want the model to predict for but leave the Label blank @@ -64,7 +53,7 @@ def prompt(input_sample, examples): return [ { "role": "system", - "content": "You are social media expert. You can annotate and classify tweets." + "content": "You are social media expert. You can annotate and classify tweets.", }, { "role": "user", @@ -85,10 +74,12 @@ def post_process(response): if "**Category:** " in label: label = label.split("**Category:** ")[-1] - if '**Annotation:** ' in label: + if "**Annotation:** " in label: label = label.split("**Annotation:** ")[-1] - label = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace + label = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace if label.startswith("no"): label_fixed = "no_not_interesting" @@ -122,4 +113,3 @@ def post_process(response): label_fixed = None return label_fixed - diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py index 9459930d..9721923f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py +++ 
b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -54,13 +50,14 @@ def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" return out_prompt + def prompt(input_sample, examples): base_prompt = f'هل تتطلب "التغريدة" انتباه الصحفيين، ومدققي الحقائق، والجهات الحكومية؟ قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: yes_discusses_action_taken, harmful, yes_discusses_cure, yes_asks_question, no_not_interesting, yes_other, yes_blame_authorities, yes_contains_advice, yes_calls_for_action. قدم التصنيف فقط.\n\n' return [ { "role": "system", - "content": "أنت خبير في وسائل التواصل الاجتماعي. يمكنك تحليل و تصنيف التغريدات.", + "content": "أنت خبير في وسائل التواصل الاجتماعي. 
يمكنك تحليل و تصنيف التغريدات.", }, { "role": "user", @@ -81,10 +78,12 @@ def post_process(response): if "**Category:** " in label: label = label.split("**Category:** ")[-1] - if '**Annotation:** ' in label: + if "**Annotation:** " in label: label = label.split("**Annotation:** ")[-1] - label = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace + label = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace if label.startswith("no"): label_fixed = "no_not_interesting" diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py index 2d29026a..2a20b98f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -67,7 +61,7 @@ def post_process(response): .replace(",", "") .replace(".", "") .replace("label:", "") - .replace("التصنيف: " ,"") + .replace("التصنيف: ", "") ) label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة @@ -101,5 +95,5 @@ def post_process(response): label_fixed = "yes_calls_for_action" else: label_fixed = None - + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py index 62b87274..81b56e4b 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -71,8 +65,10 @@ def post_process(response): ) label_fixed = None - label_fixed = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace - + label_fixed = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace + # Update conditions to match labels without surrounding whitespace if label_fixed.startswith("no"): label_fixed = "no_not_interesting" @@ -81,7 +77,9 @@ def post_process(response): elif "yes_harmful" in label_fixed: label_fixed = "harmful" elif label_fixed.startswith("yes"): - label_fixed = label_fixed.strip() # Keep the original label if it starts with "yes" + label_fixed = ( + label_fixed.strip() + ) # Keep the original label if it starts with "yes" elif "yes_blame_authoritie" in label_fixed: label_fixed = "yes_blame_authoritie" elif "yes_discusses_action_taken" in label_fixed: diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Mixed.py index d673c19c..4b6a4575 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin 
Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -61,13 +57,15 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = ( - label.replace(" - ", ", ") - .replace(",", "") - .replace(".", "") - .replace("label:", "") + label.replace(" - ", ", ") + .replace(",", "") + .replace(".", "") + .replace("label:", "") ) - label = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace + label = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace if label.startswith("no"): label_fixed = "no_not_interesting" diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Arabic.py index 48eb9c22..dbaf5f8a 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), 
Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -68,18 +62,19 @@ def prompt(input_sample, examples): }, ] + import re + def post_process(response): print(response) label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - label = label.replace("", "").replace("", "") label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة - + label_fixed = label_fixed.replace("التصنيف:", "") if label_fixed.startswith("لا"): label_fixed = "no_not_interesting" @@ -111,5 +106,5 @@ def post_process(response): label_fixed = "yes_calls_for_action" else: label_fixed = None - + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_English.py index c862ace5..a690c2c2 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language 
Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -71,15 +65,17 @@ def prompt(input_sample, examples): import re + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - label = label.replace("", "").replace("", "") - label_fixed = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace - + label_fixed = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace + # Update conditions to match labels without surrounding whitespace if label_fixed.startswith("no"): label_fixed = "no_not_interesting" @@ -88,7 +84,9 @@ def post_process(response): elif "yes_harmful" in label_fixed: label_fixed = "harmful" elif label_fixed.startswith("yes"): - label_fixed = label_fixed.strip() # Keep the original label if it starts with "yes" + label_fixed = ( + label_fixed.strip() + ) # Keep the original label if it starts with "yes" elif "yes_blame_authoritie" in label_fixed: label_fixed = "yes_blame_authoritie" elif "yes_discusses_action_taken" in label_fixed: diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Mixed.py index 5abdd7a0..9722f288 100755 
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -38,6 +34,7 @@ def config(): "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + # def few_shot_prompt(input_sample, examples): # base_prompt = ( # 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' @@ -58,6 +55,7 @@ def config(): # ) # return base_prompt + # def prompt(input_sample, examples): # return [ # { @@ -86,8 +84,8 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): base_prompt = ( 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' - 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' - 'yes_contains_advice، yes_calls_for_action. 
قدم التصنيف فقط.\n\n' + "yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، " + "yes_contains_advice، yes_calls_for_action. قدم التصنيف فقط.\n\n" "إليك بعض الأمثلة:\n\n" ) return [ @@ -97,8 +95,10 @@ def prompt(input_sample, examples): }, ] + import re + def post_process(response): print(response) label = response["choices"][0]["message"]["content"] @@ -106,10 +106,9 @@ def post_process(response): label = label.replace("label:", "").strip() - label = label.replace("", "").replace("", "") label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة - + label_fixed = label_fixed.replace("التصنيف:", "") if label.startswith("no"): diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Arabic.py index ea0a8b0b..c690d52b 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Arabic.py @@ -3,20 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -47,23 +42,23 @@ def prompt(input_sample): f"التصنيف: \n" ) return [ - { "role": "user", "content": prompt_string, }, ] + import re + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - label = label.replace("", "").replace("", "") - + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة label_fixed = label_fixed.replace("التصنيف:", "") diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_English.py index 1b7a6023..564438d9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_English.py @@ -3,20 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on 
[arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -48,25 +43,27 @@ def prompt(input_sample): f"label: \n" ) return [ - { "role": "user", "content": prompt_string, }, ] + import re + def post_process(response): print(response) label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - label = label.replace("", "").replace("", "") - label_fixed = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace - + label_fixed = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace + # Update conditions to match labels without surrounding whitespace if label_fixed.startswith("no"): label_fixed = "no_not_interesting" @@ -75,7 +72,9 @@ def post_process(response): elif "yes_harmful" in label_fixed: label_fixed = "harmful" elif label_fixed.startswith("yes"): - label_fixed = label_fixed.strip() # Keep the original label if it starts with "yes" + label_fixed = ( + label_fixed.strip() + ) # Keep the original label if it starts with "yes" elif "yes_blame_authoritie" in label_fixed: label_fixed = "yes_blame_authoritie" elif "yes_discusses_action_taken" in label_fixed: diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py index b81c916e..cb54d04e 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_JAIS13b_ZeroShot_Mixed.py 
@@ -3,17 +3,15 @@ from llmebench.tasks import AttentionworthyTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -36,11 +34,12 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): base_prompt = ( 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' - 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' - 'yes_contains_advice، yes_calls_for_action. قدم التصنيف فقط.\n\n' + "yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، " + "yes_contains_advice، yes_calls_for_action. 
قدم التصنيف فقط.\n\n" f"التغريدة: '{input_sample}'\n" "التصنيف: " ) @@ -51,10 +50,10 @@ def prompt(input_sample): }, ] -import re import re + def post_process(response): print(response) label = response["choices"][0]["message"]["content"] @@ -62,10 +61,9 @@ def post_process(response): label = label.replace("label:", "").strip() - label = label.replace("", "").replace("", "") label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة - + label_fixed = label_fixed.replace("التصنيف:", "") if label.startswith("no"): diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Arabic.py index 34bb191a..6e2b057d 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -69,13 +63,9 @@ def prompt(input_sample, examples): ] - - - - - import re + def post_process(response): print(response) if "output" in response: @@ -86,9 +76,9 @@ def post_process(response): else: print("استجابة .. " + str(response)) label = "" - + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة - + label_fixed = label_fixed.replace("التصنيف:", "") if label_fixed.startswith("لا"): label_fixed = "no_not_interesting" diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py index 138a755a..6863db02 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -71,6 +65,7 @@ def prompt(input_sample, examples): import re + def post_process(response): print(response) if "output" in response: @@ -81,9 +76,11 @@ def post_process(response): else: print("Response .. " + str(response)) label = "" - - label_fixed = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace - + + label_fixed = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace + if label.startswith("no"): label_fixed = "no_not_interesting" elif "yes_discusses_covid-19_vaccine_side_effects" in label: @@ -112,7 +109,7 @@ def post_process(response): label_fixed = "yes_contains_advice" elif "yes_calls_for_action" in label: label_fixed = "yes_calls_for_action" - else : + else: label_fixed = None return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py index 1effd23d..624ba253 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_FewShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import AttentionworthyTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - 
"description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -36,11 +34,12 @@ def config(): "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + def few_shot_prompt(input_sample, examples): base_prompt = ( 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' - 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' - 'yes_contains_advice، yes_calls_for_action. قدم التصنيف فقط.\n\n' + "yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، " + "yes_contains_advice، yes_calls_for_action. قدم التصنيف فقط.\n\n" "إليك بعض الأمثلة:\n\n" ) for index, example in enumerate(examples): @@ -56,6 +55,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -64,8 +64,10 @@ def prompt(input_sample, examples): }, ] + import re + def post_process(response): print(response) @@ -77,16 +79,15 @@ def post_process(response): else: print("Response .. 
" + str(response)) label = "" - - label_fixed = label.lower().strip() + + label_fixed = label.lower().strip() label_list = config()["model_args"]["class_labels"] label = label.replace("label:", "").strip() - label = label.replace("", "").replace("", "") label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة - + label_fixed = label_fixed.replace("التصنيف:", "") if label.startswith("no"): diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py index 12e7fd5c..8bb0922c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Arabic.py @@ -3,20 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -47,15 +42,16 @@ def prompt(input_sample): f"التصنيف: \n" ) return [ - { "role": "user", "content": prompt_string, }, ] + import re + def post_process(response): print(response) if "output" in response: @@ -66,7 +62,7 @@ def post_process(response): else: print("استجابة .. " + str(response)) label = "" - + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة label_fixed = label_fixed.replace("التصنيف:", "") @@ -100,5 +96,5 @@ def post_process(response): label_fixed = "yes_calls_for_action" else: label_fixed = None - + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py index f3e1c213..b1ac8223 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_English.py @@ -3,20 +3,15 @@ from llmebench.tasks import AttentionworthyTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version 
on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -48,15 +43,16 @@ def prompt(input_sample): f"label: \n" ) return [ - { "role": "user", "content": prompt_string, }, ] + import re + def post_process(response): print(response) if "output" in response: @@ -67,9 +63,11 @@ def post_process(response): else: print("Response .. " + str(response)) label = "" - - label_fixed = label.lower().strip() # Convert to lowercase and strip leading/trailing whitespace - + + label_fixed = ( + label.lower().strip() + ) # Convert to lowercase and strip leading/trailing whitespace + # Update conditions to match labels without surrounding whitespace if label_fixed.startswith("no"): label_fixed = "no_not_interesting" @@ -78,7 +76,9 @@ def post_process(response): elif "yes_harmful" in label_fixed: label_fixed = "harmful" elif label_fixed.startswith("yes"): - label_fixed = label_fixed.strip() # Keep the original label if it starts with "yes" + label_fixed = ( + label_fixed.strip() + ) # Keep the original label if it starts with "yes" elif "yes_blame_authoritie" in label_fixed: label_fixed = "yes_blame_authoritie" elif "yes_discusses_action_taken" in label_fixed: diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py index 655a230c..0802899f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Llama3-8b_ZeroShot_Mixed.py @@ 
-3,18 +3,15 @@ from llmebench.tasks import AttentionworthyTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22AttentionworthyDataset, @@ -41,8 +38,8 @@ def config(): def prompt(input_sample): base_prompt = ( 'قم بتصنيف "التغريدة" التالية إلى واحدة من الفئات التالية: ' - 'yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، ' - 'yes_contains_advice، yes_calls_for_action. قدم التصنيف فقط.\n\n' + "yes_discusses_action_taken، harmful، yes_discusses_cure، yes_asks_question، no_not_interesting، yes_other، yes_blame_authorities، " + "yes_contains_advice، yes_calls_for_action. قدم التصنيف فقط.\n\n" f"التغريدة: '{input_sample}'\n" "التصنيف: " ) @@ -53,8 +50,10 @@ def prompt(input_sample): }, ] + import re + def post_process(response): print(response) if "output" in response: @@ -65,7 +64,7 @@ def post_process(response): else: print("استجابة .. 
" + str(response)) label = "" - + label_fixed = label.lower().strip() # تحويل إلى أحرف صغيرة وإزالة الفراغات الزائدة label_fixed = label_fixed.replace("التصنيف:", "") @@ -99,5 +98,5 @@ def post_process(response): label_fixed = "yes_calls_for_action" else: label_fixed = None - + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Random.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py index 10cef400..3e643a2c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Arabic.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, 
Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -77,9 +71,22 @@ def post_process(response): label = label.replace("label:", "").strip().lower() - if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + if ( + "لا_يستحق_التحقق" in label + or "لا يستحق التحقق" in label + or "ليس يستحق التحقق" in label + or "لا تستحق التحقق" in label + or "no" in label + or "لا" in label + or "not" in label + ): return "0" - elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + elif ( + "yes" in label + or "نعم" in label + or "يستحق التحقق" in label + or "checkworthy" in label + ): return "1" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py index 61ec297b..dff59f8f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_English.py @@ -5,21 +5,15 @@ from 
llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -77,10 +71,22 @@ def post_process(response): label = label.replace("label:", "").strip().lower() - if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + if ( + "لا_يستحق_التحقق" in label + or "لا يستحق التحقق" in label + or "ليس يستحق التحقق" in label + or "لا تستحق التحقق" in label + or "no" in label + or "لا" in label + or "not" in label + ): return "0" - elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + elif ( + "yes" in label + or "نعم" in label + or "يستحق التحقق" in label + or "checkworthy" in label + ): return "1" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py index f88a3f6c..e8dd51c1 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot_Mixed.py @@ -5,19 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -74,9 +70,22 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip().lower() - if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + if ( + "لا_يستحق_التحقق" in label + or "لا يستحق التحقق" in label + or "ليس يستحق التحقق" in label + or "لا تستحق التحقق" in label + or "no" in label + or "لا" in label + or "not" in label + ): return "0" - elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + elif ( + "yes" in label + or "نعم" in label + or "يستحق التحقق" in label + or "checkworthy" in label + ): return "1" else: return None diff --git 
a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py index e6992844..9d6fdb97 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Arabic.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -56,10 +50,22 @@ def post_process(response): label = label.replace("label:", "").strip().lower() - if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + if ( + "لا_يستحق_التحقق" in label + or "لا يستحق التحقق" in label + or "ليس يستحق التحقق" in label + or "لا تستحق التحقق" in label + or "no" in label + or "لا" in label + or "not" in label + ): return "0" - elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + elif ( + "yes" in label + or "نعم" in label + or "يستحق التحقق" in label + or "checkworthy" in label + ): return "1" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py index 2a69cf50..31b7c832 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_English.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our 
peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -56,10 +50,22 @@ def post_process(response): label = label.replace("label:", "").strip().lower() - if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + if ( + "لا_يستحق_التحقق" in label + or "لا يستحق التحقق" in label + or "ليس يستحق التحقق" in label + or "لا تستحق التحقق" in label + or "no" in label + or "لا" in label + or "not" in label + ): return "0" - elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + elif ( + "yes" in label + or "نعم" in label + or "يستحق التحقق" in label + or "checkworthy" in label + ): return "1" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Mixed.py index 27ff241a..ab27e31f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot_Mixed.py @@ -5,19 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research 
Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -49,21 +45,27 @@ def prompt(input_sample): ] - - - - def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip().lower() - if "لا_يستحق_التحقق" in label or "لا يستحق التحقق" in label or "ليس يستحق التحقق" in label or "لا تستحق التحقق" in label or "no" in label or "لا" in label or "not" in label: + if ( + "لا_يستحق_التحقق" in label + or "لا يستحق التحقق" in label + or "ليس يستحق التحقق" in label + or "لا تستحق التحقق" in label + or "no" in label + or "لا" in label + or "not" in label + ): return "0" - elif "yes" in label or "نعم" in label or "يستحق التحقق" in label or "checkworthy" in label: + elif ( + "yes" in label + or "نعم" in label + or "يستحق التحقق" in label + or "checkworthy" in label + ): return "1" else: return None - - - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py index 84f8dff8..6dcb22af 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Arabic.py @@ -5,21 +5,15 @@ from 
llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -31,6 +25,8 @@ def config(): }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" @@ -56,11 +52,8 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = ( - ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". 
قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), @@ -68,16 +61,19 @@ def prompt(input_sample, examples): ] - def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - - + label = label.lower() - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py index e127207a..9a202fa3 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_English.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -31,6 +25,8 @@ def config(): }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "These are some examples:\n\n" @@ -56,9 +52,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = ( - 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' - ) + base_prompt = 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' return [ { "role": "user", @@ -66,14 +60,20 @@ def prompt(input_sample, examples): }, ] + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - - + label = label.lower() - if "لا" in label or "not" in label or "no" in label or "ليس" in label or "ليست" in label: + if ( + "لا" in label + or "not" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py index 3d50b9cb..aa4d87c2 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py +++ 
b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_FewShot_Mixed.py @@ -5,19 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -29,6 +25,8 @@ def config(): }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" @@ -54,11 +52,8 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = ( - ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "checkworthy" أو "not checkworthy". 
قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), @@ -70,12 +65,15 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - - - + label = label.lower() - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py index 293a46e3..9310aaa4 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Arabic.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -34,14 +28,11 @@ def config(): def prompt(input_sample): - base_prompt = ( - ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", - "content": base_prompt+ input_sample, + "content": base_prompt + input_sample, }, ] @@ -50,10 +41,14 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - - - + label = label.lower() - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Mixed.py index 1b7cfbb0..1d9f7ebf 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_Mixed.py @@ -5,19 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin 
Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -32,9 +28,7 @@ def config(): def prompt(input_sample): - base_prompt = ( - ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ { "role": "user", @@ -47,11 +41,15 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() - - - + label = label.lower() - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py index 9a55e26b..146e53b9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Arabic.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def 
metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -31,6 +25,8 @@ def config(): }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" @@ -56,17 +52,18 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = ( - ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), }, ] + + import random + + def post_process(response): print(response) if "output" in response: @@ -77,13 +74,18 @@ def post_process(response): else: print("Response .. 
" + str(response)) label = "" - + label = label.lower() if "لا أستطيع" in label: - return random.choice(["0","1"]) - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return random.choice(["0", "1"]) + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py index 2a3fa2fa..55cd373b 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_English.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -31,6 +25,8 @@ def config(): }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "These are some examples:\n\n" @@ -56,9 +52,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = ( - 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' - ) + base_prompt = 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' return [ { "role": "user", @@ -66,7 +60,10 @@ def prompt(input_sample, examples): }, ] + import random + + def post_process(response): print(response) if "output" in response: @@ -78,11 +75,16 @@ def post_process(response): print("Response .. 
" + str(response)) label = "" label = label.lower() - + if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) - if "not" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return random.choice(["0", "1"]) + if ( + "not" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py index 82ce1f00..94fd163e 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_FewShot_Mixed.py @@ -5,19 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -29,6 +25,8 @@ def config(): }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "هذه بعض الأمثلة:\n\n" @@ -54,18 +52,18 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = ( - ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), }, ] + import random + + def post_process(response): print(response) if "output" in response: @@ -78,9 +76,15 @@ def post_process(response): label = "" label = label.lower() if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label or "not" in label: + return random.choice(["0", "1"]) + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + or "not" in label + ): return "0" return "1" return label - diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py index 4f70e746..16452928 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_ZeroShot_Mixed.py @@ -5,19 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -32,9 +28,7 @@ def config(): def prompt(input_sample): - base_prompt = ( - ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "checkworthy" أو "not checkworthy". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ { "role": "user", @@ -42,7 +36,10 @@ def prompt(input_sample): }, ] + import random + + def post_process(response): print(response) if "output" in response: @@ -54,10 +51,16 @@ def post_process(response): print("Response .. 
" + str(response)) label = "" label = label.lower() - + if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return random.choice(["0", "1"]) + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py index c6b8248b..3ae32b7d 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_Arabic.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -34,18 +28,18 @@ def config(): def prompt(input_sample): - base_prompt = ( - ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "جديرة بالتحقق" أو "غير جديرة بالتحقق". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", - "content": base_prompt+ input_sample, + "content": base_prompt + input_sample, }, ] + import random + + def post_process(response): print(response) if "output" in response: @@ -58,8 +52,14 @@ def post_process(response): label = "" label = label.lower() if "لا أستطيع" in label: - return random.choice(["0","1"]) - if "لا" in label or "غير" in label or "no" in label or "ليس" in label or "ليست" in label: + return random.choice(["0", "1"]) + if ( + "لا" in label + or "غير" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py index c5671140..53537462 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Llama3-8b_Zeroshot_English.py @@ -5,21 +5,15 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram 
Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22CheckworthinessDataset, @@ -34,16 +28,18 @@ def config(): def prompt(input_sample): - base_prompt = ( - 'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n' - ) + base_prompt = 'Classify the tweet as "checkworthy" or "not checkworthy". 
Provide the classification only for the last tweet, without providing any additional justification:\n' return [ { "role": "user", "content": base_prompt + input_sample, }, ] + + import random + + def post_process(response): print(response) if "output" in response: @@ -56,8 +52,14 @@ def post_process(response): label = "" label = label.lower() if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) - if "لا" in label or "not" in label or "no" in label or "ليس" in label or "ليست" in label: + return random.choice(["0", "1"]) + if ( + "لا" in label + or "not" in label + or "no" in label + or "ليس" in label + or "ليست" in label + ): return "0" return "1" return label diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Random.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py index 4290717c..ad2ec15a 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22ClaimDataset, @@ -31,7 +25,9 @@ def config(): def prompt(input_sample, examples): - base_prompt = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. قدم التصنيف فقط.\n" + base_prompt = ( + "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. 
قدم التصنيف فقط.\n" + ) prompt = few_shot_prompt(input_sample, base_prompt, examples) return [ @@ -46,8 +42,6 @@ def prompt(input_sample, examples): ] - - def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: @@ -73,7 +67,8 @@ def post_process(response): if ( "لا" in input_label or "لا تحتوي" in input_label - or "ليست" in input_label or "not" in input_label + or "ليست" in input_label + or "not" in input_label or "label: 0" in input_label or "label: no" in input_label or "not contain" in input_label @@ -91,4 +86,3 @@ def post_process(response): return "1" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py index 6bb020a6..18374d44 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22ClaimDataset, diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py index 63efaa29..71f84ca0 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, @@ -29,7 +25,9 @@ def config(): def prompt(input_sample, examples): - base_prompt = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. قدم التصنيف فقط.\n" + base_prompt = ( + "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. 
قدم التصنيف فقط.\n" + ) prompt = few_shot_prompt(input_sample, base_prompt, examples) return [ @@ -44,8 +42,6 @@ def prompt(input_sample, examples): ] - - def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: @@ -91,4 +87,3 @@ def post_process(response): return "1" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py index 9bc989bb..b4281930 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22ClaimDataset, @@ -49,11 +43,6 @@ def prompt(input_sample): ] - - - - - def post_process(response): input_label = response["choices"][0]["message"]["content"] input_label = input_label.replace(".", "").strip().lower() @@ -61,7 +50,8 @@ def post_process(response): if ( "لا" in input_label or "لا تحتوي" in input_label - or "ليست" in input_label or "not" in input_label + or "ليست" in input_label + or "not" in input_label or "label: 0" in input_label or "label: no" in input_label or "not contain" in input_label diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py index fbb6ff60..2bdb67cf 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22ClaimDataset, diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py index 681f91c7..07e8c1db 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, @@ -47,11 +43,6 @@ def prompt(input_sample): ] - - - - - def post_process(response): input_label = response["choices"][0]["message"]["content"] input_label = input_label.replace(".", "").strip().lower() diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py index 272d4c44..891d8e61 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py @@ -3,36 +3,37 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, "task": ClaimDetectionTask, "model": FastChatModel, - "model_args": { + "model_args": { "max_tries": 3, }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + def prompt(input_sample, few_shot_examples): few_shot_text = "" for example in few_shot_examples: few_shot_text += ( - "التغريدة: " + example["input"] + "\n" - + "الإجابة: " + ("yes" if example["label"] == "1" else "no") + "\n\n" + "التغريدة: " + + example["input"] + + "\n" + + "الإجابة: " + + ("yes" if example["label"] == "1" else "no") + + "\n\n" ) return [ @@ -41,12 +42,15 @@ def prompt(input_sample, few_shot_examples): "content": ( "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'. 
قدم فقط الإجابة.\n\n" + few_shot_text - + "التغريدة: " + input_sample + "\n" + + "التغريدة: " + + input_sample + + "\n" + "الإجابة: " - ) + ), } ] + def post_process(response): label = response["choices"][0]["message"]["content"] @@ -66,5 +70,4 @@ def post_process(response): else: label_fixed = None - return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py index 93409450..ea8e1c53 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py @@ -3,30 +3,27 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, "task": ClaimDetectionTask, "model": FastChatModel, - "model_args": { + "model_args": { "max_tries": 3, }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + def prompt(input_sample, examples=None): base_prompt = "Does this sentence contain a factual claim? Answer with 'yes' or 'no' only. Provide only the label.\n" if examples: @@ -34,41 +31,45 @@ def prompt(input_sample, examples=None): else: user_message_content = base_prompt + f"Sentence: {input_sample}\nLabel: " - return [ - { - "role": "user", - "content": user_message_content - } - ] + return [{"role": "user", "content": user_message_content}] + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: - label = "no" if example['label'] == "0" else "yes" - out_prompt += ( - "Sentence: " + example['input'] + "\nLabel: " + label + "\n\n" - ) + label = "no" if example["label"] == "0" else "yes" + out_prompt += "Sentence: " + example["input"] + "\nLabel: " + label + "\n\n" out_prompt += "Sentence: " + input_sample + "\nLabel: " return out_prompt + def post_process(response): - + label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip() label = label.lower() - if "لا يمكنني" in label or "I cannot" in label or "sorry" in label or "هذه المحادثة غير مناسبة" in label: + if ( + "لا يمكنني" in label + or "I cannot" in label + or "sorry" in label + or "هذه المحادثة غير مناسبة" in label + ): return None if "هذه التغريدة تحتوي" in label: - return "1" - - if "not a factual claim" in label or "لا يوجد" in label or "not" in label or "لا" in label: + return "1" + + if ( + "not a factual claim" in label + or 
"لا يوجد" in label + or "not" in label + or "لا" in label + ): return "0" return "1" - if "label: " in label: arr = label.split("label: ") label = arr[1].strip() diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py index e082dd3d..6687d61a 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py @@ -3,33 +3,37 @@ from llmebench.tasks import ClaimDetectionTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22ClaimDataset, "task": ClaimDetectionTask, "model": FastChatModel, - "model_args": { + "model_args": { "max_tries": 3, }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def prompt(input_sample, few_shot_examples): few_shot_text = "" for example in few_shot_examples: few_shot_text += ( - "التغريدة: " + example["input"] + "\n" - + "الإجابة: " + ("yes" if example["label"] == "1" else "no") + "\n\n" + "التغريدة: " + + example["input"] + + "\n" + + "الإجابة: " + + ("yes" if example["label"] == "1" else "no") + + "\n\n" ) return [ @@ -38,12 +42,15 @@ def prompt(input_sample, few_shot_examples): "content": ( "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. 
قدم فقط الإجابة.\n\n" + few_shot_text - + "التغريدة: " + input_sample + "\n" + + "التغريدة: " + + input_sample + + "\n" + "الإجابة: " - ) + ), } ] + def post_process(response): try: label = response["choices"][0]["message"]["content"] @@ -51,18 +58,26 @@ def post_process(response): label = label.replace("الإجابة:", "").strip() label = label.lower() if "هذه التغريدة تحتوي" in label: - return "1" + return "1" - if "لا يمكنني" in label or "I cannot" in label or "sorry" in label or "هذه المحادثة غير مناسبة" in label: + if ( + "لا يمكنني" in label + or "I cannot" in label + or "sorry" in label + or "هذه المحادثة غير مناسبة" in label + ): return None - if "not a factual claim" in label or "لا يوجد" in label or "not" in label or "لا" in label: + if ( + "not a factual claim" in label + or "لا يوجد" in label + or "not" in label + or "لا" in label + ): return "0" - - - if "نعم" in label or 'yes' in label: + if "نعم" in label or "yes" in label: pred_label = "1" - elif "لا" in label or 'no' in label: + elif "لا" in label or "no" in label: pred_label = "0" else: pred_label = "" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py index c04a54b1..77ebde59 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, 
and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22ClaimDataset, @@ -64,5 +58,4 @@ def post_process(response): else: label_fixed = None - return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py index d934916e..b5e5a7b9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22ClaimDataset, @@ -51,12 +45,15 @@ def post_process(response): label = label.replace("label:", "").strip() label = label.lower() - if "label: " in label: arr = label.split("label: ") label = arr[1].strip() - if "yes" in label or "نعم" in label or "the sentence contains a factual claim" in label: + if ( + "yes" in label + or "نعم" in label + or "the sentence contains a factual claim" in label + ): label_fixed = "1" if "no" in label or "لا" in label: label_fixed = "0" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py index bb3b9961..6dec7c55 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, @@ -43,7 +39,6 @@ def prompt(input_sample): ] - def post_process(response): try: label = response["choices"][0]["message"]["content"] @@ -51,18 +46,26 @@ def post_process(response): label = label.replace("الإجابة:", "").strip() label = label.lower() if "هذه التغريدة تحتوي" in label: - return "1" - - if "لا يمكنني" in label or "I cannot" in label or "sorry" in label or "هذه المحادثة غير مناسبة" in label: + return "1" + + if ( + "لا يمكنني" in label + or "I cannot" in label + or "sorry" in label + or "هذه المحادثة غير مناسبة" in label + ): return None - if "not a factual claim" in label or "لا يوجد" in label or "not" in label or "لا" in label: + if ( + "not a factual claim" in label + or "لا يوجد" in label + or "not" in label + or "لا" in label + ): return "0" - - - if "نعم" in label or 'yes' in label: + if "نعم" in label or "yes" in label: pred_label = "1" - elif "لا" in label or 'no' in label: + elif "لا" in label or "no" in label: pred_label = "0" else: pred_label = "" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py index 78ecee0b..6e8fac6f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py @@ -3,36 +3,37 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": 
"Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, "task": ClaimDetectionTask, "model": AzureModel, - "model_args": { + "model_args": { "max_tries": 30, }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + def prompt(input_sample, few_shot_examples): few_shot_text = "" for example in few_shot_examples: few_shot_text += ( - "الجملة: " + example["input"] + "\n" - + "الإجابة: " + ("نعم" if example["label"] == "1" else "لا") + "\n\n" + "الجملة: " + + example["input"] + + "\n" + + "الإجابة: " + + ("نعم" if example["label"] == "1" else "لا") + + "\n\n" ) return [ @@ -41,12 +42,18 @@ def prompt(input_sample, few_shot_examples): "content": ( "هل تحتوي هذه الجملة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'. 
قدم فقط الإجابة.\n\n" + few_shot_text - + "الجملة: " + input_sample + "\n" + + "الجملة: " + + input_sample + + "\n" + "الإجابة: " - ) + ), } ] + + import random + + def post_process(response): try: label = "" @@ -56,7 +63,7 @@ def post_process(response): print(f"Extracted Label: {label}") if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) + return random.choice(["0", "1"]) if "نعم" in label: pred_label = "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py index a192d1bf..12e02776 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py @@ -3,30 +3,27 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, "task": ClaimDetectionTask, "model": AzureModel, - "model_args": { + "model_args": { "max_tries": 30, }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + def prompt(input_sample, examples=None): base_prompt = "Does this sentence contain a factual claim? Answer with 'yes' or 'no' only. Provide only the label.\n" if examples: @@ -34,24 +31,22 @@ def prompt(input_sample, examples=None): else: user_message_content = base_prompt + f"Sentence: {input_sample}\nLabel: " - return [ - { - "role": "user", - "content": user_message_content - } - ] + return [{"role": "user", "content": user_message_content}] + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: - label = "no" if example['label'] == "0" else "yes" - out_prompt += ( - "Sentence: " + example['input'] + "\nLabel: " + label + "\n\n" - ) + label = "no" if example["label"] == "0" else "yes" + out_prompt += "Sentence: " + example["input"] + "\nLabel: " + label + "\n\n" out_prompt += "Sentence: " + input_sample + "\nLabel: " return out_prompt + + import random + + def post_process(response): try: label = "" @@ -61,7 +56,7 @@ def post_process(response): print(f"Extracted Label: {label}") if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) + return random.choice(["0", "1"]) if "yes" in label: pred_label = "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py index db002968..0aae6e0f 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py @@ -1,7 +1,8 @@ +import random + from llmebench.datasets import CT22ClaimDataset from llmebench.models import AzureModel from llmebench.tasks import ClaimDetectionTask -import random def metadata(): @@ -9,27 +10,32 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22ClaimDataset, "task": ClaimDetectionTask, "model": AzureModel, - "model_args": { + "model_args": { "max_tries": 30, }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def prompt(input_sample, few_shot_examples): few_shot_text = "" for example in few_shot_examples: few_shot_text += ( - "التغريدة: " + example["input"] + "\n" - + "الإجابة: " + ("yes" if example["label"] == "1" else "no") + "\n\n" + "التغريدة: " + + example["input"] + + "\n" + + "الإجابة: " + + ("yes" if example["label"] == "1" else "no") + + "\n\n" ) return [ @@ -38,12 +44,15 @@ def prompt(input_sample, few_shot_examples): "content": ( "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. 
قدم فقط الإجابة.\n\n" + few_shot_text - + "التغريدة: " + input_sample + "\n" + + "التغريدة: " + + input_sample + + "\n" + "الإجابة: " - ) + ), } ] + def post_process(response): try: label = "" @@ -53,10 +62,10 @@ def post_process(response): print(f"Extracted Label: {label}") if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) - if "نعم" in label or 'yes' in label: + return random.choice(["0", "1"]) + if "نعم" in label or "yes" in label: pred_label = "1" - elif "لا" in label or 'no' in label: + elif "لا" in label or "no" in label: pred_label = "0" else: pred_label = "" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py index 498b6d36..6b5013f6 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, @@ -25,6 +21,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -36,7 +33,11 @@ def prompt(input_sample): ), } ] + + import random + + def post_process(response): try: label = "" @@ -48,19 +49,24 @@ def post_process(response): # Debug print to check the extracted label print(f"Extracted Label: {label}") if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) + return random.choice(["0", "1"]) # Determining the prediction label based on the response content if "نعم" in label or "contains a factual claim" in label or "label: 1" in label: pred_label = "1" - elif "لا" in label or "label: 0" in label or "does not contain a factual claim" in label or "label: no" in label: + elif ( + "لا" in label + or "label: 0" in label + or "does not contain a factual claim" in label + or "label: no" in label + ): pred_label = "0" else: # If none of the expected labels are found, default to a negative claim (most conservative approach) pred_label = "0" # Debug print to check the final predicted label - #print(f"Predicted Label: {pred_label}") + # print(f"Predicted Label: {pred_label}") return pred_label except Exception as e: diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py index eec5a9b1..f88e2944 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py @@ 
-3,19 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22ClaimDataset, @@ -25,6 +21,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -36,7 +33,11 @@ def prompt(input_sample): ), } ] + + import random + + def post_process(response): try: label = "" @@ -48,12 +49,17 @@ def post_process(response): # Debug print to check the extracted label print(f"Extracted Label: {label}") if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) + return random.choice(["0", "1"]) # Determining the prediction label based on the response content if "yes" in label or "contains a factual claim" in label or "label: 1" in label: pred_label = "1" - elif "no" in label or "label: 0" in label or "does not contain a factual claim" in label or "label: no" in label: + elif ( + "no" in label + or "label: 0" in label + or "does not contain a factual claim" in label + or "label: no" in label + ): pred_label = "0" else: # If none of the expected labels are found, default to a negative claim (most conservative approach) diff --git 
a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py index 103090b0..5da6f98e 100755 --- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import ClaimDetectionTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22ClaimDataset, @@ -23,6 +21,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -34,7 +33,11 @@ def prompt(input_sample): ), } ] + + import random + + def post_process(response): try: label = "" @@ -46,19 +49,24 @@ def post_process(response): # Debug print to check the extracted label print(f"Extracted Label: {label}") if "لا أستطيع" in label or "I cannot" in label: - return random.choice(["0","1"]) + return random.choice(["0", "1"]) # Determining the prediction label based on the response content if "yes" in label or "contains a factual claim" in label or "label: 1" in label: pred_label = "1" - elif "no" in label or "label: 0" in label or "does not contain a factual claim" in label or "label: no" in label: + elif ( + "no" in label + or "label: 0" in label + or "does not contain a factual claim" in label + or "label: no" in label + ): pred_label = "0" else: # If none of the expected labels are found, default to a negative claim (most conservative approach) pred_label = "0" # Debug print to check the final predicted label - #print(f"Predicted Label: {pred_label}") + # print(f"Predicted Label: {pred_label}") return pred_label except Exception as e: diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Random.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git 
a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py index 8cd97af0..a115fa49 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -45,14 +39,12 @@ def prompt(input_sample, examples): ] - def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt for example in examples: sent = example["input"] label = "صحيحة" if example["label"] == "true" else "خاطئة" - out_prompt = ( out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" ) @@ -92,4 +84,4 @@ def post_process(response): return "false" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py index 212c28de..2253ecfb 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_English.py @@ -3,17 +3,15 @@ from llmebench.tasks import FactualityTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": ANSFactualityDataset, @@ -86,4 +84,4 @@ def post_process(response): return "false" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py index 879e77c6..1655c2a1 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import FactualityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": ANSFactualityDataset, @@ -43,14 +39,12 @@ def prompt(input_sample, examples): ] - def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt for example in examples: sent = example["input"] label = example["label"] - out_prompt = ( out_prompt + "الجملة: " + sent + "\n" + "التصنيف: " + label + "\n\n" ) @@ -90,4 +84,4 @@ def post_process(response): return "false" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py index 18a2d945..ff1fe4ec 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint 
version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py index a37d2325..fc513431 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -75,4 +69,4 @@ def post_process(response): return "false" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py index 6100b220..6140335a 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import FactualityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": ANSFactualityDataset, @@ -48,7 +44,6 @@ def prompt(input_sample): ] - def post_process(response): input_label = response["choices"][0]["message"]["content"] input_label = input_label.replace(".", "").strip().lower() diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py index 7119e69c..4d3b37ee 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -29,7 +23,6 @@ def config(): } - def prompt(input_sample, examples): prompt_text = "هل المعلومات في الجملة التالية صحيحة أم لا؟ أجب فقط بـ 'نعم' إذا كانت صحيحة و'لا' إذا لم تكن صحيحة. " @@ -68,8 +61,6 @@ def post_process(response): label = label.replace(".", "").strip().lower() if "آسف" in label or "لا أستطيع" in label: return None - - if ( "لا" in label @@ -79,7 +70,6 @@ def post_process(response): or "label: 0" in label or "label: no" in label or "غير صحيح" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py index 420744a9..c418520c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -64,7 +58,6 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace(".", "").strip().lower() - if ( "لا" in label or "خطأ" in label @@ -72,7 +65,6 @@ def post_process(response): or "false" in label or "label: 0" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py index 90ca31d1..23cdda1e 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import FactualityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": ANSFactualityDataset, @@ -38,6 +34,7 @@ def prompt(input_sample, examples): }, ] + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt for example in examples: @@ -63,7 +60,6 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace(".", "").strip().lower() - if ( "لا" in label or "خطأ" in label @@ -71,7 +67,6 @@ def post_process(response): or "false" in label or "label: 0" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py index f4c82ee7..b002cbee 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -45,12 +39,10 @@ def prompt(input_sample): ] - def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace(".", "").strip().lower() - if ( "لا" in label or "خطأ" in label @@ -58,7 +50,6 @@ def post_process(response): or "false" in label or "label: 0" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py index a5edf196..986f1274 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py index 5b7affe9..469ee3eb 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_JAIS13b_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import FactualityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": ANSFactualityDataset, @@ -39,15 +35,14 @@ def prompt(input_sample): { "role": "user", "content": prompt_text, - - }] + } + ] def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace(".", "").strip().lower() - if ( "لا" in label or "خطأ" in label @@ -55,7 +50,6 @@ def post_process(response): or "false" in label or "label: 0" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py index 3c506d13..e72f2575 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -81,7 +75,6 @@ def post_process(response): or "false" in label or "label: 0" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py index 0492381c..30bced14 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -72,21 +66,12 @@ def post_process(response): label = label.replace(".", "").strip().lower() - if ( - "true" in label - or "label: 1" in label - or "label: yes" in label - ): + if "true" in label or "label: 1" in label or "label: yes" in label: pred_label = "true" - elif ( - "false" in label - or "label: 0" in label - or "label: no" in label - ): + elif "false" in label or "label: 0" in label or "label: no" in label: pred_label = "false" else: print("label problem!! " + label) pred_label = None return pred_label - diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py index 0cbcfb96..18a92734 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import FactualityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": ANSFactualityDataset, @@ -38,6 +34,7 @@ def prompt(input_sample, examples): }, ] + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt for example in examples: @@ -78,7 +75,6 @@ def post_process(response): or "false" in label or "label: 0" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py index 76bf39e1..2cd40497 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -45,7 +39,6 @@ def prompt(input_sample): ] - def post_process(response): if "output" in response: # if "content" in response["messages"]: @@ -65,7 +58,6 @@ def post_process(response): or "false" in label or "خطا" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py index e58a2325..574b5ce9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import FactualityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": ANSFactualityDataset, @@ -46,7 +40,6 @@ def prompt(input_sample): ] - def post_process(response): if "output" in response: # if "content" in response["messages"]: @@ -58,17 +51,9 @@ def post_process(response): label = "" label = label.replace(".", "").strip().lower() - if ( - "true" in label - or "label: 1" in label - or "label: yes" in label - ): + if "true" in label or "label: 1" in label or "label: yes" in label: pred_label = "true" - elif ( - "false" in label - or "label: 0" in label - or "label: no" in label - ): + elif "false" in label or "label: 0" in label or "label: no" in label: pred_label = "false" else: print("label problem!! " + label) diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py index 3cad1d69..3cd6c987 100755 --- a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Llama3-8b_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import FactualityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on 
[arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": ANSFactualityDataset, @@ -39,9 +35,8 @@ def prompt(input_sample): { "role": "user", "content": prompt_text, - - }] - + } + ] def post_process(response): @@ -63,7 +58,6 @@ def post_process(response): or "false" in label or "label: 0" in label or "label: no" in label - ): pred_label = "false" elif ( diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Random.py b/assets/ar/factuality_disinformation_harmful_content/factuality/ANSFactuality_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py 
b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_Random.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_Random.py b/assets/ar/factuality_disinformation_harmful_content/factuality/UnifiedFCFactuality_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_BLOOMZ_ZeroShot.py 
b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py index 82567e94..187661c9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import HarmfulDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -67,27 +61,25 @@ def few_shot_prompt(input_sample, base_prompt, examples): def post_process(response): label = response["choices"][0]["message"]["content"] label_fixed = None - label = label.replace("label:","") - label = label.replace("التصنيف:","") + label = label.replace("label:", "") + label = label.replace("التصنيف:", "") label = label.strip().lower() - if ("غير ضارة" in label - or "ليست ضارة" in label - or "غير ضاره" in label - or "غير" in label - or "not" in label - or "ليست" in label - or "لا" in label - or "not harmful" in label - or label.startswith("no") - or "safe" in label - or "not_harmful" in label + if ( + "غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label ): label_fixed = "0" - elif ("ضارة" in label - or "harmful" in label - or "نعم" in label - or "yes" in label): + elif "ضارة" in label or "harmful" in label or "نعم" in label or "yes" in label: label_fixed = "1" - return label_fixed \ No newline at end of file + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py index de688645..4915b237 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py @@ -3,21 +3,15 @@ 
from llmebench.tasks import HarmfulDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -66,28 +60,25 @@ def few_shot_prompt(input_sample, base_prompt, examples): def post_process(response): label = response["choices"][0]["message"]["content"] - label = label.replace("label:","") - label = label.replace("التصنيف:","") + label = label.replace("label:", "") + label = label.replace("التصنيف:", "") label = label.strip().lower() - if ("غير ضارة" in label - or "ليست ضارة" in label - or "غير ضاره" in label - or "غير" in label - or "not" in label - or "ليست" in label - or "لا" in label - or "not harmful" in label - or label.startswith("no") - or "safe" in label - or "not_harmful" in label + if ( + "غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label ): return "0" - elif ("ضارة" in label - or "harmful" in label - or "نعم" in label - or "yes" in label): + elif "ضارة" in label or "harmful" in label or "نعم" in label or "yes" in label: return "1" return None - diff 
--git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py index 9045b1fe..ffe298b4 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import HarmfulDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22HarmfulDataset, @@ -65,28 +61,25 @@ def few_shot_prompt(input_sample, base_prompt, examples): def post_process(response): label = response["choices"][0]["message"]["content"] label_fixed = None - label = label.replace("label:","") - label = label.replace("التصنيف:","") + label = label.replace("label:", "") + label = label.replace("التصنيف:", "") label = label.strip().lower() - if ("غير ضارة" in label - or "ليست ضارة" in label - or "غير ضاره" in label - or "غير" in label - or "not" in label - or "ليست" in label - or "لا" in label - or "not harmful" in label - or label.startswith("no") - or "safe" in label - or "not_harmful" in label + if ( + "غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label ): return "0" - elif ("ضارة" in label - or "harmful" in label - or "نعم" in label - or "yes" in label): + elif "ضارة" in label or "harmful" in label or "نعم" in label or "yes" in label: return "1" return label_fixed - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py index 0535b298..0f5e05b0 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py @@ -5,21 +5,15 @@ from llmebench.tasks import HarmfulDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -53,28 +47,25 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"] - label = label.replace("label:","") - label = label.replace("التصنيف:","") + label = label.replace("label:", "") + label = label.replace("التصنيف:", "") label = label.strip().lower() - if ("غير ضارة" in label - or "ليست ضارة" in label - or "غير ضاره" in label - or "غير" in label - or "not" in label - or "ليست" in label - or "لا" in label - or "not harmful" in label - or label.startswith("no") - or "safe" in label - or "not_harmful" in label + if ( + "غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label ): 
return "0" - elif ("ضارة" in label - or "harmful" in label - or "نعم" in label - or "yes" in label): + elif "ضارة" in label or "harmful" in label or "نعم" in label or "yes" in label: return "1" return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py index e06226f9..cac11ed7 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py @@ -5,21 +5,15 @@ from llmebench.tasks import HarmfulDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -53,28 +47,25 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"] - label = label.replace("label:","") - label = label.replace("التصنيف:","") + label = label.replace("label:", "") + label = label.replace("التصنيف:", "") label = label.strip().lower() - if ("غير ضارة" in label - or "ليست ضارة" in label - or "غير ضاره" in label - or "غير" in label - or "not" in label - or "ليست" in label - or "لا" in label - or "not harmful" in label - or label.startswith("no") - or "safe" in label - or "not_harmful" in label + if ( + "غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label ): return "0" - elif ("ضارة" in label - or "harmful" in label - or "نعم" in label - or "yes" in label): + elif "ضارة" in label or "harmful" in label or "نعم" in label or "yes" in label: return "1" return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py index b0404ded..13196db3 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py @@ -5,19 +5,15 @@ from llmebench.tasks import HarmfulDetectionTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, 
Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22HarmfulDataset, @@ -52,28 +48,25 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"] label_fixed = None - label = label.replace("label:","") - label = label.replace("التصنيف:","") + label = label.replace("label:", "") + label = label.replace("التصنيف:", "") label = label.strip().lower() - if ("غير ضارة" in label - or "ليست ضارة" in label - or "غير ضاره" in label - or "غير" in label - or "not" in label - or "ليست" in label - or "لا" in label - or "not harmful" in label - or label.startswith("no") - or "safe" in label - or "not_harmful" in label + if ( + "غير ضارة" in label + or "ليست ضارة" in label + or "غير ضاره" in label + or "غير" in label + or "not" in label + or "ليست" in label + or "لا" in label + or "not harmful" in label + or label.startswith("no") + or "safe" in label + or "not_harmful" in label ): return "0" - elif ("ضارة" in label - or "harmful" in label - or "نعم" in label - or "yes" in label): + elif "ضارة" in label or "harmful" in label or "نعم" in label or "yes" in label: return "1" return label_fixed - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py 
b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py index 0397fe0f..9e9be164 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import FastChatModel from llmebench.tasks import HarmfulDetectionTask -import random - - def metadata(): @@ -11,13 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -43,27 +39,29 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): base_prompt = "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط." 
return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] - def post_process(response): label = response["choices"][0]["message"]["content"].lower() - print("label: ",label) + print("label: ", label) if "لا يمكنني" in label or "ai" in label: return None - if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label: + if ( + "غير ضارة" in label + or "لا يحتوي على محتوى ضار" in label + or "غير" in label + or "ليست" in label + ): return "0" - - + else: - return "1" \ No newline at end of file + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py index e9828a87..c26b458a 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import FastChatModel from llmebench.tasks import HarmfulDetectionTask -import random - - def metadata(): @@ -11,13 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -48,30 +44,32 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): base_prompt = "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label." return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] - - def post_process(response): label = response["choices"][0]["message"]["content"].lower() - print("label: ",label) + print("label: ", label) if "label: harmful" in label or "is harmful" in label: return "1" if "لا يمكنني" in label or "ai" in label or "لا يمكن" in label: return None - if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label or "not" in label: + if ( + "غير ضارة" in label + or "لا يحتوي على محتوى ضار" in label + or "غير" in label + or "ليست" in label + or "not" in label + ): return "0" - - + else: - return "1" \ No newline at end of file + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py index 1b0bdadc..9fe4d0e3 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import FastChatModel from llmebench.tasks import 
HarmfulDetectionTask -import random - def metadata(): @@ -10,11 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22HarmfulDataset, @@ -26,6 +25,8 @@ def config(): }, "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: @@ -38,37 +39,51 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): - base_prompt = "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n" + base_prompt = ( + "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. 
قدم التصنيف فقط.\n\n" + ) return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] - def post_process(response): label = response["choices"][0]["message"]["content"].lower() - print("label: ",label) - if "لا يمكنني" in label or "ai" in label or "لا يمكن" in label or "ما هي المعايير" in label: + print("label: ", label) + if ( + "لا يمكنني" in label + or "ai" in label + or "لا يمكن" in label + or "ما هي المعايير" in label + ): return None if "التغريدة الأخيرة هي 'harmful'" in label: return "1" if "التغريدة الثالثة مصنفة على أنها 'غير مؤذية'" in label: return "0" - if "التغريدة الثالثة مصنفة على أنها 'مؤذية'" in label or "غير آمن" in label or "لأنها لا تحتوي على محتوى ضار" in label: - return "1" - + if ( + "التغريدة الثالثة مصنفة على أنها 'مؤذية'" in label + or "غير آمن" in label + or "لأنها لا تحتوي على محتوى ضار" in label + ): + return "1" + if "label: harmful" in label or "is harmful" in label: return "1" - - if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليس" in label or "no" in label: + + if ( + "غير ضارة" in label + or "لا يحتوي على محتوى ضار" in label + or "غير" in label + or "ليس" in label + or "no" in label + ): return "0" - - + else: - return "1" + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py index 25942750..42592efa 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py +++ 
b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import FastChatModel from llmebench.tasks import HarmfulDetectionTask -import random - - def metadata(): @@ -11,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22HarmfulDataset, @@ -29,6 +26,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -45,12 +43,16 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"].lower() - print("label: ",label) + print("label: ", label) if "لا يمكنني" in label or "ai" in label: return None - if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label: + if ( + "غير ضارة" in label + or "لا يحتوي على محتوى ضار" in label + or "غير" in label + or "ليست" in label + ): return "0" - - + else: - return "1" + return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py 
b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py index af387715..273545b3 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import HarmfulDetectionTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -47,14 +41,19 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"].lower() - print("label: ",label) + print("label: ", label) if "label: harmful" in label or "is harmful" in label: return "1" if "لا يمكنني" in label or "ai" in label or "لا يمكن" in label: return None - if "غير ضارة" in label or "لا يحتوي على محتوى ضار" in label or "غير" in label or "ليست" in label or "not" in label: + if ( + "غير ضارة" in label + or "لا يحتوي على محتوى ضار" in label + or "غير" in label + or "ليست" in label + or "not" in label + ): return "0" - - + else: return "1" diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py index 9dd5fc01..8fd926a5 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import FastChatModel from llmebench.tasks import HarmfulDetectionTask -import random - def metadata(): @@ -10,11 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and 
results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22HarmfulDataset, @@ -27,6 +26,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -43,12 +43,17 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"].lower() - + if "غير" in label or "ليس" in label or "not" in label: return "0" - elif "ضار" in label or "مس" in label or "مؤ" in label or "harm" in label or "مض" in label: + elif ( + "ضار" in label + or "مس" in label + or "مؤ" in label + or "harm" in label + or "مض" in label + ): return "1" - + else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py index 1ef503b5..26d6281c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import AzureModel from llmebench.tasks import HarmfulDetectionTask -import random - - def metadata(): @@ -11,13 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing 
Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -43,30 +39,28 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): base_prompt = "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط." return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] - def post_process(response): - # Extract the label from the response + # Extract the label from the response if "output" in response: label = response["output"].strip().lower() - - print("label: ",label) + + print("label: ", label) if "غير ضارة" in label or "لا" in label: return "0" - elif label== "ضارة" or "ضارة" in label or "نعم" in label: + elif label == "ضارة" or "ضارة" in label or "نعم" in label: return "1" - + else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py index 6c393e41..2495bdab 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py +++ 
b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import AzureModel from llmebench.tasks import HarmfulDetectionTask -import random - - def metadata(): @@ -11,13 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT22HarmfulDataset, @@ -48,32 +44,28 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): base_prompt = "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label." 
return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] - - def post_process(response): - # Extract the label from the response + # Extract the label from the response if "output" in response: label = response["output"].strip().lower() - - print("label: ",label) - + print("label: ", label) + if "not_harmful" in label: return "0" - elif label== "harmful": + elif label == "harmful": return "1" - + else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py index 7099d265..cb1587b5 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import AzureModel from llmebench.tasks import HarmfulDetectionTask -import random - def metadata(): @@ -10,11 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22HarmfulDataset, @@ -27,6 +26,7 @@ def config(): "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: @@ -39,27 +39,28 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): - base_prompt = "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n" + base_prompt = ( + "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n" + ) return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] + def post_process(response): - # Extract the label from the response + # Extract the label from the response if "output" in response: label = response["output"].strip().lower() - - print("label: ",label) + + print("label: ", label) if "لا أستطيع تقديم هذا النوع من الإجابات." 
in label: return None elif "غير" in label or "ليس" in label or "not" in label: return "0" else: return "1" - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py index 999ba0ff..90d4fd20 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import AzureModel from llmebench.tasks import HarmfulDetectionTask -import random - - def metadata(): @@ -11,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22HarmfulDataset, @@ -29,6 +26,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -44,16 +42,16 @@ def prompt(input_sample): def post_process(response): - # Extract the label from the response + # Extract the label from the response if "output" in response: label = response["output"].strip().lower() - - print("label: ",label) + + print("label: ", label) if "غير ضارة" in label or "لا" in label: return "0" - elif label== "ضارة" or "ضارة" in label or "نعم" in label: + elif label == "ضارة" or "ضارة" in label or "نعم" in label: return "1" - + else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py index 3a664ad5..e2d8107d 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import AzureModel from llmebench.tasks import HarmfulDetectionTask -import random - - def metadata(): @@ -11,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and 
results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT22HarmfulDataset, @@ -29,6 +26,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -42,19 +40,18 @@ def prompt(input_sample): } ] + def post_process(response): - # Extract the label from the response + # Extract the label from the response if "output" in response: label = response["output"].strip().lower() - - print("label: ",label) - + print("label: ", label) + if "not_harmful" in label: return "0" - elif label== "harmful": + elif label == "harmful": return "1" - + else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py index 8bf062c3..130505ee 100755 --- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import CT22HarmfulDataset from llmebench.models import AzureModel from llmebench.tasks import HarmfulDetectionTask -import random - def metadata(): @@ -10,11 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin 
Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT22HarmfulDataset, @@ -27,6 +26,7 @@ def config(): "general_args": {"test_split": "ar"}, } + def prompt(input_sample): return [ { @@ -42,15 +42,14 @@ def prompt(input_sample): def post_process(response): - # Extract the label from the response + # Extract the label from the response if "output" in response: label = response["output"].strip().lower() - - print("label: ",label) + + print("label: ", label) if "لا أستطيع تقديم هذا النوع من الإجابات." 
in label: return None elif "غير" in label or "ليس" in label or "not" in label: return "0" else: return "1" - diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Random.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py index 2d795bdc..1f177e73 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import HateSpeechTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication 
available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -47,11 +41,6 @@ def prompt(input_sample, examples): ] - - - - - def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: @@ -75,9 +64,28 @@ def post_process(response): out = response["choices"][0]["message"]["content"] label = out.lower().strip() - if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + if ( + "ليس" in label + or "ليس كراهية" in label + or "لا" in label + or "no" in label + or "not" in label + or "don't" in label + or "not_hs" in label + or "not_hatespeech" in label + or "not_hate_speech" in label + ): return "NOT_HS" - elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + elif ( + "كراهية" in label + or "نعم" in label + or "أجل" in label + or "yes" in label + or "contins" in label + or "hs" in label + or "hatespeech" in label + or "hate speech" in label + ): return "HS" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py index b9c76e51..be47b7c8 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import HateSpeechTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -70,9 +64,28 @@ def post_process(response): out = response["choices"][0]["message"]["content"] label = out.lower().strip() - if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + if ( + "ليس" in label + or "ليس كراهية" in label + or "لا" in label + or "no" in label + or "not" in label + or "don't" in label + or "not_hs" in label + or "not_hatespeech" in label + or "not_hate_speech" in label + ): return "NOT_HS" - elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + elif ( + "كراهية" in label + or "نعم" in label + or "أجل" in label + or "yes" in label + or "contins" in label + or "hs" in 
label + or "hatespeech" in label + or "hate speech" in label + ): return "HS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py index f42b50f5..fb04f384 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import HateSpeechTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -28,9 +24,7 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - 'هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "hate speech" إن احتوت على خطاب كراهية، و أجب بـ "not hate speech" إن لم تكن كذلك.\n' - ) + base_prompt = 'هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "hate speech" إن احتوت على خطاب كراهية، و أجب بـ "not hate speech" إن لم تكن كذلك.\n' return [ { @@ -44,11 +38,6 @@ def prompt(input_sample, examples): ] - - - - - def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: @@ -69,9 +58,28 @@ def post_process(response): out = response["choices"][0]["message"]["content"] label = out.lower().strip() - if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + if ( + "ليس" in label + or "ليس كراهية" in label + or "لا" in label + or "no" in label + or "not" in label + or "don't" in label + or "not_hs" in label + or "not_hatespeech" in label + or "not_hate_speech" in label + ): return "NOT_HS" - elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + elif ( + "كراهية" in label + or "نعم" in label + or "أجل" in label + or "yes" in label + or "contins" in label + or "hs" in label + or "hatespeech" in label + or "hate speech" in label + ): return "HS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot.py 
b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py index de484631..cfb23768 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import HateSpeechTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -38,23 +32,41 @@ def prompt(input_sample): }, { "role": "user", - "content": f' هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "كراهية" إن احتوت على خطاب كراهية، و أجب بـ "ليس كراهية" إن لم تكن كذلك. \n' + "التغريدة: " + input_sample + "\n" + "التصنيف: " + "content": f' هل تحتوي التغريدة التالية على خطاب كراهية؟ أجب بـ "كراهية" إن احتوت على خطاب كراهية، و أجب بـ "ليس كراهية" إن لم تكن كذلك. 
\n' + + "التغريدة: " + + input_sample + + "\n" + + "التصنيف: ", }, ] - - - - - def post_process(response): out = response["choices"][0]["message"]["content"] label = out.lower().strip() - if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + if ( + "ليس" in label + or "ليس كراهية" in label + or "لا" in label + or "no" in label + or "not" in label + or "don't" in label + or "not_hs" in label + or "not_hatespeech" in label + or "not_hate_speech" in label + ): return "NOT_HS" - elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + elif ( + "كراهية" in label + or "نعم" in label + or "أجل" in label + or "yes" in label + or "contins" in label + or "hs" in label + or "hatespeech" in label + or "hate speech" in label + ): return "HS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py index a813f726..de6262df 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import HateSpeechTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -47,9 +41,28 @@ def post_process(response): out = response["choices"][0]["message"]["content"] label = out.lower().strip() - if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + if ( + "ليس" in label + or "ليس كراهية" in label + or "لا" in label + or "no" in label + or "not" in label + or "don't" in label + or "not_hs" in label + or "not_hatespeech" in label + or "not_hate_speech" in label + ): return "NOT_HS" - elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + elif ( + "كراهية" in label + or "نعم" in label + or "أجل" in label + or "yes" in label + or "contins" in label + or "hs" in label + or "hatespeech" in label + or "hate speech" in label + ): return "HS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py index 95135ae7..6bb4206b 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import HateSpeechTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, 
Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -41,18 +37,32 @@ def prompt(input_sample): ] - - - - - def post_process(response): out = response["choices"][0]["message"]["content"] label = out.lower().strip() - if "ليس" in label or "ليس كراهية" in label or "لا" in label or "no" in label or "not" in label or "don't" in label or "not_hs" in label or "not_hatespeech" in label or "not_hate_speech" in label: + if ( + "ليس" in label + or "ليس كراهية" in label + or "لا" in label + or "no" in label + or "not" in label + or "don't" in label + or "not_hs" in label + or "not_hatespeech" in label + or "not_hate_speech" in label + ): return "NOT_HS" - elif "كراهية" in label or "نعم" in label or "أجل" in label or "yes" in label or "contins" in label or "hs" in label or "hatespeech" in label or "hate speech" in label: + elif ( + "كراهية" in label + or "نعم" in label + or "أجل" in label + or "yes" in label + or "contins" in label + or "hs" in label + or "hatespeech" in label + or "hate speech" in label + ): return "HS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py 
b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py index 7701e603..db30a9d9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import HateSpeechTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -27,6 +23,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = ( "صنف التغريدة التالية حسب احتوائها على خطاب كراهية. أجب ب 'كراهية' أو 'لا كراهية' وقدم التصنيف فقط." 
@@ -45,6 +42,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -52,6 +50,8 @@ def prompt(input_sample, examples): "content": few_shot_prompt(input_sample, examples), } ] + + def post_process(response): print(response) label = response["choices"][0]["message"]["content"].lower() diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py index 0001013d..4c6f6518 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_English.py @@ -3,17 +3,15 @@ from llmebench.tasks import HateSpeechTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -24,7 +22,7 @@ def config(): }, } - + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" for example in examples: @@ -40,18 +38,20 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): - base_prompt = ("Respond only with 'Hate speech' if it is hate speech and 'Not hate speech' if it is not hate speech from the following tweets. " - "Here are some examples to guide you:\n") + base_prompt = ( + "Respond only with 'Hate speech' if it is hate speech and 'Not hate speech' if it is not hate speech from the following tweets. " + "Here are some examples to guide you:\n" + ) return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] + def post_process(response): print(response) label = ( diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py index 0e238ce8..b1b7bb27 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_FewShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import HateSpeechTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin 
Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -25,6 +23,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = ( "أجب فقط بـ 'hate speech' إذا كانت خطاب كراهية و'not hate speech' إذا لم تكن خطاب كراهية من التغريدات التالية. " @@ -43,6 +42,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -51,6 +51,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): print(response) label = response["choices"][0]["message"]["content"].lower() diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py index 1db408d5..97b541fa 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import HateSpeechTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat 
Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -27,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -40,14 +37,21 @@ def prompt(input_sample): } ] + def post_process(response): print(response) - label = ( - response["choices"][0]["message"]["content"].lower() - ) + label = response["choices"][0]["message"]["content"].lower() if "لا يمكنني" in label: return None - if "لا كراهية" in label or "لا تحتوي" in label or "ليست كراهية" in label or "ليس" in label or "ليست" in label or "not" in label or "لا" in label: + if ( + "لا كراهية" in label + or "لا تحتوي" in label + or "ليست كراهية" in label + or "ليس" in label + or "ليست" in label + or "not" in label + or "لا" in label + ): return "NOT_HS" if "تحتوي على خطاب كراهية" in label: return "HS" @@ -57,4 +61,4 @@ def post_process(response): return "HS" else: print("No clear label found.") - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py index de4147b7..4deb47c5 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import HateSpeechTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -29,6 +23,7 @@ def config(): }, } + def prompt(input_sample): base_prompt = f"Classify the following tweet into 'Hate Speech' or 'Not Hate Speech'. 
Provide only label \n {input_sample}" return [ @@ -39,13 +34,17 @@ def prompt(input_sample): ] - def post_process(response): label = ( response["choices"][0]["message"]["content"].lower().replace(".", "").strip() ) - if "no" in label or "not hate speech" in label or "not_hs" in label or "not" in label: + if ( + "no" in label + or "not hate speech" in label + or "not_hs" in label + or "not" in label + ): return "NOT_HS" elif "hate speech" in label or "hs" in label: return "HS" diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py index 9e91b81b..80dbbe15 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_JAIS13b_ZeroShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import HateSpeechTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -25,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py index 5d28b966..70efc7b3 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Arabic.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import OSACT4SubtaskBDataset from llmebench.models import AzureModel from llmebench.tasks import HateSpeechTask -import random - def metadata(): @@ -10,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -27,6 +25,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = ( "صنف التغريدة التالية حسب احتوائها على خطاب كراهية. أجب ب 'كراهية' أو 'لا كراهية' وقدم التصنيف فقط." @@ -45,6 +44,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -53,6 +53,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): print(response) if "output" in response: @@ -62,10 +63,18 @@ def post_process(response): print("Response .. " + str(response)) return "NOT_HS" # Default to "NOT_HS" when unsure - if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + if ( + "not hate speech" in label + or "not_hs" in label + or "لا كراهية" in label + or "لا" in label + or "ليست" in label + or "ليس" in label + or "no" in label + ): return "NOT_HS" elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech": return "HS" else: - - return None \ No newline at end of file + + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py index 3fb3263d..c549025f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_English.py @@ -3,19 +3,15 @@ from llmebench.tasks import HateSpeechTask - - def metadata(): return { "author": 
"Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -27,6 +23,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = ( "Respond only with 'Hate speech' if it is hate speech and 'Not hate speech' if it is not hate speech from the following tweets. " @@ -45,6 +42,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -52,7 +50,11 @@ def prompt(input_sample, examples): "content": few_shot_prompt(input_sample, examples), } ] + + import random + + def post_process(response): print(response) if "output" in response: @@ -62,10 +64,18 @@ def post_process(response): print("Response .. 
" + str(response)) return "NOT_HS" # Default to "NOT_HS" when unsure - if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + if ( + "not hate speech" in label + or "not_hs" in label + or "لا كراهية" in label + or "لا" in label + or "ليست" in label + or "ليس" in label + or "no" in label + ): return "NOT_HS" elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech": return "HS" else: print("No clear label found.") - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py index 74a314d8..5c02bcac 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_FewShot_Mixed.py @@ -1,7 +1,8 @@ +import random + from llmebench.datasets import OSACT4SubtaskBDataset from llmebench.models import AzureModel from llmebench.tasks import HateSpeechTask -import random def metadata(): @@ -9,11 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -25,11 +25,12 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = ( - "أجب فقط بـ 'hate_speech' إذا كانت خطاب كراهية و'not_hate_speech' إذا لم تكن خطاب كراهية من التغريدات التالية. " - "إليك بعض الأمثلة لتوجيهك:\n\n" -) + "أجب فقط بـ 'hate_speech' إذا كانت خطاب كراهية و'not_hate_speech' إذا لم تكن خطاب كراهية من التغريدات التالية. " + "إليك بعض الأمثلة لتوجيهك:\n\n" + ) for index, example in enumerate(examples): label = "hate_speech" if example["label"] == "HS" else "not_hate_speech" @@ -44,6 +45,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -52,6 +54,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): print(response) if "output" in response: @@ -61,10 +64,18 @@ def post_process(response): print("Response .. 
" + str(response)) return "NOT_HS" # Default to "NOT_HS" when unsure - if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + if ( + "not hate speech" in label + or "not_hs" in label + or "لا كراهية" in label + or "لا" in label + or "ليست" in label + or "ليس" in label + or "no" in label + ): return "NOT_HS" elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech": return "HS" else: print("No clear label found.") - return random.choice(["HS","NOT_HS"]) \ No newline at end of file + return random.choice(["HS", "NOT_HS"]) diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py index e505d147..128bf6be 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Arabic.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import OSACT4SubtaskBDataset from llmebench.models import AzureModel from llmebench.tasks import HateSpeechTask -import random - def metadata(): @@ -10,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -27,6 +25,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -40,6 +39,7 @@ def prompt(input_sample): } ] + def post_process(response): print(response) if "output" in response: @@ -49,10 +49,18 @@ def post_process(response): print("Response .. " + str(response)) return "NOT_HS" # Default to "NOT_HS" when unsure - if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + if ( + "not hate speech" in label + or "not_hs" in label + or "لا كراهية" in label + or "لا" in label + or "ليست" in label + or "ليس" in label + or "no" in label + ): return "NOT_HS" elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech": return "HS" else: print("No clear label found.") - return random.choice(["HS","NOT_HS"]) \ No newline at end of file + return random.choice(["HS", "NOT_HS"]) diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py index e8ed7817..3b440f6c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_English.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import OSACT4SubtaskBDataset from llmebench.models import AzureModel from llmebench.tasks import HateSpeechTask -import random - def metadata(): @@ -10,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, 
Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -27,6 +25,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -40,6 +39,7 @@ def prompt(input_sample): } ] + def post_process(response): print(response) if "output" in response: @@ -49,10 +49,18 @@ def post_process(response): print("Response .. 
" + str(response)) return "NOT_HS" # Default to "NOT_HS" when unsure - if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label: + if ( + "not hate speech" in label + or "not_hs" in label + or "لا كراهية" in label + or "لا" in label + or "ليست" in label + or "ليس" in label + or "no" in label + ): return "NOT_HS" elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech": return "HS" else: print("No clear label found.") - return random.choice(["HS","NOT_HS"]) + return random.choice(["HS", "NOT_HS"]) diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py index 0ba553cd..26534aee 100755 --- a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Llama3-8b_ZeroShot_Mixed.py @@ -1,7 +1,8 @@ +import random + from llmebench.datasets import OSACT4SubtaskBDataset from llmebench.models import AzureModel from llmebench.tasks import HateSpeechTask -import random def metadata(): @@ -9,11 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskBDataset, @@ -39,6 +39,7 @@ def prompt(input_sample): } ] + def post_process(response): print(response) if "output" in response: @@ -48,10 +49,19 @@ def post_process(response): print("Response .. " + str(response)) return "NOT_HS" # Default to "NOT_HS" when unsure - if "not hate speech" in label or "not_hs" in label or "لا كراهية" in label or "لا" in label or "ليست" in label or "ليس" in label or "no" in label or "not_hate_speech" in label: + if ( + "not hate speech" in label + or "not_hs" in label + or "لا كراهية" in label + or "لا" in label + or "ليست" in label + or "ليس" in label + or "no" in label + or "not_hate_speech" in label + ): return "NOT_HS" elif "hate speech" in label or "hs" in label or "كراهية" in label or "hate_speech": return "HS" else: print("No clear label found.") - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Random.py b/assets/ar/factuality_disinformation_harmful_content/hate_speech/OSACT4SubtaskB_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git 
a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py index ca8bd5a1..89ad85ef 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import OffensiveTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -32,7 +26,8 @@ def config(): def prompt(input_sample, examples): base_prompt = ( - "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'. 
هذه بعض الأمثلة: " + "\n" + "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'. هذه بعض الأمثلة: " + + "\n" ) return [ @@ -55,7 +50,12 @@ def few_shot_prompt(input_sample, base_prompt, examples): label = "غير مسيء" if example["label"] == "NOT_OFF" else "مسيء" out_prompt = ( - out_prompt + "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + out_prompt + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + label + + "\n\n" ) # Append the sentence we want the model to predict for but leave the Label blank @@ -72,7 +72,9 @@ def post_process(response): label = out.strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py index 8c27391b..c95cf728 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import OffensiveTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -31,9 +25,7 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - "If the following tweet is offensive, label it as 'offensive'. Return 'not offensive' otherwise. Here are some examples:\n" - ) + base_prompt = "If the following tweet is offensive, label it as 'offensive'. Return 'not offensive' otherwise. Here are some examples:\n" return [ { @@ -72,7 +64,9 @@ def post_process(response): label = out.strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py index 335c4e78..7a66f54c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import OffensiveTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar 
Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -30,10 +26,10 @@ def config(): def prompt(input_sample, examples): base_prompt = ( - "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'offensive' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'not offensive'. هذه بعض الأمثلة: " + "\n" + "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'offensive' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'not offensive'. 
هذه بعض الأمثلة: " + + "\n" ) - return [ { "role": "system", @@ -54,7 +50,12 @@ def few_shot_prompt(input_sample, base_prompt, examples): label = "not offensive" if example["label"] == "NOT_OFF" else "offensive" out_prompt = ( - out_prompt + "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n" + out_prompt + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + label + + "\n\n" ) # Append the sentence we want the model to predict for but leave the Label blank @@ -71,7 +72,9 @@ def post_process(response): label = out.strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py index 7d54e5f9..7f4b92b9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import OffensiveTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For 
a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -38,18 +32,24 @@ def prompt(input_sample): }, { "role": "user", - "content": ("إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'. " + "\n\nالتغريدة" + input_sample + "\nالتصنيف: ") + "content": ( + "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'. " + + "\n\nالتغريدة" + + input_sample + + "\nالتصنيف: " + ), }, ] - def post_process(response): out = response["choices"][0]["message"]["content"] label = out.strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py index 13db2e4f..2ced81f6 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from 
llmebench.tasks import OffensiveTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -38,7 +32,12 @@ def prompt(input_sample): }, { "role": "user", - "content": ("If the following tweet is offensive, label it as 'offensive'. Return 'not offensive' otherwise." + "\n\ntweet: " + input_sample + "\nlabel: ") + "content": ( + "If the following tweet is offensive, label it as 'offensive'. Return 'not offensive' otherwise." 
+ + "\n\ntweet: " + + input_sample + + "\nlabel: " + ), }, ] @@ -48,7 +47,9 @@ def post_process(response): label = out.strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py index 2f6c7c6f..4200a7c8 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import OffensiveTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -36,7 +32,12 @@ def prompt(input_sample): }, { "role": "user", - "content": ("إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'offensive' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'not offensive'. " + "\n\nالتغريدة" + input_sample + "\nالتصنيف: ") + "content": ( + "إذا كانت التغريدة التالية تحتوي على أي درجة من الإساءة، قم بالرد بـ 'offensive' إذا لم تحتوي على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'not offensive'. " + + "\n\nالتغريدة" + + input_sample + + "\nالتصنيف: " + ), }, ] @@ -46,7 +47,9 @@ def post_process(response): label = out.strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py index 339ca0b8..0f4fa923 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Arabic.py @@ -1,22 +1,19 @@ import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import FastChatModel from llmebench.tasks import OffensiveTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, 
Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -28,10 +25,9 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): - base_prompt = ( - f"يرجى تقييم الجملة التالية لتحديد ما إذا كانت تحتوي على محتوى مسيء. إذا كانت الجملة تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء'. إذا لم تحتوي الجملة على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'.\n" - ) + base_prompt = f"يرجى تقييم الجملة التالية لتحديد ما إذا كانت تحتوي على محتوى مسيء. إذا كانت الجملة تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء'. 
إذا لم تحتوي الجملة على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'.\n" for index, example in enumerate(examples): label = "غير مسيء" if example["label"] == "NOT_OFF" else "مسيء" base_prompt += ( @@ -45,6 +41,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -53,6 +50,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] @@ -65,4 +63,3 @@ def post_process(response): return "OFF" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py index f486f68a..a9446180 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_English.py @@ -1,22 +1,19 @@ import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import FastChatModel from llmebench.tasks import OffensiveTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -28,14 +25,20 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): base_prompt = "Review the content provided and classify it as 'OFF' for Offensive or 'NOT_OFF' for Not Offensive. Here are some examples to guide your classification:\n\n" for index, example in enumerate(examples): label = "NOT_OFF" if example["label"] == "NOT_OFF" else "OFF" base_prompt += f"Example {index + 1}: Content - '{example['input']}' -> Classification: {label}\n" - base_prompt += "\nNow classify the new content:\nContent: '" + input_sample + "'\nClassification:\n" + base_prompt += ( + "\nNow classify the new content:\nContent: '" + + input_sample + + "'\nClassification:\n" + ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -44,12 +47,13 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip().lower() label = re.sub(r"|", "", label) - #print("label", label) + # print("label", label) # Ensure only the expected labels are returned if "not" in label or "غير" in label or "ليس" in label: @@ -57,4 +61,4 @@ def post_process(response): elif "is" in label or "مسيء" in label or "off" in label: return "OFF" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py index 4dbe7e63..e4bfa858 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_FewShot_Mixed.py @@ -1,20 +1,19 @@ import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import FastChatModel from llmebench.tasks import OffensiveTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -26,10 +25,9 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): - base_prompt = ( - "يرجى تقييم المحتوى التالي وتحديد ما إذا كان يحتوي على لغة قد تعتبر مسيئة أو غير لائقة. استخدم التصنيفات 'OFF' للمحتوى المسيء أو 'NOT_OFF' للمحتوى غير المسيء. إليك بعض الأمثلة لتوجيهك:\n\n" - ) + base_prompt = "يرجى تقييم المحتوى التالي وتحديد ما إذا كان يحتوي على لغة قد تعتبر مسيئة أو غير لائقة. استخدم التصنيفات 'OFF' للمحتوى المسيء أو 'NOT_OFF' للمحتوى غير المسيء. 
إليك بعض الأمثلة لتوجيهك:\n\n" for index, example in enumerate(examples): label = "NOT_OFF" if example["label"] == "NOT_OFF" else "OFF" base_prompt += ( @@ -43,6 +41,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -51,17 +50,17 @@ def prompt(input_sample, examples): } ] + def post_process(response): print("Received response:", response) - if not response or 'output' not in response: + if not response or "output" not in response: print("Error or missing output in response:", response) return "NOT_OFF" # Default to "NOT_OFF" when unsure label = response["output"].strip().upper() - label = re.sub(r"|", "", label) + label = re.sub(r"|", "", label) label = label.lower() - if "not" in label or "غير" in label or "no" in label : + if "not" in label or "غير" in label or "no" in label: return "NOT_OFF" return "OFF" - diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py index a799210a..0070c26e 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Arabic.py @@ -1,9 +1,8 @@ +import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import FastChatModel from llmebench.tasks import OffensiveTask -import re - - def metadata(): @@ -11,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, 
and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -28,6 +25,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -38,6 +36,7 @@ def prompt(input_sample): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_English.py index 8afed567..39d42e3b 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_English.py @@ -1,9 +1,8 @@ +import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import FastChatModel from llmebench.tasks import OffensiveTask -import re - - def metadata(): @@ -11,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive 
analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -28,6 +25,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -38,13 +36,14 @@ def prompt(input_sample): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("label:", "").strip().lower() label = re.sub(r"|", "", label) - #print("label", label) + # print("label", label) # Ensure only the expected labels are returned if "not" in label or "غير" in label or "ليس" in label: @@ -53,4 +52,3 @@ def post_process(response): return "OFF" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Mixed.py index 617b2014..1c166fea 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_JAIS13b_ZeroShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import OffensiveTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed 
publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -25,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -35,18 +34,24 @@ def prompt(input_sample): } ] + def post_process(response): - + label = response["choices"][0]["message"]["content"] label = label.strip().lower() print("label", label) - - if "not" in label or "غير" in label or "ليس" in label or "لا تحتوي" in label or "not" in label or "'not_off'" in label: + if ( + "not" in label + or "غير" in label + or "ليس" in label + or "لا تحتوي" in label + or "not" in label + or "'not_off'" in label + ): return "NOT_OFF" - elif "off" in label or "مس" in label or "ضار" in label : + elif "off" in label or "مس" in label or "ضار" in label: return "OFF" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Arabic.py index 2331e893..cbef87f8 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Arabic.py @@ -1,22 +1,19 @@ import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import AzureModel from llmebench.tasks import OffensiveTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", 
"affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -28,10 +25,9 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): - base_prompt = ( - f"يرجى تقييم الجملة التالية لتحديد ما إذا كانت تحتوي على محتوى مسيء. إذا كانت الجملة تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء'. إذا لم تحتوي الجملة على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'.\n" - ) + base_prompt = f"يرجى تقييم الجملة التالية لتحديد ما إذا كانت تحتوي على محتوى مسيء. إذا كانت الجملة تحتوي على أي درجة من الإساءة، قم بالرد بـ 'مسيء'. 
إذا لم تحتوي الجملة على أي محتوى مسيء أو مهين أو ضار، قم بالرد بـ 'غير مسيء'.\n" for index, example in enumerate(examples): label = "غير مسيء" if example["label"] == "NOT_OFF" else "مسيء" base_prompt += ( @@ -54,15 +50,16 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["output"].strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: print("No clear label found:", label) return None - - diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_English.py index f9fd0d70..3da5b5c8 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_English.py @@ -1,22 +1,19 @@ import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import AzureModel from llmebench.tasks import OffensiveTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -28,6 +25,7 @@ def config(): }, } + # def few_shot_prompt(input_sample, examples): # base_prompt = "Review the content provided and classify it as 'OFF' for Offensive or 'NOT_OFF' for Not Offensive. Here are some examples to guide your classification:\n\n" # for index, example in enumerate(examples): @@ -36,13 +34,20 @@ def config(): # base_prompt += "\nNow classify the new content:\nContent: '" + input_sample + "'\nClassification:\n" # return base_prompt + def few_shot_prompt(input_sample, examples): base_prompt = f"Please assess the following sentence to determine if it contains any offensive content. If the sentence contains any degree of offense, respond with 'OFF'. If the sentence does not contain any offensive, insulting, or harmful material, respond with 'NOT_OFF'. 
Do not provide any additional explanation.Here are some examples:\n\n" for index, example in enumerate(examples): label = "NOT_OFF" if example["label"] == "NOT_OFF" else "OFF" base_prompt += f"Example {index + 1}: Content - '{example['input']}' -> Classification: {label}\n" - base_prompt += "\nNow classify the new content:\nContent: '" + input_sample + "'\nClassification:\n" + base_prompt += ( + "\nNow classify the new content:\nContent: '" + + input_sample + + "'\nClassification:\n" + ) return base_prompt + + def prompt(input_sample, examples): return [ { @@ -51,6 +56,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["output"].strip().lower() label = re.sub(r"|", "", label) # Removing potential formatting characters diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Mixed.py index 7ee972ad..1943d013 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_FewShot_Mixed.py @@ -1,20 +1,19 @@ import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import AzureModel from llmebench.tasks import OffensiveTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -26,10 +25,9 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): - base_prompt = ( - "يرجى تقييم المحتوى التالي وتحديد ما إذا كان يحتوي على لغة قد تعتبر مسيئة أو غير لائقة. استخدم التصنيفات 'OFF' للمحتوى المسيء أو 'NOT_OFF' للمحتوى غير المسيء. إليك بعض الأمثلة لتوجيهك:\n\n" - ) + base_prompt = "يرجى تقييم المحتوى التالي وتحديد ما إذا كان يحتوي على لغة قد تعتبر مسيئة أو غير لائقة. استخدم التصنيفات 'OFF' للمحتوى المسيء أو 'NOT_OFF' للمحتوى غير المسيء. إليك بعض الأمثلة لتوجيهك:\n\n" for index, example in enumerate(examples): label = "NOT_OFF" if example["label"] == "NOT_OFF" else "OFF" base_prompt += ( @@ -43,6 +41,7 @@ def few_shot_prompt(input_sample, examples): ) return base_prompt + def prompt(input_sample, examples): return [ { @@ -51,6 +50,7 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["output"].strip().lower() label = re.sub(r"|", "", label) # Removing potential formatting characters @@ -62,4 +62,3 @@ def post_process(response): return "OFF" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Arabic.py index 95da9fed..b9e88c99 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import OffensiveTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, 
Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -27,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -37,12 +34,15 @@ def prompt(input_sample): } ] + def post_process(response): label = response["output"].strip().lower() if "غير" in label or "not" in label or "ليس" in label or "ليست" in label: return "NOT_OFF" - elif "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label: + elif ( + "مسيء" in label or "offensive" in label or "المساءاة" in label or "off" in label + ): return "OFF" else: print("No clear label found:", label) diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_English.py index b455eaa4..2c270834 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_English.py @@ -1,8 +1,8 @@ +import re + from llmebench.datasets import OSACT4SubtaskADataset from llmebench.models import AzureModel from 
llmebench.tasks import OffensiveTask -import re - def metadata(): @@ -10,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -27,6 +25,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -37,6 +36,7 @@ def prompt(input_sample): } ] + def post_process(response): label = response["output"].strip().lower() label = re.sub(r"|", "", label) # Removing potential formatting characters diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Mixed.py index e3365fe4..d5e08360 100755 --- a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Llama3-8b_ZeroShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import OffensiveTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research 
Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": OSACT4SubtaskADataset, @@ -25,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -35,9 +34,10 @@ def prompt(input_sample): } ] + def post_process(response): print(response) - if not response or 'output' not in response: + if not response or "output" not in response: print("Error or missing output in response:", response) return "NOT_OFF" # Safely default to "NOT_OFF" when unsure @@ -45,11 +45,16 @@ def post_process(response): label = label.lower() print("label", label) - - if "not" in label or "غير" in label or "ليس" in label or "لا تحتوي" in label or "NOT" in label or "'not_off'" in label: + if ( + "not" in label + or "غير" in label + or "ليس" in label + or "لا تحتوي" in label + or "NOT" in label + or "'not_off'" in label + ): return "NOT_OFF" elif "off" in label: return "OFF" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Random.py b/assets/ar/factuality_disinformation_harmful_content/offensive_language/OSACT4SubtaskA_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_GPT4V_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_GPT4V_ZeroShot.py old mode 100644 new mode 100755 diff --git 
a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_Image_GPT4V_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_Image_GPT4V_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_MM_GPT4V_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_MM_GPT4V_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_Text_GPT4V_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArMemes_Text_GPT4V_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py index 298b118c..8640392e 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py @@ -1,6 +1,7 @@ +import ast import random import re -import ast + from llmebench.datasets import WANLP22T3PropagandaDataset from llmebench.models import OpenAIModel from 
llmebench.tasks import MultilabelPropagandaTask @@ -9,21 +10,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -56,26 +51,39 @@ def config(): }, } + def translate_labels(label): label_mapping = { - "no technique": "بدون تقنية", "Smears": "تشويه", "Exaggeration/Minimisation": "مبالغة/تقليل", - "Loaded Language": "لغة محملة بالمشاعر", "Appeal to fear/prejudice": "الاحتكام إلى الخوف/التحيز", - "Name calling/Labeling": "التسمية/الملصقات", "Slogans": "الشعارات", "Repetition": "التكرار", - "Doubt": "الشك", "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك", - "Flag-waving": "التلويح بالعلم", "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)", + "no technique": "بدون تقنية", + "Smears": "تشويه", + "Exaggeration/Minimisation": "مبالغة/تقليل", + "Loaded Language": "لغة محملة بالمشاعر", + "Appeal to fear/prejudice": "الاحتكام إلى الخوف/التحيز", + "Name calling/Labeling": "التسمية/الملصقات", + "Slogans": "الشعارات", + "Repetition": "التكرار", + "Doubt": "الشك", + "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك", + "Flag-waving": "التلويح بالعلم", + "Glittering 
generalities (Virtue)": "التعميمات البراقة (الفضيلة)", "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (مغالطة رجل القش)", "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)", - "Appeal to authority": "الاحتكام إلى السلطة", "Whataboutism": "ماذا عن", + "Appeal to authority": "الاحتكام إلى السلطة", + "Whataboutism": "ماذا عن", "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية", - "Thought-terminating cliché": "الكليشيه القاطع للفكر", "Causal Oversimplification": "التبسيط السببي" + "Thought-terminating cliché": "الكليشيه القاطع للفكر", + "Causal Oversimplification": "التبسيط السببي", } return label_mapping.get(label, label) + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\nاليك بعض الأمثلة:\n\n" for index, example in enumerate(examples): tech_str = ", ".join([f"'{translate_labels(t)}'" for t in example["label"]]) - out_prompt += f"مثال {index}:\nالتغريدة: {example['input']}\nالتصنيف: {tech_str}\n\n" + out_prompt += ( + f"مثال {index}:\nالتغريدة: {example['input']}\nالتصنيف: {tech_str}\n\n" + ) out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n" return out_prompt @@ -86,7 +94,7 @@ def prompt(input_sample, examples): "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (مغالطة رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "الاحتكام إلى السلطة"، "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." 
""" - + return [ { "role": "system", @@ -102,23 +110,42 @@ def prompt(input_sample, examples): def post_process(response): label = response["choices"][0]["message"]["content"] # .lower() label = label.strip().lower() - if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + if ( + "لا يوجد في النص" in label + or label == "'no technique'" + or "doesn't" in label + or "does not" in label + or "لا يحتوي" in label + or "لا يستخدم" in label + ): return [] label_mapping = { - "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", - "لغة محملة بالمشاعر": "Loaded Language", "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", - "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", - "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", - "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة بالمشاعر": "Loaded Language", + "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", - "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "الاحتكام إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", - "الكليشيه 
القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification", } - detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if - arabic_label in label or english_label.lower() in label] + detected_labels = [ + english_label + for arabic_label, english_label in label_mapping.items() + if arabic_label in label or english_label.lower() in label + ] final_labels = [l for l in detected_labels if "no technique" not in l] diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py index 2a7c1f0f..447e18c1 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -192,7 +186,14 @@ def fix_label(pred_label): def post_process(response): label = response["choices"][0]["message"]["content"].lower() - if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + if ( + "لا يوجد في النص" in label + or label == "'no technique'" + or "doesn't" in label + or "does not" in label + or "لا يحتوي" in label + or "لا يستخدم" in label + ): return [] pred_label = fix_label(label) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py index e7dd966a..c2a4f1ba 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py @@ -9,19 +9,15 @@ random.seed(1333) - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -105,25 +101,43 @@ def prompt(input_sample, examples): def post_process(response): label = response["choices"][0]["message"]["content"] # .lower() label = label.strip().lower() - if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + if ( + "لا يوجد في النص" in label + or label == "'no technique'" + or "doesn't" in label + or "does not" in label + or "لا يحتوي" in label + or "لا يستخدم" in label + ): return [] - + label_mapping = { - "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", - "لغة محملة بالمشاعر": "Loaded Language", "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", - "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", - "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", - "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة بالمشاعر": "Loaded Language", + "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", "عرض بيانات غير ذات صلة (السمكة الحمراء)": 
"Presenting Irrelevant Data (Red Herring)", - "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "الاحتكام إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", - "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification", } - detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if - arabic_label in label or english_label.lower() in label] + detected_labels = [ + english_label + for arabic_label, english_label in label_mapping.items() + if arabic_label in label or english_label.lower() in label + ] final_labels = [l for l in detected_labels if "no technique" not in l] return list(set(final_labels)) - diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py index 6393fb8c..da7aa2d9 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our 
peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -66,13 +60,10 @@ def prompt(input_sample): prompt_string = base_prompt + "التغريدة: \n\n" + input_sample + "التصنيف: \n" - return [ { "role": "system", - "content": ( - "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي." - ), + "content": ("أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي."), }, { "role": "user", @@ -84,23 +75,42 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"].lower() label = label.strip().lower() - if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + if ( + "لا يوجد في النص" in label + or label == "'no technique'" + or "doesn't" in label + or "does not" in label + or "لا يحتوي" in label + or "لا يستخدم" in label + ): return [] label_mapping = { - "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", - "لغة محملة بالمشاعر": "Loaded Language", "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", - "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", - "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", - "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة بالمشاعر": "Loaded Language", + "الاحتكام إلى الخوف/التحيز": 
"Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", - "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "الاحتكام إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", - "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification", } - detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if - arabic_label in label or english_label.lower() in label] + detected_labels = [ + english_label + for arabic_label, english_label in label_mapping.items() + if arabic_label in label or english_label.lower() in label + ] final_labels = [l for l in detected_labels if "no technique" not in l] return list(set(final_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py index 833d5bd8..c02e72b0 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj 
Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -69,9 +63,7 @@ def prompt(input_sample): return [ { "role": "system", - "content": ( - "You are an expert social media content analyst." - ), + "content": ("You are an expert social media content analyst."), }, { "role": "user", @@ -80,7 +72,6 @@ def prompt(input_sample): ] - def fix_label(pred_label): if "used in this text" in pred_label: return ["no technique"] @@ -170,7 +161,14 @@ def fix_label(pred_label): def post_process(response): label = response["choices"][0]["message"]["content"].lower() - if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + if ( + "لا يوجد في النص" in label + or label == "'no technique'" + or "doesn't" in label + or "does not" in label + or "لا يحتوي" in label + or "لا يستخدم" in label + ): return [] pred_label = fix_label(label) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py index ed833d4f..c647b461 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py +++ 
b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py @@ -9,19 +9,15 @@ random.seed(1333) - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -62,18 +58,12 @@ def prompt(input_sample): f"قدم التصنيفات فقط كقائمة من النصوص.\n" ) - prompt_string = ( - base_prompt + - f"التغريدة: {input_sample}\n\n" - f"التصنيف: \n" - ) + prompt_string = base_prompt + f"التغريدة: {input_sample}\n\n" f"التصنيف: \n" return [ { "role": "system", - "content": ( - "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي." 
- ), + "content": ("أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي."), }, { "role": "user", @@ -82,27 +72,45 @@ def prompt(input_sample): ] - def post_process(response): label = response["choices"][0]["message"]["content"].lower() label = label.strip().lower() - if "لا يوجد في النص" in label or label == "'no technique'" or "doesn't" in label or "does not" in label or "لا يحتوي" in label or "لا يستخدم" in label: + if ( + "لا يوجد في النص" in label + or label == "'no technique'" + or "doesn't" in label + or "does not" in label + or "لا يحتوي" in label + or "لا يستخدم" in label + ): return [] label_mapping = { - "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", - "لغة محملة بالمشاعر": "Loaded Language", "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", - "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", - "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", - "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة بالمشاعر": "Loaded Language", + "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", - "الاحتكام إلى السلطة": "Appeal to authority", "ماذا عن": "Whataboutism", + "الاحتكام إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": 
"Black-and-white Fallacy/Dictatorship", - "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification", } - detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if - arabic_label in label or english_label.lower() in label] + detected_labels = [ + english_label + for arabic_label, english_label in label_mapping.items() + if arabic_label in label or english_label.lower() in label + ] final_labels = [l for l in detected_labels if "no technique" not in l] return list(set(final_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py index 792e0d3f..f2de7611 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py @@ -8,19 +8,15 @@ random.seed(1333) - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -53,6 +49,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): instruction = """ "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، @@ -78,16 +75,18 @@ def few_shot_prompt(input_sample, examples): "Whataboutism": "ماذا عن", "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية", "Thought-terminating cliché": "الكليشيه القاطع للفكر", - "Causal Oversimplification": "التبسيط السببي" + "Causal Oversimplification": "التبسيط السببي", } - + few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n" for index, example in enumerate(examples): print(f"Processing example {index + 1}") print(f"Example label: {example['label']}") - + try: - labels = ", ".join(label_mapping[l] for l in example["label"] if example["label"]) + labels = ", ".join( + label_mapping[l] for l in example["label"] if example["label"] + ) print("Labels in few_shot:", labels) except KeyError as e: print(f"KeyError: {e} in example {index + 1}") @@ -96,20 +95,17 @@ def few_shot_prompt(input_sample, examples): few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: " return few_shot_text + def prompt(input_sample, examples): - return [ - { - "role": "user", - "content": few_shot_prompt(input_sample, examples) - } - ] + return [{"role": "user", "content": few_shot_prompt(input_sample, examples)}] + def post_process(response): label = response["choices"][0]["message"]["content"].lower() label = label.lower() - label = re.sub(r'<[^>]+>', '', label) # Remove any HTML-like tags + label = 
re.sub(r"<[^>]+>", "", label) # Remove any HTML-like tags label = label.lower() - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -129,7 +125,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } detected_labels = [] diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py index 94add2d8..86e8afe6 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -56,6 +50,7 @@ def config(): }, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n\n" out_prompt = out_prompt + "Here are some examples:\n\n" @@ -92,23 +87,21 @@ def prompt(input_sample, examples): "Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification." """ base_prompt = instruction.strip() - + return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] + def post_process(response): - + label = response["choices"][0]["message"]["content"].lower() - label = label.replace("", "").replace("", "") - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -128,7 +121,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } print("label: ", label) detected_labels = [] @@ -151,15 +144,30 @@ def post_process(response): detected_labels.append(label_mapping["التكرار"]) if "Doubt" in label: detected_labels.append(label_mapping["الشك"]) - if "Obfuscation, Intentional vagueness, Confusion" in label or "Obfuscation" in label or "Intentional vagueness" in label or "Confusion" in label: + if ( + "Obfuscation, Intentional vagueness, Confusion" in label + or "Obfuscation" in label + or "Intentional vagueness" in label + or "Confusion" in label + ): detected_labels.append(label_mapping["التعمية/الغموض 
المتعمد/الارتباك"]) if "Flag-waving" in label or "flag": detected_labels.append(label_mapping["التلويح بالعلم"]) - if "Glittering generalities (Virtue)" in label or "الفضيلة" in label or "Glittering": + if ( + "Glittering generalities (Virtue)" in label + or "الفضيلة" in label + or "Glittering" + ): detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"]) - if "Misrepresentation of Someone's Position (Straw Man)" in label or "تحريف موقف شخص" in label: + if ( + "Misrepresentation of Someone's Position (Straw Man)" in label + or "تحريف موقف شخص" in label + ): detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"]) - if "Presenting Irrelevant Data (Red Herring)" in label or "عرض بيانات غير ذات صلة" in label: + if ( + "Presenting Irrelevant Data (Red Herring)" in label + or "عرض بيانات غير ذات صلة" in label + ): detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"]) if "Appeal to authority" in label: detected_labels.append(label_mapping["النداء إلى السلطة"]) @@ -172,7 +180,4 @@ def post_process(response): if "Causal Oversimplification" in label or "التبسيط" in label: detected_labels.append(label_mapping["التبسيط السببي"]) - return detected_labels - - diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py index 01c1c3c8..72776442 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py @@ -8,17 +8,15 @@ random.seed(1333) - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - 
"description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -51,6 +49,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): instruction = """ "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"، @@ -70,19 +69,16 @@ def few_shot_prompt(input_sample, examples): few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: " return few_shot_text + def prompt(input_sample, examples): - return [ - { - "role": "user", - "content": few_shot_prompt(input_sample, examples) - } - ] + return [{"role": "user", "content": few_shot_prompt(input_sample, examples)}] + def post_process(response): label = response["choices"][0]["message"]["content"].lower() - label = re.sub(r'<[^>]+>', '', label) # Remove any HTML-like tags + label = re.sub(r"<[^>]+>", "", label) # Remove any HTML-like tags label = label.lower() - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -102,7 +98,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } detected_labels = [] @@ -112,7 +108,7 @@ def post_process(response): elif english_label.lower() in 
label: detected_labels.append(english_label) - print("Detected labels:", detected_labels) + print("Detected labels:", detected_labels) # Remove duplicates detected_labels = list(set(detected_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py index 8e63b704..a304edaa 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -68,16 +62,12 @@ def prompt(input_sample): { "role": "user", "content": ( - f' \n{instruction}\n' - + "التغريدة: " - + input_sample - + "التصنيف: " + f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: " ), } ] - def post_process(response): label = response["choices"][0]["message"]["content"].lower() label = label.lower() @@ -100,7 +90,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } print("label: ", label) detected_labels = [] @@ -148,5 +138,3 @@ def post_process(response): detected_labels = list(set(detected_labels)) return detected_labels - - diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py index e1d7cd36..a56c1dea 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py @@ -5,21 +5,15 @@ from llmebench.tasks import MultilabelPropagandaTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a 
comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -55,7 +49,7 @@ def config(): def prompt(input_sample): base_prompt = ( - f'Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from:\n\n' + f"Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from:\n\n" f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'" f"\nAnswer (only yes/no) in the following format: \n" f"'Doubt': 'yes', " diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py index 7c75471e..8ecbc6c2 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py @@ -9,19 +9,15 @@ random.seed(1333) - - def metadata(): return { "author": "Mohamed 
Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -65,16 +61,12 @@ def prompt(input_sample): { "role": "user", "content": ( - f' \n{instruction}\n' - + "التغريدة: " - + input_sample - + "التصنيف: " + f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: " ), } ] - def post_process(response): label = response["choices"][0]["message"]["content"].lower() label = label.lower() @@ -97,7 +89,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } print("label: ", label) detected_labels = [] @@ -141,7 +133,4 @@ def post_process(response): if "السببي" in label or "التبسيط" in label: detected_labels.append(label_mapping["التبسيط السببي"]) - return detected_labels - - diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py index 61926954..0cef8a61 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py @@ -50,7 +50,7 @@ # def few_shot_prompt(input_sample, examples): # instruction = """ # "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، -# "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، +# "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، # "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"." # """ # label_mapping = { @@ -152,7 +152,7 @@ # label = response["output"].strip().lower() # label = re.sub(r'<[^>]+>', '', label) # Remove any HTML-like tags # label = label.lower() - + # label_mapping = { # "بدون تقنية": "no technique", # "تشويه": "Smears", @@ -182,7 +182,7 @@ # elif english_label.lower() in label: # detected_labels.append(english_label) -# print("Detected labels:", detected_labels) +# print("Detected labels:", detected_labels) # # this is for duplicates values # detected_labels = list(set(detected_labels)) @@ -190,6 +190,7 @@ # return detected_labels import random import re + from llmebench.datasets import WANLP22T3PropagandaDataset from llmebench.models import AzureModel from llmebench.tasks import MultilabelPropagandaTask @@ -197,19 +198,15 @@ random.seed(1333) - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University 
(HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -218,40 +215,67 @@ def config(): "model": AzureModel, "model_args": { "class_labels": [ - "no technique", "Smears", "Exaggeration/Minimisation", "Loaded Language", - "Appeal to fear/prejudice", "Name calling/Labeling", "Slogans", "Repetition", - "Doubt", "Obfuscation, Intentional vagueness, Confusion", "Flag-waving", - "Glittering generalities (Virtue)", "Misrepresentation of Someone's Position (Straw Man)", - "Presenting Irrelevant Data (Red Herring)", "Appeal to authority", "Whataboutism", - "Black-and-white Fallacy/Dictatorship", "Thought-terminating cliché", "Causal Oversimplification" + "no technique", + "Smears", + "Exaggeration/Minimisation", + "Loaded Language", + "Appeal to fear/prejudice", + "Name calling/Labeling", + "Slogans", + "Repetition", + "Doubt", + "Obfuscation, Intentional vagueness, Confusion", + "Flag-waving", + "Glittering generalities (Virtue)", + "Misrepresentation of Someone's Position (Straw Man)", + "Presenting Irrelevant Data (Red Herring)", + "Appeal to authority", + "Whataboutism", + "Black-and-white Fallacy/Dictatorship", + "Thought-terminating cliché", + "Causal Oversimplification", ], "max_tries": 3, }, } + def translate_labels(label): label_mapping = { - "no technique": "بدون تقنية", "Smears": "تشويه", "Exaggeration/Minimisation": "مبالغة/تقليل", - "Loaded Language": "لغة محملة", "Appeal to fear/prejudice": "النداء إلى 
الخوف/التحيز", - "Name calling/Labeling": "التسمية/الملصقات", "Slogans": "الشعارات", "Repetition": "التكرار", - "Doubt": "الشك", "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك", - "Flag-waving": "التلويح بالعلم", "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)", + "no technique": "بدون تقنية", + "Smears": "تشويه", + "Exaggeration/Minimisation": "مبالغة/تقليل", + "Loaded Language": "لغة محملة", + "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز", + "Name calling/Labeling": "التسمية/الملصقات", + "Slogans": "الشعارات", + "Repetition": "التكرار", + "Doubt": "الشك", + "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك", + "Flag-waving": "التلويح بالعلم", + "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)", "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)", "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)", - "Appeal to authority": "النداء إلى السلطة", "Whataboutism": "ماذا عن", + "Appeal to authority": "النداء إلى السلطة", + "Whataboutism": "ماذا عن", "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية", - "Thought-terminating cliché": "الكليشيه القاطع للفكر", "Causal Oversimplification": "التبسيط السببي" + "Thought-terminating cliché": "الكليشيه القاطع للفكر", + "Causal Oversimplification": "التبسيط السببي", } return label_mapping.get(label, label) + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\nاليك بعض الأمثلة:\n\n" for index, example in enumerate(examples): tech_str = ", ".join([f"'{translate_labels(t)}'" for t in example["label"]]) - out_prompt += f"مثال {index}:\nالتغريدة: {example['input']}\nالتصنيف: {tech_str}\n\n" + out_prompt += ( + f"مثال {index}:\nالتغريدة: {example['input']}\nالتصنيف: {tech_str}\n\n" + ) out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n" return out_prompt + def prompt(input_sample, examples): 
base_prompt = """ "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"، @@ -261,29 +285,43 @@ def prompt(input_sample, examples): return [ { "role": "user", - "content": few_shot_prompt(input_sample, base_prompt, examples) + "content": few_shot_prompt(input_sample, base_prompt, examples), } ] + def post_process(response): - if not response or 'error' in response or 'output' not in response: + if not response or "error" in response or "output" not in response: print("Error or missing output in response:", response) return None - label = re.sub(r'<[^>]+>', '', response["output"].strip().lower()) + label = re.sub(r"<[^>]+>", "", response["output"].strip().lower()) label_mapping = { - "بدون تقنية": "no technique", "تشويه": "Smears", "مبالغة/تقليل": "Exaggeration/Minimisation", - "لغة محملة": "Loaded Language", "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", - "التسمية/الملصقات": "Name calling/Labeling", "الشعارات": "Slogans", "التكرار": "Repetition", - "الشك": "Doubt", "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", - "التلويح بالعلم": "Flag-waving", "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", + "بدون تقنية": "no technique", + "تشويه": "Smears", + "مبالغة/تقليل": "Exaggeration/Minimisation", + "لغة محملة": "Loaded Language", + "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice", + "التسمية/الملصقات": "Name calling/Labeling", + "الشعارات": "Slogans", + "التكرار": "Repetition", + "الشك": "Doubt", + "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion", + "التلويح بالعلم": "Flag-waving", + "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)", "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)", "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)", - "النداء إلى السلطة": 
"Appeal to authority", "ماذا عن": "Whataboutism", + "النداء إلى السلطة": "Appeal to authority", + "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", - "الكليشيه القاطع للفكر": "Thought-terminating cliché", "التبسيط السببي": "Causal Oversimplification" + "الكليشيه القاطع للفكر": "Thought-terminating cliché", + "التبسيط السببي": "Causal Oversimplification", } - detected_labels = [english_label for arabic_label, english_label in label_mapping.items() if arabic_label in label or english_label.lower() in label] + detected_labels = [ + english_label + for arabic_label, english_label in label_mapping.items() + if arabic_label in label or english_label.lower() in label + ] print("Detected labels:", list(set(detected_labels))) return list(set(detected_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py index 856aff3b..b0bbef4c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -56,6 +50,7 @@ def config(): }, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n\n" out_prompt = out_prompt + "Here are some examples:\n\n" @@ -92,25 +87,24 @@ def prompt(input_sample, examples): "Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification." """ base_prompt = instruction.strip() - + return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] + def post_process(response): - if not response or 'error' in response or 'output' not in response: + if not response or "error" in response or "output" not in response: print("Error or missing output in response:", response) return "No respose" # Safely default to NOT_ADULT when unsure label = response["output"].strip().lower() label = label.replace("", "").replace("", "") label = label.lower() - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -130,7 +124,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } detected_labels = [] @@ -140,7 +134,7 @@ def post_process(response): elif english_label.lower() in label: detected_labels.append(english_label) - print("Detected labels:", detected_labels) + print("Detected labels:", detected_labels) # Remove duplicates detected_labels = list(set(detected_labels)) diff --git 
a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py index 4f68be80..3b75978a 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py @@ -8,17 +8,15 @@ random.seed(1333) - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -51,6 +49,7 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): instruction = """ "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"، @@ -70,23 +69,20 @@ def few_shot_prompt(input_sample, examples): few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: " return few_shot_text + def prompt(input_sample, examples): - return [ - { - "role": "user", - "content": few_shot_prompt(input_sample, examples) - } - ] + return [{"role": "user", "content": few_shot_prompt(input_sample, examples)}] + def post_process(response): - if not response or 'error' in response or 'output' not in response: + if not response or "error" in response or "output" not in response: print("Error or missing output in response:", response) return "No respose" # Safely default to NOT_ADULT when unsure label = response["output"].strip().lower() label = label.replace("", "").replace("", "") label = label.lower() - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -106,7 +102,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } detected_labels = [] @@ -116,7 +112,7 @@ def post_process(response): elif english_label.lower() in label: detected_labels.append(english_label) - print("Detected labels:", detected_labels) + 
print("Detected labels:", detected_labels) # Remove duplicates detected_labels = list(set(detected_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py index bbdd2ca5..a42d224f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -68,25 +62,21 @@ def prompt(input_sample): { "role": "user", "content": ( - f' \n{instruction}\n' - + "التغريدة: " - + input_sample - + "التصنيف: " + f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: " ), } ] - def post_process(response): - if not response or 'error' in response or 'output' not in response: + if not response or "error" in response or "output" not in response: print("Error or missing output in response:", response) return "No respose" # Safely default to NOT_ADULT when unsure label = response["output"].strip().lower() label = label.replace("", "").replace("", "") label = label.lower() - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -106,7 +96,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } detected_labels = [] @@ -116,12 +106,9 @@ def post_process(response): elif english_label.lower() in label: detected_labels.append(english_label) - print("Detected labels:", detected_labels) + print("Detected labels:", detected_labels) # Remove duplicates detected_labels = list(set(detected_labels)) return detected_labels - - - diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py index 0c473744..3c87ced3 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -56,6 +50,7 @@ def config(): }, } + def prompt(input_sample): instruction = """ Review the following tweets and analyze the propaganda techniques used. 
Choose only the applicable categories from: "no technique," "Smears," @@ -67,15 +62,11 @@ def prompt(input_sample): return [ { "role": "user", - "content": ( - f'{instruction}\n' - + "Tweet: " - + input_sample - + "\nLabel: " - ), + "content": (f"{instruction}\n" + "Tweet: " + input_sample + "\nLabel: "), } ] + # def post_process(response): # if not response or 'error' in response or 'output' not in response: # print("Error or missing output in response:", response) @@ -152,14 +143,14 @@ def prompt(input_sample): def post_process(response): - if not response or 'error' in response or 'output' not in response: + if not response or "error" in response or "output" not in response: print("Error or missing output in response:", response) return "No respose" # Safely default to NOT_ADULT when unsure label = response["output"].strip().lower() label = label.replace("", "").replace("", "") label = label.lower() - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -179,7 +170,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal Oversimplification", } detected_labels = [] @@ -189,7 +180,7 @@ def post_process(response): elif english_label.lower() in label: detected_labels.append(english_label) - print("Detected labels:", detected_labels) + print("Detected labels:", detected_labels) # Remove duplicates detected_labels = list(set(detected_labels)) diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py index e7cf5a1a..cdec1b31 100755 --- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py +++ 
b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py @@ -9,19 +9,15 @@ random.seed(1333) - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": WANLP22T3PropagandaDataset, @@ -65,25 +61,21 @@ def prompt(input_sample): { "role": "user", "content": ( - f' \n{instruction}\n' - + "التغريدة: " - + input_sample - + "التصنيف: " + f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: " ), } ] - def post_process(response): - if not response or 'error' in response or 'output' not in response: + if not response or "error" in response or "output" not in response: print("Error or missing output in response:", response) return "No respose" # Safely default to NOT_ADULT when unsure label = response["output"].strip().lower() label = label.replace("", "").replace("", "") label = label.lower() - + label_mapping = { "بدون تقنية": "no technique", "تشويه": "Smears", @@ -103,7 +95,7 @@ def post_process(response): "ماذا عن": "Whataboutism", "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship", "الكليشيه القاطع للفكر": "Thought-terminating cliché", - "التبسيط السببي": "Causal Oversimplification" + "التبسيط السببي": "Causal 
Oversimplification", } detected_labels = [] @@ -113,10 +105,9 @@ def post_process(response): elif english_label.lower() in label: detected_labels.append(english_label) - print("Detected labels:", detected_labels) + print("Detected labels:", detected_labels) # Remove duplicates detected_labels = list(set(detected_labels)) return detected_labels - diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Random.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py index d0f5dea4..104d2225 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -35,6 +29,7 @@ def config(): }, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n" @@ -61,12 +56,8 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt - - - - def prompt(input_sample, examples): - base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" return [ { @@ -85,12 +76,27 @@ def post_process(response): label = out.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + if ( + "ليست" in label + or "not" in label + or "no" in label + or "ليس" in label + or "notads" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "اعلان" in label + or "مرغوب" in label + or "غير" in label + or "__ads" in label + ): return "__label__ADS" else: - return None \ No newline at end of file + return None diff --git 
a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py index c57c2568..8ceae4eb 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py @@ -9,21 +9,15 @@ random.seed(1333) - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -35,6 +29,7 @@ def config(): }, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "Here are some examples:\n\n" @@ -60,10 +55,6 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt - - - - def prompt(input_sample, examples): base_prompt = ( f"If the following tweet can be classified as spam or contains an advertisemnt, write 'spam' without explnanation, otherwise write 'not spam' without explanantion.\n\n" @@ -87,12 +78,27 @@ def post_process(response): label = out.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if 
"لا أستطيع" in label or "I cannot" in label: return None - if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + if ( + "ليست" in label + or "not" in label + or "no" in label + or "ليس" in label + or "notads" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "اعلان" in label + or "مرغوب" in label + or "غير" in label + or "__ads" in label + ): return "__label__ADS" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py index eb0320cf..ceb490e4 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py @@ -9,19 +9,15 @@ random.seed(1333) - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": SpamDataset, @@ -33,6 +29,7 @@ def config(): }, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n" @@ -59,12 +56,8 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt - - - - def prompt(input_sample, examples): - base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" return [ { @@ -83,12 +76,27 @@ def post_process(response): label = out.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + if ( + "ليست" in label + or "not" in label + or "no" in label + or "ليس" in label + or "notads" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "اعلان" in label + or "مرغوب" in label + or "غير" in label + or "__ads" in label + ): return "__label__ADS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot.py 
b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py index 7c0424c6..a94dfe41 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -31,34 +25,44 @@ def config(): def prompt(input_sample): - base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" base_prompt += "\n" + "التغريدة: " + input_sample + "\n\nالتصنيف: " - + return [ { "role": "system", "content": "أنت خبير في تحليل و تصنيف التغريدات.", }, - { - "role": "user", - "content": base_prompt - }, + {"role": "user", "content": base_prompt}, ] - - def post_process(response): out = response["choices"][0]["message"]["content"] label = out.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + if ( + "ليست" in label + or "not" in label + or "no" in label + or "ليس" in label + or "notads" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "اعلان" in label + or "مرغوب" in label + or "غير" in label + or "__ads" in label + ): return "__label__ADS" else: - return None \ No newline at end of file + return None diff --git 
a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py index cac11026..e7ee1e4d 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -48,12 +42,27 @@ def post_process(response): label = out.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + if ( + "ليست" in label + or "not" in label + or "no" in label + or "ليس" in label + or "notads" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "اعلان" in label + or "مرغوب" in label + or "غير" in label + or "__ads" in label + ): return "__label__ADS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py index 63654173..e5e5f6a4 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SpamTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, 
refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": SpamDataset, @@ -29,33 +25,43 @@ def config(): def prompt(input_sample): - base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" base_prompt += "\n" + "التغريدة: " + input_sample + "\n\nالتصنيف: " return [ { "role": "system", "content": "أنت خبير في تحليل و تصنيف التغريدات.", }, - { - "role": "user", - "content": base_prompt - }, + {"role": "user", "content": base_prompt}, ] - - def post_process(response): out = response["choices"][0]["message"]["content"] label = out.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "ليست" in label or "not" in label or "no" in label or "ليس" in label or "notads" in label: + if ( + "ليست" in label + or "not" in label + or "no" in label + or "ليس" in label + or "notads" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label or "__ads" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "اعلان" in label + or "مرغوب" in label + or "غير" in label + or 
"__ads" in label + ): return "__label__ADS" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py index dadc5685..31bb2575 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -29,6 +23,7 @@ def config(): }, } + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n\n" out_prompt += "إليك بعض الأمثلة:\n\n" @@ -44,29 +39,53 @@ def few_shot_prompt(input_sample, base_prompt, examples): out_prompt += f"التغريدة: '{input_sample}'\nالتصنيف: \n" return out_prompt + + def prompt(input_sample, examples): base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n" return [ { "role": "user", - "content": few_shot_prompt(input_sample, base_prompt, examples) + "content": few_shot_prompt(input_sample, base_prompt, examples), } ] + + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("التصنيف:", "").strip().lower() if "لا يمكن تحديد" in label: return None - if "هي محتوى غير مرغوب فيه" in label or "التغريدة الأخيرة تحتوي على محتوى غير مرغوب فيه" in label or "والرابعة تحتويان على محتوى" in label or "هذه التغريدة تصنف ضمن الرسائل غير المرغوب فيها" in label or "هي على الأرجح إعلان" in label: + if ( + "هي محتوى غير مرغوب فيه" in label + or "التغريدة الأخيرة تحتوي على محتوى غير مرغوب فيه" in label + or "والرابعة تحتويان على محتوى" in label + or "هذه التغريدة تصنف ضمن الرسائل غير المرغوب فيها" in label + or "هي على الأرجح إعلان" in label + ): return "__label__ADS" if "هي إعلان" in label or "قدمتها تحتوي على محتوى غير مرغوب" in label: - return "__label__ADS" + return "__label__ADS" - #print("label", label) - if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + # print("label", label) + if ( + "لا" in label + or "ليست" in label + or "not" in label + or 
"ليس" in label + or "no" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "yes" in label + or "مرغوب" in label + or "غير" in label + ): return "__label__ADS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py index e3cc83f7..ee2d02f8 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -52,17 +46,17 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): - base_prompt="If the following tweet can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion." + base_prompt = "If the following tweet can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion." return [ { "role": "user", - "content": ( - few_shot_prompt(input_sample, base_prompt, examples ) - ), + "content": (few_shot_prompt(input_sample, base_prompt, examples)), } ] + def post_process(response): out = response["choices"][0]["message"]["content"] j = out.find(".") @@ -70,9 +64,25 @@ def post_process(response): out = out[0:j] label = out.replace("label:", "").strip().lower() - if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label or "notads" in label: + if ( + "لا" in label + or "ليست" in label + or "not" in label + or "ليس" in label + or "no" in label + or "notads" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label or "ads" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "yes" in label + or "مرغوب" in label + or "غير" in label + or "ads" in label + ): return "__label__ADS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py 
b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py index d8ef1cf4..4adcb451 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SpamTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": SpamDataset, @@ -28,14 +24,13 @@ def config(): } - - - def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n\n" out_prompt += "إليك بعض الأمثلة:\n\n" for index, example in enumerate(examples): - label = "__label__ADS" if example["label"] == "__label__ADS" else "__label__NOTADS" + label = ( + "__label__ADS" if example["label"] == "__label__ADS" else "__label__NOTADS" + ) out_prompt += ( f"مثال {index + 1}:\n" f"الجملة: '{example['input']}'\n" @@ -53,7 +48,7 @@ def prompt(input_sample, examples): return [ { "role": "user", - "content": few_shot_prompt(input_sample, base_prompt, examples) + "content": few_shot_prompt(input_sample, base_prompt, examples), } ] diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py 
b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py index 2bf036df..8b914ca1 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -30,18 +24,12 @@ def config(): } - - - def prompt(input_sample): base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n" return [ { "role": "user", - "content": base_prompt - + "التغريدة: " - + input_sample - + "التصنيف: " + "content": base_prompt + "التغريدة: " + input_sample + "التصنيف: ", } ] @@ -50,10 +38,24 @@ def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("التصنيف:", "").strip().lower() - #print("label", label) - if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + # print("label", label) + if ( + "لا" in label + or "ليست" in label + or "not" in label + or "ليس" in label + or "no" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "yes" in label + or "مرغوب" in label + or "غير" in label + ): return "__label__ADS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py index 1afb2932..4f9b4b65 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py @@ -1,26 +1,17 @@ - - - from llmebench.datasets import SpamDataset from llmebench.models import FastChatModel from 
llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -33,18 +24,12 @@ def config(): } - - - def prompt(input_sample): base_prompt = "If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n tweet: {input_sample}\nlabel: \n" return [ { "role": "user", - "content": base_prompt - + "Tweet: " - + input_sample - + "Classification: " + "content": base_prompt + "Tweet: " + input_sample + "Classification: ", } ] diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py index 528c262c..596e8515 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SpamTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": 
"Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": SpamDataset, @@ -28,18 +24,12 @@ def config(): } - - - def prompt(input_sample): base_prompt = "صنف الجملة التالية كـ '__label__ADS' أو '__label__NOTADS'، أعد التسمية فقط بدون الحاجة إلى وصف أو تحليل.\n" return [ { "role": "user", - "content": base_prompt - + "التغريدة: " - + input_sample - + "التصنيف: " + "content": base_prompt + "التغريدة: " + input_sample + "التصنيف: ", } ] @@ -53,7 +43,7 @@ def post_process(response): if "لا يمكنني" in label: return None - + if "ليس" in label or "ليست" in label or "not" in label: return "__label__NOTADS" return "__label__ADS" diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py index 6bad23e8..4f099eff 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", 
"model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -58,10 +52,11 @@ def prompt(input_sample, examples): return [ { "role": "user", - "content": few_shot_prompt(input_sample, base_prompt, examples) + "content": few_shot_prompt(input_sample, base_prompt, examples), } ] + def post_process(response): if "output" in response: label = response["output"].strip() @@ -75,13 +70,26 @@ def post_process(response): label = label.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "ليست" in label or "not" in label or "غير" in label or "no" in label or "ليس" in label: + if ( + "ليست" in label + or "not" in label + or "غير" in label + or "no" in label + or "ليس" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "اعلان" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "اعلان" in label + or "مرغوب" in label + or "غير" in label + ): return "__label__ADS" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py index 2870bd01..9c8a27b4 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import SpamDataset from llmebench.models import AzureModel from llmebench.tasks import SpamTask -import random - - def metadata(): @@ -11,14 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - - def config(): return { "dataset": SpamDataset, @@ -34,7 +29,7 @@ def config(): def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n\n" out_prompt = out_prompt + "Here are some examples:\n\n" - + for index, example in enumerate(examples): label = "'spam'" if example["label"] == "__label__ADS" else "'not spam'" out_prompt += ( @@ -55,6 +50,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def prompt(input_sample, examples): base_prompt = ( "Classify the following tweet as either 'spam' or 'not spam'. 
" @@ -64,7 +60,7 @@ def prompt(input_sample, examples): return [ { "role": "user", - "content": few_shot_prompt(input_sample, base_prompt, examples) + "content": few_shot_prompt(input_sample, base_prompt, examples), } ] @@ -82,12 +78,26 @@ def post_process(response): label = label.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() print("label: ", label) - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "notads" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + if ( + "notads" in label + or "ليست" in label + or "not" in label + or "ليس" in label + or "no" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "yes" in label + or "مرغوب" in label + or "غير" in label + ): return "__label__ADS" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py index bd48b997..4df15579 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import SpamDataset from llmebench.models import AzureModel from llmebench.tasks import SpamTask -import random - def metadata(): @@ -10,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", 
"model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": SpamDataset, @@ -46,14 +44,15 @@ def few_shot_prompt(input_sample, base_prompt, examples): def prompt(input_sample, examples): - base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" + base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n" return [ { "role": "user", - "content": few_shot_prompt(input_sample, base_prompt, examples) + "content": few_shot_prompt(input_sample, base_prompt, examples), } ] + def post_process(response): if "output" in response: label = response["output"].strip() @@ -67,12 +66,26 @@ def post_process(response): label = label.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + if ( + "لا" in label + or "ليست" in label + or "not" in label + or "ليس" in label + or "no" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or 
"مزعج" in label + or "yes" in label + or "مرغوب" in label + or "غير" in label + ): return "__label__ADS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py index 81f7346c..68edc5eb 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py @@ -1,9 +1,8 @@ +import random + from llmebench.datasets import SpamDataset from llmebench.models import AzureModel from llmebench.tasks import SpamTask -import random - - def metadata(): @@ -11,13 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": SpamDataset, @@ -30,24 +26,18 @@ def config(): } - - - def prompt(input_sample): base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n" return [ { "role": "user", - "content": base_prompt - + "التغريدة: " - + input_sample - + "التصنيف: " + "content": base_prompt + "التغريدة: " + input_sample + "التصنيف: ", } ] def post_process(response): - #print(response) + # print(response) if "output" in response: label = response["output"].strip() label = label.replace("", "") @@ -60,13 +50,26 @@ def post_process(response): label = label.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + if ( + "لا" in label + or "ليست" in label + or "not" in label + or "ليس" in label + or "no" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "yes" in label + or "مرغوب" in label + or "غير" in label + ): return "__label__ADS" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py index 44ced30c..df0f6787 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py @@ -3,22 +3,15 @@ from llmebench.tasks import SpamTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - - def config(): return { "dataset": SpamDataset, @@ -39,6 +32,7 @@ def prompt(input_sample): }, ] + def post_process(response): if "output" in response: label = response["output"].strip() @@ -52,12 +46,26 @@ def post_process(response): label = label.replace("التصنيف:", "").strip().lower() label = label.replace("label:", "").strip().lower() - #print("label", label) + # print("label", label) if "لا أستطيع" in label or "I cannot" in label: return None - if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + if ( + "لا" in label + or "ليست" in label + or "not" in label + or "ليس" in label + or "no" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "yes" in label + or "مرغوب" 
in label + or "غير" in label + ): return "__label__ADS" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py index e9096ed1..c682245f 100755 --- a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py @@ -1,8 +1,8 @@ +import random + from llmebench.datasets import SpamDataset from llmebench.models import AzureModel from llmebench.tasks import SpamTask -import random - def metadata(): @@ -10,12 +10,10 @@ def metadata(): "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": SpamDataset, @@ -28,9 +26,6 @@ def config(): } - - - def prompt(input_sample): base_prompt = ( "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج / إعلان؟\n" @@ -40,10 +35,7 @@ def prompt(input_sample): return [ { "role": "user", - "content": base_prompt - + "التغريدة: " - + input_sample - + "\nالتصنيف: " + "content": base_prompt + "التغريدة: " + input_sample + "\nالتصنيف: ", } ] @@ -63,12 +55,30 @@ def post_process(response): if "لا يمكنني" in label or "لا توجد معلومات" in label: return None - #print("label", label) - if "لا أستطيع" in label or "I cannot" in label or "'ads' أو 'label__NOTADS'".lower() in label: + # print("label", label) + if ( + "لا أستطيع" in label + or "I cannot" in label + or "'ads' أو 'label__NOTADS'".lower() in label + ): return None - if "لا" in label or "ليست" in label or "not" in label or "ليس" in label or "no" in label: + if ( + "لا" in label + or "ليست" in label + or "not" in label + or "ليس" in label + or "no" in label + ): return "__label__NOTADS" - elif "نعم" in label or "إعلان" in label or "spam" in label or "مزعج" in label or "yes" in label or "مرغوب" in label or "غير" in label: + elif ( + "نعم" in label + or "إعلان" in label + or "spam" in label + or "مزعج" in label + or "yes" in label + or "مرغوب" in label + or "غير" in label + ): return "__label__ADS" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Random.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py 
b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py index ca21cb59..8f95c058 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -32,9 +26,7 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - 'صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.\n' - ) + base_prompt = 'صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.\n' return [ { "role": "system", @@ -54,9 +46,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): label = "موضوعية" if example["label"] == "OBJ" else "غير موضوعية" out_prompt += ( - f"مثال {index}:\n" - f"التغريدة: {example['input']}\n" - f"التصنيف: {label}\n\n" + f"مثال {index}:\n" f"التغريدة: {example['input']}\n" f"التصنيف: {label}\n\n" ) out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n" diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py index 6b9fe702..bfae7aba 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication 
available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -75,6 +69,12 @@ def post_process(response): label = response["choices"][0]["message"]["content"].strip().lower() if "obj" in label or "موضوعي" in label: return "OBJ" - elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + elif ( + "subj" in label + or "غير" in label + or "لا" in label + or "ذاتي" in label + or "ليس" in label + ): return "SUBJ" return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py index 4f40b6dd..33c0f5f0 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SubjectivityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -30,9 +26,7 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - 'صنف التغريدة الى "subjective" أو "objective". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.\n' - ) + base_prompt = 'صنف التغريدة الى "subjective" أو "objective". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.\n' return [ { "role": "system", @@ -52,9 +46,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): label = "objective" if example["label"] == "OBJ" else "subjective" out_prompt += ( - f"مثال {index}:\n" - f"التغريدة: {example['input']}\n" - f"التصنيف: {label}\n\n" + f"مثال {index}:\n" f"التغريدة: {example['input']}\n" f"التصنيف: {label}\n\n" ) out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n" @@ -66,6 +58,12 @@ def post_process(response): label = response["choices"][0]["message"]["content"].strip().lower() if "obj" in label or "موضوعي" in label: return "OBJ" - elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + elif ( + "subj" in label + or "غير" in label + or "لا" in label + or "ذاتي" in label + or "ليس" in label + ): return "SUBJ" return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py index 684b75fc..7b9cc990 100755 --- 
a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -53,4 +47,3 @@ def post_process(response): return "OBJ" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py index d66e0eec..2568590a 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research 
Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -53,6 +47,12 @@ def post_process(response): label = response["choices"][0]["message"]["content"].strip().lower() if "obj" in label or "موضوعي" in label: return "OBJ" - elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + elif ( + "subj" in label + or "غير" in label + or "لا" in label + or "ذاتي" in label + or "ليس" in label + ): return "SUBJ" return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py index 7eb4ba7e..56d3a6d2 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SubjectivityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "GPT-4o-2024-05-22", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication 
available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -27,7 +23,7 @@ def config(): def prompt(input_sample): prompt_string = ( - 'صنف التغريدة الى "objective" أو "subjective". قم بتقديم التصنيف دون أي تبرير إضافي.\n' + 'صنف التغريدة الى "objective" أو "subjective". قم بتقديم التصنيف دون أي تبرير إضافي.\n' f"التغريدة: {input_sample}\n" f"التصنيف: \n" ) @@ -47,6 +43,12 @@ def post_process(response): label = response["choices"][0]["message"]["content"].strip().lower() if "obj" in label or "موضوعي" in label: return "OBJ" - elif "subj" in label or "غير" in label or "لا" in label or "ذاتي" in label or "ليس" in label : + elif ( + "subj" in label + or "غير" in label + or "لا" in label + or "ذاتي" in label + or "ليس" in label + ): return "SUBJ" return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py index 21224666..0d49d7e8 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", 
"model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -32,11 +26,8 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - ' صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), @@ -67,6 +58,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def post_process(response): label = ( response["choices"][0]["message"]["content"].lower().replace(".", "").strip() @@ -77,4 +69,4 @@ def post_process(response): elif "موضوعية" in label or "obj" in label: return "OBJ" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py index 71c384ed..fdd6d001 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed 
Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -32,9 +26,7 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - 'Classify the tweet as "Objective" or "Subjective". Provide the classification for the last tweet only, do not provide any additional justification:\n' - ) + base_prompt = 'Classify the tweet as "Objective" or "Subjective". 
Provide the classification for the last tweet only, do not provide any additional justification:\n' return [ { "role": "user", @@ -42,6 +34,7 @@ def prompt(input_sample, examples): }, ] + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "Here are some examples:\n\n" @@ -69,10 +62,10 @@ def post_process(response): label = ( response["choices"][0]["message"]["content"].lower().replace(".", "").strip() ) - + if "غير" in label or "subj" in label or "not" in label or "ليس" in label: return "SUBJ" elif "موضوعية" in label or "obj" in label: return "OBJ" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py index ddcb1821..1b0e1a01 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SubjectivityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -30,9 +26,8 @@ def config(): def prompt(input_sample, examples): - base_prompt = ("صنف الجملة على أنها subjective أو objective. قدم فقط التسمية.\n\nالنص: {input_sample}\التصنيف: ") + base_prompt = "صنف الجملة على أنها subjective أو objective. قدم فقط التسمية.\n\nالنص: {input_sample}\التصنيف: " return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), @@ -63,6 +58,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def post_process(response): label = ( response["choices"][0]["message"]["content"].lower().replace(".", "").strip() diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py index 825527a6..206cae81 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on 
[arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -40,11 +34,12 @@ def prompt(input_sample): } ] + def post_process(response): label = ( response["choices"][0]["message"]["content"].lower().replace(".", "").strip() ) - #print(label) + # print(label) if "غير" in label or "subj" in label or "not" in label or "ليس" in label: return "SUBJ" diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py index 5031d68d..1a3ea668 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_JAIS13b_ZeroShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SubjectivityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT23SubjectivityDataset, diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py index 49ab149f..8760822d 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Jais13b_ZeroShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -40,7 +34,6 @@ def prompt(input_sample): def post_process(response): label = ( response["choices"][0]["message"]["content"].lower().replace(".", "").strip() - ) if "غير" in label or "subj" in label or "not" in label or "ليس" in label: diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py index 69cc2cbc..452ea072 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Arabic.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -32,11 +26,8 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - ' صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' - ) + base_prompt = ' صنف التغريدة الى "موضوعية" أو "غير موضوعية". قم بتقديم التصنيف فقط لأخر تغريدة، لا تقدم أي تبرير إضافي.:\n' return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), @@ -69,7 +60,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): def post_process(response): - #print(response) + # print(response) if "output" in response: # if "content" in response["messages"]: label = response["output"].strip() @@ -85,4 +76,3 @@ def post_process(response): return "OBJ" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py index bbcedcb5..e9d2778c 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_English.py @@ -3,21 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, 
refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -32,9 +26,7 @@ def config(): def prompt(input_sample, examples): - base_prompt = ( - 'Classify the tweet as "Objective" or "Subjective". Provide the classification for the last tweet only, do not provide any additional justification:\n' - ) + base_prompt = 'Classify the tweet as "Objective" or "Subjective". Provide the classification for the last tweet only, do not provide any additional justification:\n' return [ { "role": "user", @@ -42,6 +34,7 @@ def prompt(input_sample, examples): }, ] + def few_shot_prompt(input_sample, base_prompt, examples): out_prompt = base_prompt + "\n" out_prompt = out_prompt + "Here are some examples:\n\n" @@ -66,7 +59,7 @@ def few_shot_prompt(input_sample, base_prompt, examples): def post_process(response): - #print(response) + # print(response) if "output" in response: # if "content" in response["messages"]: label = response["output"].strip() diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py index ec6b6fae..db6da233 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_FewShot_Mixed.py @@ -3,19 +3,15 @@ from llmebench.tasks import SubjectivityTask - - def metadata(): return { "author": "Mohamed 
Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": CT23SubjectivityDataset, @@ -32,7 +28,6 @@ def config(): def prompt(input_sample, examples): base_prompt = 'صنف التغريدة الى "subjective" أو "objective". قم بتقديم التصنيف فقط.' return [ - { "role": "user", "content": few_shot_prompt(input_sample, base_prompt, examples), @@ -63,8 +58,9 @@ def few_shot_prompt(input_sample, base_prompt, examples): return out_prompt + def post_process(response): - #print(response) + # print(response) if "output" in response: # if "content" in response["messages"]: label = response["output"].strip() @@ -80,5 +76,3 @@ def post_process(response): return "OBJ" else: return None - - diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py index 9cb5163c..cca523c3 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): 
return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT23SubjectivityDataset, @@ -42,8 +38,9 @@ def prompt(input_sample): } ] + def post_process(response): - #print(response) + # print(response) if "output" in response: # if "content" in response["messages"]: label = response["output"].strip() @@ -59,4 +56,3 @@ def post_process(response): return "OBJ" else: return None - diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py index 386cbf1a..ada805a4 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_English.py @@ -3,19 +3,15 @@ from llmebench.tasks import SubjectivityTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": 
"Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": CT23SubjectivityDataset, @@ -42,8 +38,9 @@ def prompt(input_sample): } ] + def post_process(response): - #print(response) + # print(response) if "output" in response: # if "content" in response["messages"]: label = response["output"].strip() @@ -59,5 +56,3 @@ def post_process(response): return "OBJ" else: return None - - diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py index a5202e02..3ded318b 100755 --- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_LLama3-8b_ZeroShot_Mixed.py @@ -3,14 +3,12 @@ from llmebench.tasks import SubjectivityTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "Llama-3.1-8B-Instruct", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } @@ -40,8 +38,9 @@ def prompt(input_sample): } ] + def post_process(response): - #print(response) + # print(response) if "output" in response: # if "content" in response["messages"]: label = response["output"].strip() @@ -57,6 +56,3 @@ def post_process(response): return "OBJ" else: return None - - - diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Random.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_Random.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_FewShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_FewShot_ENprompt.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_FewShot_ENprompt.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_ZeroShot_explain.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_GPT4_ZeroShot_explain.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Llama3_8b_ZeroShot.py 
b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Llama3_8b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Mistral_7b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Mistral_7b_ZeroShot.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Sonnet_ZeroShot_en.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_Sonnet_ZeroShot_en.py old mode 100644 new mode 100755 diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_VLLMLlama3_8b_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/ThatiARSubjectivity_VLLMLlama3_8b_ZeroShot.py old mode 100644 new mode 100755 From 2722abfcc41ee62716e3d23468ad7006319897f7 Mon Sep 17 00:00:00 2001 From: MohamedBayan Date: Thu, 21 Nov 2024 13:59:09 +0300 Subject: [PATCH 3/3] format_code --- .../Adult_JAIS13b_FewShot_English.py | 49 ++++++++++++------- .../Adult_JAIS13b_ZeroShot_Arabic.py | 40 +++++++++++---- .../Adult_JAIS13b_ZeroShot_English.py | 42 +++++++++++----- .../Adult_JAIS13b_ZeroShot_Mixed.py | 37 +++++++++++--- ...heckworthiness_JAIS13b_ZeroShot_English.py | 40 ++++++--------- 5 files changed, 136 insertions(+), 72 deletions(-) diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py index e6f868e5..a3fb0afd 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py @@ -3,19 +3,15 @@ from llmebench.tasks import 
AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,10 +23,9 @@ def config(): }, } + def few_shot_prompt(input_sample, examples): - base_prompt = ( - "Given the following tweet, label it as 'ADULT' or 'NOT_ADULT' based on the content of the tweet. Provide only label.\n\n" - ) + base_prompt = "Given the following tweet, label it as 'ADULT' or 'NOT_ADULT' based on the content of the tweet. 
Provide only label.\n\n" for index, example in enumerate(examples): label = "adult" if example["label"] == "ADULT" else "not adult" base_prompt += ( @@ -38,12 +33,10 @@ def few_shot_prompt(input_sample, examples): f"Tweet: '{example['input']}'\n" f"Label: {label}\n\n" ) - base_prompt += ( - f"Now classify the new tweet:\nTweet: '{input_sample}'\n" - f"Label: " - ) + base_prompt += f"Now classify the new tweet:\nTweet: '{input_sample}'\n" f"Label: " return base_prompt + def prompt(input_sample, examples): return [ { @@ -52,24 +45,46 @@ def prompt(input_sample, examples): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] label = label.replace("التصنيف:", "").strip() label = label.replace("label:", "").strip() label = label.lower() - if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): return None - elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): return "NOT_ADULT" elif "غير مناسب للنشر العام" in label: return "ADULT" elif "not_adult" in label or "not adult" in label: return "NOT_ADULT" - elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label "للبالغين" in label or "نعم" in label or "adult" in label: + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): return "ADULT" - elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" else: return None - diff 
--git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py index 6df883fa..20db77d1 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -40,6 +37,7 @@ def prompt(input_sample): } ] + def post_process(response): label = response["choices"][0]["message"]["content"] @@ -48,18 +46,40 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - - if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): return None - elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): return "NOT_ADULT" elif "غير مناسب للنشر العام" in label: return "ADULT" elif "not_adult" in label or "not adult" in label: return "NOT_ADULT" - elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label "للبالغين" in label or "نعم" in label or "adult" in label: + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): return "ADULT" - elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" else: return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py index 8dcf7898..01b11bed 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py @@ -3,19 +3,15 @@ from llmebench.tasks import AdultTask - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - - def config(): return { "dataset": AdultDataset, @@ -27,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -39,7 +36,8 @@ def prompt(input_sample): + "\nLabel: " ), } - ] + ] + def post_process(response): label = response["choices"][0]["message"]["content"] @@ -49,17 +47,39 @@ def post_process(response): label = label.replace("label:", "").strip() label = label.lower() - if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): return None - elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + elif ( + 
"غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): return "NOT_ADULT" elif "غير مناسب للنشر العام" in label: return "ADULT" elif "not_adult" in label or "not adult" in label: return "NOT_ADULT" - elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label "للبالغين" in label or "نعم" in label or "adult" in label: + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): return "ADULT" - elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py index e36fc907..e051cff2 100755 --- a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py @@ -3,17 +3,15 @@ from llmebench.tasks import AdultTask - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." 
+ "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", } - def config(): return { "dataset": AdultDataset, @@ -25,6 +23,7 @@ def config(): }, } + def prompt(input_sample): return [ { @@ -48,17 +47,39 @@ def post_process(response): label = label.replace("", "").replace("", "") label = label.lower() - if "cannot classify" in label or "cannot complete" in label or "لا يمكنني" in label or "cannot fulfill" in label: + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): return None - elif "غير مناسب للبالغين" in label or "غير مناسبة للبالغين" in label or "مناسب للجمهور العام" in label: + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): return "NOT_ADULT" elif "غير مناسب للنشر العام" in label: return "ADULT" elif "not_adult" in label or "not adult" in label: return "NOT_ADULT" - elif "التغريدة مناسبة للبالغين" in label or "المحتوى مناسب للبالغين" in label "للبالغين" in label or "نعم" in label or "adult" in label: + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): return "ADULT" - elif "ليس" in label or "ليست" in label or "not" in label or "no"==label or "غير" in label or "لا" in label: + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): return "NOT_ADULT" else: - return None \ No newline at end of file + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py 
index ed20dfeb..86e311f2 100755 --- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py +++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_JAIS13b_ZeroShot_English.py @@ -3,19 +3,16 @@ from llmebench.tasks import CheckworthinessTask - - - def metadata(): return { "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", "model": "jais-13b-chat", - "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)." - } - - -, + "description": ( + "For a comprehensive analysis and results, refer to our peer-reviewed publication available at " + "[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on " + "[arXiv](https://arxiv.org/abs/2409.07054)." + ), } @@ -34,7 +31,8 @@ def config(): def prompt(input_sample): base_prompt = ( - f'Classify the tweet as "checkworthy" or "not checkworthy". Provide the classification only for the last tweet, without providing any additional justification:\n\n' + f'Classify the tweet as "checkworthy" or "not checkworthy". 
Provide the classification only for the last tweet, ' + f"without providing any additional justification:\n\n" f"tweet: {input_sample}\n" f"label: \n" ) @@ -47,22 +45,12 @@ def prompt(input_sample): def post_process(response): - label = response["choices"][0]["message"]["content"] + label = response["choices"][0]["message"]["content"].strip().lower() - label = label.replace("label:", "").strip() - - - - label = label.lower() - if label == "checkworthy" or label == "Checkworthy": - label_fixed = "1" - elif label == "Not_checkworthy." or label == "not_checkworthy": - label_fixed = "0" - elif "not_checkworthy" in label or "label: not_checkworthy" in label: - label_fixed = "0" - elif "checkworthy" in label or "label: checkworthy" in label: - label_fixed = "1" + # Normalize labels + if "checkworthy" in label: + return "1" + elif "not_checkworthy" in label: + return "0" else: - label_fixed = None - - return label_fixed + return None