diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py
index 1e13fbc5..2acfc1f3 100644
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py
@@ -2,7 +2,7 @@
 import re
 
 from llmebench.datasets import CT22AttentionworthyDataset
-from llmebench.models import GPTModel, RandomGPTModel
+from llmebench.models import GPTModel
 from llmebench.tasks import AttentionworthyTask
 
 
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py
index dc9355fd..37707a08 100644
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py
@@ -1,15 +1,10 @@
 import os
-import random
-import re
 
 from llmebench.datasets import CT22AttentionworthyDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import AttentionworthyTask
 
 
-random.seed(1333)
-
-
 def config():
     return {
         "dataset": CT22AttentionworthyDataset,
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py
index 82f465f7..7dc6c70a 100644
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py
@@ -1,15 +1,9 @@
 import os
-import random
-import re
-
 from llmebench.datasets import CT22AttentionworthyDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import AttentionworthyTask
 
 
-random.seed(1333)
-
-
 def config():
     return {
         "dataset": CT22AttentionworthyDataset,
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py
index e2112584..9c2a3a18 100644
--- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py
@@ -1,15 +1,10 @@
 import os
-import random
-import re
 
 from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
-random.seed(1333)
-
-
 def config():
     return {
         "dataset": CT22CheckworthinessDataset,
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py
index fd36a6a6..302d1814 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py
@@ -2,9 +2,9 @@
 import random
 import re
 
-from llmebench.datasets import WANLP22PropagandaDataset
+from llmebench.datasets import WANLP22T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import PropagandaMultilabelTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 random.seed(1333)
@@ -12,11 +12,11 @@
 
 def config():
     return {
-        "dataset": WANLP22PropagandaDataset,
+        "dataset": WANLP22T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
-        "task": PropagandaMultilabelTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py
index 873833b7..a9f861a8 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py
@@ -2,18 +2,18 @@
 import regex as re
 
-from llmebench.datasets import WANLP22PropagandaDataset
+from llmebench.datasets import WANLP22T3PropagandaDataset
 from llmebench.models import GPTModel
-from llmebench.tasks import PropagandaMultilabelTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
     return {
-        "dataset": WANLP22PropagandaDataset,
+        "dataset": WANLP22T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
-        "task": PropagandaMultilabelTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTModel,
         "model_args": {
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py
index e43084d8..1dced0a3 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py
@@ -2,9 +2,9 @@
 import random
 import re
 
-from llmebench.datasets import WANLP22PropagandaDataset
+from llmebench.datasets import WANLP22T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 random.seed(1333)
@@ -12,11 +12,11 @@
 
 def config():
     return {
-        "dataset": WANLP22PropagandaDataset,
+        "dataset": WANLP22T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
-        "task": PropagandaMultilabelTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py
index d5e1a6cf..c4162624 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py
@@ -2,9 +2,9 @@
 import random
 import re
 
-from llmebench.datasets import WANLP22T3Dataset
+from llmebench.datasets import WANLP22T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 random.seed(1333)
@@ -12,11 +12,11 @@
 
 def config():
     return {
-        "dataset": WANLP22T3Dataset,
+        "dataset": WANLP22T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
-        "task": PropagandaMultilabelTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py
index 63fa49b4..80cb5d4c 100644
--- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import SubjectivityDataset
+from llmebench.datasets import CT23SubjectivityDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import SubjectivityTask
 
 
 def config():
     return {
-        "dataset": SubjectivityDataset,
+        "dataset": CT23SubjectivityDataset,
         "dataset_args": {},
         "task": SubjectivityTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py
index ac41046e..44a7d197 100644
--- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import SubjectivityDataset
+from llmebench.datasets import CT23SubjectivityDataset
 from llmebench.models import GPTModel, RandomGPTModel
 from llmebench.tasks import SubjectivityTask
 
 
 def config():
     return {
-        "dataset": SubjectivityDataset,
+        "dataset": CT23SubjectivityDataset,
         "dataset_args": {},
         "task": SubjectivityTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py
index 93859b9f..2acfaea5 100644
--- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import SubjectivityDataset
+from llmebench.datasets import CT23SubjectivityDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import SubjectivityTask
 
 
 def config():
     return {
-        "dataset": SubjectivityDataset,
+        "dataset": CT23SubjectivityDataset,
         "dataset_args": {},
         "task": SubjectivityTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py
index 95c46f73..23454f6c 100644
--- a/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import SubjectivityDataset
+from llmebench.datasets import CT23SubjectivityDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import SubjectivityTask
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": SubjectivityDataset,
+        "dataset": CT23SubjectivityDataset,
         "dataset_args": {},
         "task": SubjectivityTask,
         "task_args": {},
diff --git a/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index f49e669f..5d5c070c 100644
--- a/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
+++ b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index 0c464fb3..bdac0ca6 100644
--- a/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
+++ b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index aea9d3f9..dce30585 100644
--- a/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
+++ b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -4,7 +4,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -13,7 +13,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index 904c14e3..3c8d7fba 100644
--- a/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index 624f3318..117b160c 100644
--- a/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index 659e6faa..a01b0369 100644
--- a/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -4,7 +4,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -13,7 +13,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index f3b2dd9c..3f3f00e4 100644
--- a/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
+++ b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
"dataset_args": { "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt" }, - "task": SemEval23T3PropagandaTask, + "task": MultilabelPropagandaTask, "task_args": {}, "model": BLOOMPetalModel, "model_args": { diff --git a/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py index 7f49bd7b..c4f64cf9 100644 --- a/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py +++ b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py @@ -3,7 +3,7 @@ from llmebench.datasets import SemEval23T3PropagandaDataset from llmebench.models import GPTChatCompletionModel -from llmebench.tasks import SemEval23T3PropagandaTask +from llmebench.tasks import MultilabelPropagandaTask def config(): @@ -12,7 +12,7 @@ def config(): "dataset_args": { "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt" }, - "task": SemEval23T3PropagandaTask, + "task": MultilabelPropagandaTask, "task_args": {}, "model": GPTChatCompletionModel, "model_args": { diff --git a/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py index ae106b5c..80f68cf8 100644 --- a/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py +++ b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py @@ -4,7 +4,7 @@ from llmebench.datasets import SemEval23T3PropagandaDataset from llmebench.models import GPTChatCompletionModel -from llmebench.tasks import SemEval23T3PropagandaTask +from llmebench.tasks import MultilabelPropagandaTask def config(): @@ -13,7 +13,7 @@ def config(): "dataset_args": { "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt" }, - "task": SemEval23T3PropagandaTask, + "task": MultilabelPropagandaTask, "task_args": {}, "model": GPTChatCompletionModel, "model_args": { diff --git a/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py index 711ce596..d5c931ed 100644 --- a/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py +++ b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py @@ -3,7 +3,7 @@ from llmebench.datasets import SemEval23T3PropagandaDataset from llmebench.models import BLOOMPetalModel -from llmebench.tasks import SemEval23T3PropagandaTask +from llmebench.tasks import MultilabelPropagandaTask def config(): @@ -12,7 +12,7 @@ def config(): "dataset_args": { "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt" }, - "task": SemEval23T3PropagandaTask, + "task": MultilabelPropagandaTask, "task_args": {}, "model": BLOOMPetalModel, "model_args": { diff --git a/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py index 
index 8359def4..75bad866 100644
--- a/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
+++ b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index 8e939b86..eb978d48 100644
--- a/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
+++ b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -4,7 +4,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -13,7 +13,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index b8cf551e..33ade972 100644
--- a/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
+++ b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index de992641..5d6acf12 100644
--- a/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
+++ b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index 1d7886ea..4f2371ec 100644
--- a/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
+++ b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -4,7 +4,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -13,7 +13,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index 2687affd..91c23317 100644
--- a/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
+++ b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index 5b6551b8..4a89abb4 100644
--- a/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
+++ b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -3,7 +3,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -12,7 +12,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index 9633b020..563ac5a6 100644
--- a/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
+++ b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -4,7 +4,7 @@
 
 from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import SemEval23T3PropagandaTask
+from llmebench.tasks import MultilabelPropagandaTask
 
 
 def config():
@@ -13,7 +13,7 @@ def config():
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": SemEval23T3PropagandaTask,
+        "task": MultilabelPropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/llmebench/datasets/COVID19Factuality.py b/llmebench/datasets/COVID19Factuality.py
index cd6227c5..a3e48a61 100644
--- a/llmebench/datasets/COVID19Factuality.py
+++ b/llmebench/datasets/COVID19Factuality.py
@@ -3,9 +3,9 @@
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class FactualityCOVID19Dataset(DatasetBase):
+class COVID19FactualityDataset(DatasetBase):
     def __init__(self, **kwargs):
-        super(FactualityCOVID19Dataset, self).__init__(**kwargs)
+        super(COVID19FactualityDataset, self).__init__(**kwargs)
 
     def get_data_sample(self):
         return {"input": "some tweet", "label": "no"}
diff --git a/llmebench/datasets/CT22Attentionworthy.py b/llmebench/datasets/CT22Attentionworthy.py
index 90123c31..8119b977 100644
--- a/llmebench/datasets/CT22Attentionworthy.py
+++ b/llmebench/datasets/CT22Attentionworthy.py
@@ -3,9 +3,9 @@
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class AttentionworthyDataset(DatasetBase):
+class CT22AttentionworthyDataset(DatasetBase):
     def __init__(self, **kwargs):
-        super(AttentionworthyDataset, self).__init__(**kwargs)
+        super(CT22AttentionworthyDataset, self).__init__(**kwargs)
 
     def get_data_sample(self):
         return {"input": "some tweet", "label": "no_not_interesting"}
diff --git a/llmebench/datasets/CT22Checkworthiness.py b/llmebench/datasets/CT22Checkworthiness.py
index d257b219..b7d9806c 100644
--- a/llmebench/datasets/CT22Checkworthiness.py
+++ b/llmebench/datasets/CT22Checkworthiness.py
@@ -3,21 +3,21 @@
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class CheckworthinessDataset(DatasetBase):
+class CT22CheckworthinessDataset(DatasetBase):
     def __init__(self, **kwargs):
-        super(CheckworthinessDataset, self).__init__(**kwargs)
+        super(CT22CheckworthinessDataset, self).__init__(**kwargs)
 
     def get_data_sample(self):
         return {
             "input": "some tweet",
-            "label": "checkworthy",
+            "label": "1",
             "input_id": 0,
             "line_number": 0,
         }
 
     def metadata():
         return {
-            "language": ["ar", "bg", "nl", "en", "tr"],
+            "language": ["ar", "bg", "nl", "en", "es", "tr"],
             "citation": """@inproceedings{nakov2022overview,
 title={Overview of the clef--2022 checkthat! lab on fighting the covid-19 infodemic and fake news detection},
 author={Nakov, Preslav and Barr{\\'o}n-Cede{\\~n}o, Alberto and da San Martino, Giovanni and Alam, Firoj and Stru{\\ss}, Julia Maria and Mandl, Thomas and M{\\'\\i}guez, Rub{\\'e}n and Caselli, Tommaso and Kutlu, Mucahid and Zaghouani, Wajdi and others},
diff --git a/llmebench/datasets/CT22Harmful.py b/llmebench/datasets/CT22Harmful.py
index a711c1f3..c19049e5 100644
--- a/llmebench/datasets/CT22Harmful.py
+++ b/llmebench/datasets/CT22Harmful.py
@@ -1,15 +1,13 @@
-import pandas as pd
-
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class CovidHarmfulDataset(DatasetBase):
+class CT22HarmfulDataset(DatasetBase):
     def __init__(self, **kwargs):
-        super(CovidHarmfulDataset, self).__init__(**kwargs)
+        super(CT22HarmfulDataset, self).__init__(**kwargs)
 
     def metadata():
         return {
-            "language": "ar",
+            "language": ["ar", "bg", "nl", "en", "tr"],
             "citation": """@inproceedings{nakov2022overview,
 title={Overview of the CLEF-2022 CheckThat! lab task 1 on identifying relevant claims in tweets},
 author={Nakov, Preslav and Barr{\\'o}n-Cede{\\~n}o, Alberto and Da San Martino, Giovanni and Alam, Firoj and Kutlu, Mucahid and Zaghouani, Wajdi and Li, Chengkai and Shaar, Shaden and Mubarak, Hamdy and Nikolov, Alex},
diff --git a/llmebench/datasets/CT23Subjectivity.py b/llmebench/datasets/CT23Subjectivity.py
index 64299c04..b1ba7fe4 100644
--- a/llmebench/datasets/CT23Subjectivity.py
+++ b/llmebench/datasets/CT23Subjectivity.py
@@ -3,9 +3,9 @@
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class SubjectivityDataset(DatasetBase):
+class CT23SubjectivityDataset(DatasetBase):
     def __init__(self, **kwargs):
-        super(SubjectivityDataset, self).__init__(**kwargs)
+        super(CT23SubjectivityDataset, self).__init__(**kwargs)
 
     def get_data_sample(self):
         return {"input": "some tweet", "label": "SUBJ"}
diff --git a/llmebench/datasets/SemEval23T3Propaganda.py b/llmebench/datasets/SemEval23T3Propaganda.py
index b0e13173..4d9d33cd 100644
--- a/llmebench/datasets/SemEval23T3Propaganda.py
+++ b/llmebench/datasets/SemEval23T3Propaganda.py
@@ -4,11 +4,11 @@
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class SemEval23T3Dataset(DatasetBase):
+class SemEval23T3PropagandaDataset(DatasetBase):
    def __init__(self, techniques_path=None, **kwargs):
         # Get the path to the file listing the target techniques
         self.techniques_path = Path(techniques_path) if techniques_path else None
-        super(SemEval23T3Dataset, self).__init__(**kwargs)
+        super(SemEval23T3PropagandaDataset, self).__init__(**kwargs)
 
     def metadata():
         return {
diff --git a/llmebench/datasets/__init__.py b/llmebench/datasets/__init__.py
index 721f1f72..ee10d036 100644
--- a/llmebench/datasets/__init__.py
+++ b/llmebench/datasets/__init__.py
@@ -11,16 +11,16 @@
 from .ARCD import ARCDDataset
 from .ArSarcasm import ArSarcasmDataset
 from .ArSASSentiment import ArSASSentimentDataset
-from .Attentionworthy import AttentionworthyDataset
+from .CT22Attentionworthy import CT22AttentionworthyDataset
 from .BanglaSentiment import BanglaSentimentDataset
-from .Checkworthiness import CheckworthinessDataset
-from .Claim import CovidClaimDataset
+from .CT22Checkworthiness import CT22CheckworthinessDataset
+from .CT22Claim import CT22ClaimDataset
 from .DialectADI import DialectADIDataset
 from .Emotion import EmotionDataset
-from .FactualityCOVID19 import FactualityCOVID19Dataset
+from .COVID19Factuality import COVID19FactualityDataset
 from .FactualityKhouja20 import FactualityKhouja20Dataset
 from .FactualityUnifiedFC import FactualityUnifiedFCDataset
-from .Harmful import CovidHarmfulDataset
+from .CT22Harmful import CT22HarmfulDataset
 from .HateSpeech import HateSpeechDataset
 from .Lemmatization import LemmatizationDataset
 from .Location import LocationDataset
@@ -32,8 +32,8 @@
 from .NewsCatAlKhaleej import NewsCatAlKhaleejDataset
 from .NewsCatASND import NewsCatASNDDataset
 from .Offensive import OffensiveDataset
-from .Propaganda import PropagandaTweetDataset
-from .PropagandaSemEval23 import PropagandaSemEval23Dataset
+from .WANLP22T3Propaganda import WANLP22T3PropagandaDataset
+from .SemEval23T3Propaganda import SemEval23T3PropagandaDataset
 from .QADI import QADIDataset
 from .Spam import SpamDataset
 from .StanceKhouja20 import StanceKhouja20Dataset
@@ -41,7 +41,7 @@
 from .STSArSemEval17Track1 import STSArSemEval17Track1Dataset
 from .STSArSemEval17Track2 import STSArSemEval17Track2Dataset
 from .STSQ2Q import Q2QSimDataset
-from .Subjectivity import SubjectivityDataset
+from .CT23Subjectivity import CT23SubjectivityDataset
 from .TyDiQA import TyDiQADataset
 from .XNLI import XNLIDataset
 from .XQuAD import XQuADDataset
diff --git a/llmebench/tasks/MultilabelPropaganda.py b/llmebench/tasks/MultilabelPropaganda.py
index e0a33eed..fbabd1d8 100644
--- a/llmebench/tasks/MultilabelPropaganda.py
+++ b/llmebench/tasks/MultilabelPropaganda.py
@@ -4,18 +4,27 @@
 from llmebench.tasks.task_base import TaskBase
 
 
-class SemEval23T3PropagandaTask(TaskBase):
+class MultilabelPropagandaTask(TaskBase):
     def __init__(self, **kwargs):
-        super(SemEval23T3PropagandaTask, self).__init__(**kwargs)
+        super(MultilabelPropagandaTask, self).__init__(**kwargs)
 
     def evaluate(self, true_labels, predicted_labels):
-        # Handle cases when model fails!
-        # Flatten true labels as it is a list of lists
-        predicted_labels = [p if p else ["no_technique"] for p in predicted_labels]
-
         # Need the pre-defined list of techniques
         techniques = self.dataset.get_predefined_techniques()
 
+        # To generalize this task across datasets, find the dataset-specific
+        # "no technique" label, which serves as the fallback prediction below
+        no_technique_label = "no_technique"
+        for tch in techniques:
+            if "technique" in tch:
+                no_technique_label = tch
+                break
+
+        # Handle cases when the model fails!
+        # Fall back to the "no technique" label, keeping each prediction a
+        # list of techniques, as the multi-label binarizer below expects
+        predicted_labels = [p if p else [no_technique_label] for p in predicted_labels]
+
         # Binarize labels and use them for multi-label evaluation
         mlb = preprocessing.MultiLabelBinarizer(classes=techniques)
         mlb.fit([techniques])
diff --git a/llmebench/tasks/Subjectivity.py b/llmebench/tasks/Subjectivity.py
index e88e3e63..96272969 100644
--- a/llmebench/tasks/Subjectivity.py
+++ b/llmebench/tasks/Subjectivity.py
@@ -9,9 +9,9 @@
 from llmebench.tasks.task_base import TaskBase
 
 
-class CT23SubjectivityTask(TaskBase):
+class SubjectivityTask(TaskBase):
     def __init__(self, **kwargs):
-        super(CT23SubjectivityTask, self).__init__(**kwargs)
+        super(SubjectivityTask, self).__init__(**kwargs)
 
     def evaluate(self, gold_labels, pred_labels):
         pred_labels = [
diff --git a/llmebench/tasks/__init__.py b/llmebench/tasks/__init__.py
index f5255d83..cf476232 100644
--- a/llmebench/tasks/__init__.py
+++ b/llmebench/tasks/__init__.py
@@ -24,8 +24,7 @@
 from .NewsCatAlKhaleej import NewsCatAlKhaleejTask
 from .NewsCatASND import NewsCatASNDTask
 from .Offensive import OffensiveTask
-from .PropagandaMultilabel import PropagandaMultilabelTask
-from .PropagandaMultilabelSemEval23 import PropagandaMultilabelSemEval23Task
+from .MultilabelPropaganda import MultilabelPropagandaTask
 from .Q2QSimDetect import Q2QSimDetectionTask
 from .QA import QATask
 from .Sarcasm import SarcasmTask
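Note on the MultilabelPropagandaTask.evaluate hunk above: the fallback prediction is wrapped in a list ([no_technique_label], not the bare string), because scikit-learn's MultiLabelBinarizer treats each sample as an iterable of labels and would split a bare string into single characters. A minimal standalone sketch of the intended behavior (not part of the patch; the technique names here are invented for illustration):

    from sklearn import preprocessing

    # Hypothetical technique inventory; the real list is loaded via techniques_path
    techniques = ["no_technique", "loaded_language", "doubt"]

    # One prediction per sample; None stands for a failed model response
    predicted_labels = [["loaded_language"], None, ["doubt", "loaded_language"]]

    # Dataset-specific fallback label, mirroring the loop in the patched task
    no_technique_label = next((t for t in techniques if "technique" in t), "no_technique")

    # Keep each prediction a list so the binarizer sees iterables of labels
    predicted_labels = [p if p else [no_technique_label] for p in predicted_labels]

    mlb = preprocessing.MultiLabelBinarizer(classes=techniques)
    mlb.fit([techniques])
    print(mlb.transform(predicted_labels))
    # [[0 1 0]
    #  [1 0 0]
    #  [0 1 1]]

Without the list wrapper, the second sample would binarize to all zeros (its characters match no class), silently distorting the multi-label scores.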