From 80d869234df768679e211b9b37c0da96fbe8bfab Mon Sep 17 00:00:00 2001
From: maramhasanain <maramhasanain@gmail.com>
Date: Mon, 4 Sep 2023 17:23:57 +0300
Subject: [PATCH] renamed datasets and assets

---
 ...=> CT22Attentionworthy_BLOOMZ_ZeroShot.py} |  4 +--
 ... => CT22Attentionworthy_GPT35_ZeroShot.py} |  4 +--
 ...py => CT22Attentionworthy_GPT4_FewShot.py} |  4 +--
 ...y => CT22Attentionworthy_GPT4_ZeroShot.py} |  4 +--
 ...=> CT22Checkworthiness_BLOOMZ_ZeroShot.py} |  4 +--
 ... => CT22Checkworthiness_GPT35_ZeroShot.py} |  4 +--
 ...py => CT22Checkworthiness_GPT4_FewShot.py} |  4 +--
 ...y => CT22Checkworthiness_GPT4_ZeroShot.py} |  4 +--
 ...roShot.py => CT22Claim_BLOOMZ_ZeroShot.py} |  4 +--
 ...eroShot.py => CT22Claim_GPT35_ZeroShot.py} |  4 +--
 ...4_FewShot.py => CT22Claim_GPT4_FewShot.py} |  4 +--
 ...ZeroShot.py => CT22Claim_GPT4_ZeroShot.py} |  4 +--
 ...y => COVID19Factuality_BLOOMZ_ZeroShot.py} |  4 +--
 ...t.py => COVID19Factuality_GPT4_FewShot.py} |  4 +--
 ....py => COVID19Factuality_GPT4_ZeroShot.py} |  4 +--
 ...Shot.py => CT22Harmful_BLOOMZ_ZeroShot.py} |  4 +--
 ...oShot.py => CT22Harmful_GPT35_ZeroShot.py} |  4 +--
 ...FewShot.py => CT22Harmful_GPT4_FewShot.py} |  4 +--
 ...roShot.py => CT22Harmful_GPT4_ZeroShot.py} |  4 +--
 ...roShot.py => WANLP22T3_BLOOMZ_ZeroShot.py} |  4 +--
 ...eroShot.py => WANLP22T3_GPT35_ZeroShot.py} |  4 +--
 ...4_FewShot.py => WANLP22T3_GPT4_FewShot.py} |  4 +--
 ...ZeroShot.py => WANLP22T3_GPT4_ZeroShot.py} |  4 +--
 ...py => CT23Subjectivity_BLOOMZ_ZeroShot.py} |  0
 ....py => CT23Subjectivity_GPT35_ZeroShot.py} |  0
 ...ot.py => CT23Subjectivity_GPT4_FewShot.py} |  0
 ...t.py => CT23Subjectivity_GPT4_ZeroShot.py} |  0
 ...=> CT22Checkworthiness_BLOOMZ_ZeroShot.py} |  4 +--
 ...py => CT22Checkworthiness_GPT4_FewShot.py} |  4 +--
 ...y => CT22Checkworthiness_GPT4_ZeroShot.py} |  4 +--
 ... SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} |  8 ++---
 ... => SemEval23T3Propaganda_GPT4_FewShot.py} |  8 ++---
 ...=> SemEval23T3Propaganda_GPT4_ZeroShot.py} |  8 ++---
 ...=> CT22Checkworthiness_BLOOMZ_ZeroShot.py} |  4 +--
 ...py => CT22Checkworthiness_GPT4_FewShot.py} |  4 +--
 ...y => CT22Checkworthiness_GPT4_ZeroShot.py} |  4 +--
 ... SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} |  8 ++---
 ... => SemEval23T3Propaganda_GPT4_FewShot.py} |  8 ++---
 ...=> SemEval23T3Propaganda_GPT4_ZeroShot.py} |  8 ++---
 ...=> CT22Checkworthiness_BLOOMZ_ZeroShot.py} |  4 +--
 ...py => CT22Checkworthiness_GPT4_FewShot.py} |  4 +--
 ...y => CT22Checkworthiness_GPT4_ZeroShot.py} |  4 +--
 ... SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} |  8 ++---
 ... => SemEval23T3Propaganda_GPT4_FewShot.py} |  8 ++---
 ...=> SemEval23T3Propaganda_GPT4_ZeroShot.py} |  8 ++---
 ... SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} |  8 ++---
 ... => SemEval23T3Propaganda_GPT4_FewShot.py} |  8 ++---
 ...=> SemEval23T3Propaganda_GPT4_ZeroShot.py} |  8 ++---
 ...=> CT22Checkworthiness_BLOOMZ_ZeroShot.py} |  4 +--
 ...py => CT22Checkworthiness_GPT4_FewShot.py} |  4 +--
 ...y => CT22Checkworthiness_GPT4_ZeroShot.py} |  4 +--
 ... SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} |  8 ++---
 ... => SemEval23T3Propaganda_GPT4_FewShot.py} |  8 ++---
 ...=> SemEval23T3Propaganda_GPT4_ZeroShot.py} |  8 ++---
 ... SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} |  8 ++---
 ... => SemEval23T3Propaganda_GPT4_FewShot.py} |  8 ++---
 ...=> SemEval23T3Propaganda_GPT4_ZeroShot.py} |  8 ++---
 ...=> CT22Checkworthiness_BLOOMZ_ZeroShot.py} |  4 +--
 ...py => CT22Checkworthiness_GPT4_FewShot.py} |  4 +--
 ...y => CT22Checkworthiness_GPT4_ZeroShot.py} |  4 +--
 ...tualityCOVID19.py => COVID19Factuality.py} |  0
 ...entionworthy.py => CT22Attentionworthy.py} |  2 +-
 ...ckworthiness.py => CT22Checkworthiness.py} |  2 +-
 llmebench/datasets/{Claim.py => CT22Claim.py} |  8 ++---
 .../datasets/{Harmful.py => CT22Harmful.py}   |  0
 .../{Subjectivity.py => CT23Subjectivity.py}  |  0
 ...aSemEval23.py => SemEval23T3Propaganda.py} |  7 ++---
 .../{Propaganda.py => WANLP22T3Propaganda.py} | 13 ++++++---
 ...elSemEval23.py => MultilabelPropaganda.py} |  6 ++--
 llmebench/tasks/PropagandaMultilabel.py       | 29 -------------------
 llmebench/tasks/Subjectivity.py               |  4 +--
 71 files changed, 169 insertions(+), 198 deletions(-)
 rename assets/ar/factuality_disinformation_harmful_content/attentionworthy/{Attentionworthy_BLOOMZ_ZeroShot.py => CT22Attentionworthy_BLOOMZ_ZeroShot.py} (93%)
 rename assets/ar/factuality_disinformation_harmful_content/attentionworthy/{Attentionworthy_GPT35_ZeroShot.py => CT22Attentionworthy_GPT35_ZeroShot.py} (95%)
 rename assets/ar/factuality_disinformation_harmful_content/attentionworthy/{Attentionworthy_GPT4_FewShot.py => CT22Attentionworthy_GPT4_FewShot.py} (97%)
 rename assets/ar/factuality_disinformation_harmful_content/attentionworthy/{Attentionworthy_GPT4_ZeroShot.py => CT22Attentionworthy_GPT4_ZeroShot.py} (95%)
 rename assets/ar/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_BLOOMZ_ZeroShot.py => CT22Checkworthiness_BLOOMZ_ZeroShot.py} (92%)
 rename assets/ar/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT35_ZeroShot.py => CT22Checkworthiness_GPT35_ZeroShot.py} (93%)
 rename assets/ar/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_FewShot.py => CT22Checkworthiness_GPT4_FewShot.py} (96%)
 rename assets/ar/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_ZeroShot.py => CT22Checkworthiness_GPT4_ZeroShot.py} (95%)
 rename assets/ar/factuality_disinformation_harmful_content/claim_detection/{ClaimDetectCOVID19_BLOOMZ_ZeroShot.py => CT22Claim_BLOOMZ_ZeroShot.py} (93%)
 rename assets/ar/factuality_disinformation_harmful_content/claim_detection/{ClaimDetectCOVID19_GPT35_ZeroShot.py => CT22Claim_GPT35_ZeroShot.py} (94%)
 rename assets/ar/factuality_disinformation_harmful_content/claim_detection/{ClaimDetectCOVID19_GPT4_FewShot.py => CT22Claim_GPT4_FewShot.py} (96%)
 rename assets/ar/factuality_disinformation_harmful_content/claim_detection/{ClaimDetectCOVID19_GPT4_ZeroShot.py => CT22Claim_GPT4_ZeroShot.py} (94%)
 rename assets/ar/factuality_disinformation_harmful_content/factuality/{FactualityCOVID19_BLOOMZ_ZeroShot.py => COVID19Factuality_BLOOMZ_ZeroShot.py} (93%)
 rename assets/ar/factuality_disinformation_harmful_content/factuality/{FactualityCOVID19_GPT4_FewShot.py => COVID19Factuality_GPT4_FewShot.py} (96%)
 rename assets/ar/factuality_disinformation_harmful_content/factuality/{FactualityCOVID19_GPT4_ZeroShot.py => COVID19Factuality_GPT4_ZeroShot.py} (94%)
 rename assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/{HarmfulDetectCOVID19_BLOOMZ_ZeroShot.py => CT22Harmful_BLOOMZ_ZeroShot.py} (94%)
 rename assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/{HarmfulDetectCOVID19_GPT35_ZeroShot.py => CT22Harmful_GPT35_ZeroShot.py} (94%)
 rename assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/{HarmfulDetectCOVID19_GPT4_FewShot.py => CT22Harmful_GPT4_FewShot.py} (96%)
 rename assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/{HarmfulDetectCOVID19_GPT4_ZeroShot.py => CT22Harmful_GPT4_ZeroShot.py} (96%)
 rename assets/ar/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_BLOOMZ_ZeroShot.py => WANLP22T3_BLOOMZ_ZeroShot.py} (98%)
 rename assets/ar/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT35_ZeroShot.py => WANLP22T3_GPT35_ZeroShot.py} (97%)
 rename assets/ar/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_FewShot.py => WANLP22T3_GPT4_FewShot.py} (97%)
 rename assets/ar/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_ZeroShot.py => WANLP22T3_GPT4_ZeroShot.py} (98%)
 rename assets/ar/factuality_disinformation_harmful_content/subjectivity/{Subjectivity_BLOOMZ_ZeroShot.py => CT23Subjectivity_BLOOMZ_ZeroShot.py} (100%)
 rename assets/ar/factuality_disinformation_harmful_content/subjectivity/{Subjectivity_GPT35_ZeroShot.py => CT23Subjectivity_GPT35_ZeroShot.py} (100%)
 rename assets/ar/factuality_disinformation_harmful_content/subjectivity/{Subjectivity_GPT4_FewShot.py => CT23Subjectivity_GPT4_FewShot.py} (100%)
 rename assets/ar/factuality_disinformation_harmful_content/subjectivity/{Subjectivity_GPT4_ZeroShot.py => CT23Subjectivity_GPT4_ZeroShot.py} (100%)
 rename assets/bg/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_BLOOMZ_ZeroShot.py => CT22Checkworthiness_BLOOMZ_ZeroShot.py} (92%)
 rename assets/bg/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_FewShot.py => CT22Checkworthiness_GPT4_FewShot.py} (96%)
 rename assets/bg/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_ZeroShot.py => CT22Checkworthiness_GPT4_ZeroShot.py} (95%)
 rename assets/de/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_BLOOMZ_ZeroShot.py => SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} (96%)
 rename assets/de/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_FewShot.py => SemEval23T3Propaganda_GPT4_FewShot.py} (97%)
 rename assets/de/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_ZeroShot.py => SemEval23T3Propaganda_GPT4_ZeroShot.py} (97%)
 rename assets/en/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_BLOOMZ_ZeroShot.py => CT22Checkworthiness_BLOOMZ_ZeroShot.py} (92%)
 rename assets/en/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_FewShot.py => CT22Checkworthiness_GPT4_FewShot.py} (96%)
 rename assets/en/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_ZeroShot.py => CT22Checkworthiness_GPT4_ZeroShot.py} (95%)
 rename assets/en/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_BLOOMZ_ZeroShot.py => SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} (96%)
 rename assets/en/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_FewShot.py => SemEval23T3Propaganda_GPT4_FewShot.py} (97%)
 rename assets/en/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_ZeroShot.py => SemEval23T3Propaganda_GPT4_ZeroShot.py} (97%)
 rename assets/es/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_BLOOMZ_ZeroShot.py => CT22Checkworthiness_BLOOMZ_ZeroShot.py} (91%)
 rename assets/es/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_FewShot.py => CT22Checkworthiness_GPT4_FewShot.py} (96%)
 rename assets/es/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_ZeroShot.py => CT22Checkworthiness_GPT4_ZeroShot.py} (95%)
 rename assets/fr/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_BLOOMZ_ZeroShot.py => SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} (96%)
 rename assets/fr/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_FewShot.py => SemEval23T3Propaganda_GPT4_FewShot.py} (97%)
 rename assets/fr/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_ZeroShot.py => SemEval23T3Propaganda_GPT4_ZeroShot.py} (97%)
 rename assets/it/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_BLOOMZ_ZeroShot.py => SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} (96%)
 rename assets/it/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_FewShot.py => SemEval23T3Propaganda_GPT4_FewShot.py} (97%)
 rename assets/it/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_ZeroShot.py => SemEval23T3Propaganda_GPT4_ZeroShot.py} (97%)
 rename assets/nl/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_BLOOMZ_ZeroShot.py => CT22Checkworthiness_BLOOMZ_ZeroShot.py} (92%)
 rename assets/nl/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_FewShot.py => CT22Checkworthiness_GPT4_FewShot.py} (96%)
 rename assets/nl/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_ZeroShot.py => CT22Checkworthiness_GPT4_ZeroShot.py} (95%)
 rename assets/pl/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_BLOOMZ_ZeroShot.py => SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} (96%)
 rename assets/pl/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_FewShot.py => SemEval23T3Propaganda_GPT4_FewShot.py} (97%)
 rename assets/pl/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_ZeroShot.py => SemEval23T3Propaganda_GPT4_ZeroShot.py} (97%)
 rename assets/ru/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_BLOOMZ_ZeroShot.py => SemEval23T3Propaganda_BLOOMZ_ZeroShot.py} (96%)
 rename assets/ru/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_FewShot.py => SemEval23T3Propaganda_GPT4_FewShot.py} (97%)
 rename assets/ru/factuality_disinformation_harmful_content/propaganda/{PropMultilabel_GPT4_ZeroShot.py => SemEval23T3Propaganda_GPT4_ZeroShot.py} (97%)
 rename assets/tr/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_BLOOMZ_ZeroShot.py => CT22Checkworthiness_BLOOMZ_ZeroShot.py} (91%)
 rename assets/tr/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_FewShot.py => CT22Checkworthiness_GPT4_FewShot.py} (96%)
 rename assets/tr/factuality_disinformation_harmful_content/checkworthyness/{Checkworthiness_GPT4_ZeroShot.py => CT22Checkworthiness_GPT4_ZeroShot.py} (95%)
 rename llmebench/datasets/{FactualityCOVID19.py => COVID19Factuality.py} (100%)
 rename llmebench/datasets/{Attentionworthy.py => CT22Attentionworthy.py} (96%)
 rename llmebench/datasets/{Checkworthiness.py => CT22Checkworthiness.py} (97%)
 rename llmebench/datasets/{Claim.py => CT22Claim.py} (90%)
 rename llmebench/datasets/{Harmful.py => CT22Harmful.py} (100%)
 rename llmebench/datasets/{Subjectivity.py => CT23Subjectivity.py} (100%)
 rename llmebench/datasets/{PropagandaSemEval23.py => SemEval23T3Propaganda.py} (95%)
 rename llmebench/datasets/{Propaganda.py => WANLP22T3Propaganda.py} (76%)
 rename llmebench/tasks/{PropagandaMultilabelSemEval23.py => MultilabelPropaganda.py} (86%)
 delete mode 100644 llmebench/tasks/PropagandaMultilabel.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_BLOOMZ_ZeroShot.py
similarity index 93%
rename from assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_BLOOMZ_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_BLOOMZ_ZeroShot.py
index 37f15dde..de9d63a1 100644
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import AttentionworthyDataset
+from llmebench.datasets import CT22AttentionworthyDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import AttentionworthyTask
 
 
 def config():
     return {
-        "dataset": AttentionworthyDataset,
+        "dataset": CT22AttentionworthyDataset,
         "dataset_args": {},
         "task": AttentionworthyTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py
similarity index 95%
rename from assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT35_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py
index 23d7f66c..1e13fbc5 100644
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT35_ZeroShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import AttentionworthyDataset
+from llmebench.datasets import CT22AttentionworthyDataset
 from llmebench.models import GPTModel, RandomGPTModel
 from llmebench.tasks import AttentionworthyTask
 
 
 def config():
     return {
-        "dataset": AttentionworthyDataset,
+        "dataset": CT22AttentionworthyDataset,
         "dataset_args": {},
         "task": AttentionworthyTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py
similarity index 97%
rename from assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT4_FewShot.py
rename to assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py
index 92c7a32b..dc9355fd 100644
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_FewShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import AttentionworthyDataset
+from llmebench.datasets import CT22AttentionworthyDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import AttentionworthyTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": AttentionworthyDataset,
+        "dataset": CT22AttentionworthyDataset,
         "dataset_args": {},
         "task": AttentionworthyTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py
similarity index 95%
rename from assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT4_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py
index 69da7868..82f465f7 100644
--- a/assets/ar/factuality_disinformation_harmful_content/attentionworthy/Attentionworthy_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/attentionworthy/CT22Attentionworthy_GPT4_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import AttentionworthyDataset
+from llmebench.datasets import CT22AttentionworthyDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import AttentionworthyTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": AttentionworthyDataset,
+        "dataset": CT22AttentionworthyDataset,
         "dataset_args": {},
         "task": AttentionworthyTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
similarity index 92%
rename from assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
index 7d7c512f..4b852302 100644
--- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT35_ZeroShot.py
similarity index 93%
rename from assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT35_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT35_ZeroShot.py
index 5e60e8a5..ec0d9a82 100644
--- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT35_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTModel, RandomGPTModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
similarity index 96%
rename from assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
rename to assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
index 444fb4de..85c628f8 100644
--- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
similarity index 95%
rename from assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
index d90cb0ce..60f3b7d5 100644
--- a/assets/ar/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_BLOOMZ_ZeroShot.py
similarity index 93%
rename from assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_BLOOMZ_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_BLOOMZ_ZeroShot.py
index 63be543d..4e171616 100644
--- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CovidClaimDataset
+from llmebench.datasets import CT22ClaimDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import ClaimDetectionTask
 
 
 def config():
     return {
-        "dataset": CovidClaimDataset,
+        "dataset": CT22ClaimDataset,
         "dataset_args": {},
         "task": ClaimDetectionTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT35_ZeroShot.py
similarity index 94%
rename from assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT35_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT35_ZeroShot.py
index f8e2ba13..64a027ea 100644
--- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT35_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CovidClaimDataset
+from llmebench.datasets import CT22ClaimDataset
 from llmebench.models import GPTModel
 from llmebench.tasks import ClaimDetectionTask
 
 
 def config():
     return {
-        "dataset": CovidClaimDataset,
+        "dataset": CT22ClaimDataset,
         "dataset_args": {},
         "task": ClaimDetectionTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot.py
similarity index 96%
rename from assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT4_FewShot.py
rename to assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot.py
index 8ed99f9a..6059c977 100644
--- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CovidClaimDataset
+from llmebench.datasets import CT22ClaimDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import ClaimDetectionTask
 
 
 def config():
     return {
-        "dataset": CovidClaimDataset,
+        "dataset": CT22ClaimDataset,
         "dataset_args": {},
         "task": ClaimDetectionTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py
similarity index 94%
rename from assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT4_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py
index 1e9c3c73..e2112584 100644
--- a/assets/ar/factuality_disinformation_harmful_content/claim_detection/ClaimDetectCOVID19_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py
similarity index 93%
rename from assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_BLOOMZ_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py
index fa4dc0dd..ce18ac41 100644
--- a/assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import FactualityCOVID19Dataset
+from llmebench.datasets import COVID19FactualityDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import FactualityCOVID19Task
 
 
 def config():
     return {
-        "dataset": FactualityCOVID19Dataset,
+        "dataset": COVID19FactualityDataset,
         "dataset_args": {},
         "task": FactualityCOVID19Task,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py
similarity index 96%
rename from assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_GPT4_FewShot.py
rename to assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py
index e165251a..1d835f46 100644
--- a/assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_FewShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import FactualityCOVID19Dataset
+from llmebench.datasets import COVID19FactualityDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import FactualityCOVID19Task
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": FactualityCOVID19Dataset,
+        "dataset": COVID19FactualityDataset,
         "dataset_args": {},
         "task": FactualityCOVID19Task,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py
similarity index 94%
rename from assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_GPT4_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py
index 6b74eb7e..94d2f1fd 100644
--- a/assets/ar/factuality_disinformation_harmful_content/factuality/FactualityCOVID19_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/factuality/COVID19Factuality_GPT4_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import FactualityCOVID19Dataset
+from llmebench.datasets import COVID19FactualityDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import FactualityCOVID19Task
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": FactualityCOVID19Dataset,
+        "dataset": COVID19FactualityDataset,
         "dataset_args": {},
         "task": FactualityCOVID19Task,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_BLOOMZ_ZeroShot.py
similarity index 94%
rename from assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_BLOOMZ_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_BLOOMZ_ZeroShot.py
index 20cb07f7..ac1040ae 100644
--- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CovidHarmfulDataset
+from llmebench.datasets import CT22HarmfulDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import HarmfulDetectionTask
 
 
 def config():
     return {
-        "dataset": CovidHarmfulDataset,
+        "dataset": CT22HarmfulDataset,
         "dataset_args": {},
         "task": HarmfulDetectionTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT35_ZeroShot.py
similarity index 94%
rename from assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT35_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT35_ZeroShot.py
index 8db951ae..b2f6696f 100644
--- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT35_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CovidHarmfulDataset
+from llmebench.datasets import CT22HarmfulDataset
 from llmebench.models import GPTModel
 from llmebench.tasks import HarmfulDetectionTask
 
 
 def config():
     return {
-        "dataset": CovidHarmfulDataset,
+        "dataset": CT22HarmfulDataset,
         "dataset_args": {},
         "task": HarmfulDetectionTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot.py
similarity index 96%
rename from assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT4_FewShot.py
rename to assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot.py
index fe497189..92cdce83 100644
--- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CovidHarmfulDataset
+from llmebench.datasets import CT22HarmfulDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import HarmfulDetectionTask
 
 
 def config():
     return {
-        "dataset": CovidHarmfulDataset,
+        "dataset": CT22HarmfulDataset,
         "dataset_args": {},
         "task": HarmfulDetectionTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot.py
similarity index 96%
rename from assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT4_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot.py
index 8b0813b0..3779a9d6 100644
--- a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/HarmfulDetectCOVID19_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py
similarity index 98%
rename from assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py
index 0247eaa6..fd36a6a6 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_BLOOMZ_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import PropagandaTweetDataset
+from llmebench.datasets import WANLP22PropagandaDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import PropagandaMultilabelTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": PropagandaTweetDataset,
+        "dataset": WANLP22PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py
similarity index 97%
rename from assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT35_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py
index b9ad7f81..873833b7 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT35_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT35_ZeroShot.py
@@ -2,14 +2,14 @@
 
 import regex as re
 
-from llmebench.datasets import PropagandaTweetDataset
+from llmebench.datasets import WANLP22PropagandaDataset
 from llmebench.models import GPTModel
 from llmebench.tasks import PropagandaMultilabelTask
 
 
 def config():
     return {
-        "dataset": PropagandaTweetDataset,
+        "dataset": WANLP22PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py
similarity index 97%
rename from assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
rename to assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py
index 0b1f913b..e43084d8 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import PropagandaTweetDataset
+from llmebench.datasets import WANLP22PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import PropagandaMultilabelTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": PropagandaTweetDataset,
+        "dataset": WANLP22PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py
similarity index 98%
rename from assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py
index 271385fe..d5e1a6cf 100644
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot.py
@@ -2,7 +2,7 @@
 import random
 import re
 
-from llmebench.datasets import PropagandaTweetDataset
+from llmebench.datasets import WANLP22PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import PropagandaMultilabelTask
 
@@ -12,7 +12,7 @@
 
 def config():
     return {
-        "dataset": PropagandaTweetDataset,
+        "dataset": WANLP22PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda/classes.txt"
         },
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_BLOOMZ_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py
similarity index 100%
rename from assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_BLOOMZ_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_BLOOMZ_ZeroShot.py
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_GPT35_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py
similarity index 100%
rename from assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_GPT35_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT35_ZeroShot.py
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py
similarity index 100%
rename from assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_GPT4_FewShot.py
rename to assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_FewShot.py
diff --git a/assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py
similarity index 100%
rename from assets/ar/factuality_disinformation_harmful_content/subjectivity/Subjectivity_GPT4_ZeroShot.py
rename to assets/ar/factuality_disinformation_harmful_content/subjectivity/CT23Subjectivity_GPT4_ZeroShot.py
diff --git a/assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py b/assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
similarity index 92%
rename from assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
rename to assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
index 10c2a3bf..ff303850 100644
--- a/assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
+++ b/assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py b/assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
similarity index 96%
rename from assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
rename to assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
index 495d05d2..37cfbea5 100644
--- a/assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
+++ b/assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py b/assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
similarity index 95%
rename from assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
rename to assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
index 2d118f14..408435e3 100644
--- a/assets/bg/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
+++ b/assets/bg/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
similarity index 96%
rename from assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
rename to assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index 7f38863d..f49e669f 100644
--- a/assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
+++ b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
similarity index 97%
rename from assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
rename to assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index 70523f8e..0c464fb3 100644
--- a/assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
+++ b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
similarity index 97%
rename from assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
rename to assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index e7836e14..aea9d3f9 100644
--- a/assets/de/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
+++ b/assets/de/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -2,18 +2,18 @@
 
 import regex as re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py b/assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
similarity index 92%
rename from assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
rename to assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
index 5bf0e294..296280ce 100644
--- a/assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py b/assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
similarity index 96%
rename from assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
rename to assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
index d0b67175..bf4b0537 100644
--- a/assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py b/assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
similarity index 95%
rename from assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
rename to assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
index 8cb18b32..fa4d5af5 100644
--- a/assets/en/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
similarity index 96%
rename from assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
rename to assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index 6fa517a7..904c14e3 100644
--- a/assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
similarity index 97%
rename from assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
rename to assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index e7f33f39..624f3318 100644
--- a/assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
similarity index 97%
rename from assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
rename to assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index 590ca1ab..659e6faa 100644
--- a/assets/en/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
+++ b/assets/en/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -2,18 +2,18 @@
 
 import regex as re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py b/assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
similarity index 91%
rename from assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
rename to assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
index d8c1a989..977f9864 100644
--- a/assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
+++ b/assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py b/assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
similarity index 96%
rename from assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
rename to assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
index e6099674..0f5c195f 100644
--- a/assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
+++ b/assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py b/assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
similarity index 95%
rename from assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
rename to assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
index 3ed072e6..bb7c65fd 100644
--- a/assets/es/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
+++ b/assets/es/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
similarity index 96%
rename from assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
rename to assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index cc58c362..f3b2dd9c 100644
--- a/assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
+++ b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
similarity index 97%
rename from assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
rename to assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index 2c642c7f..7f49bd7b 100644
--- a/assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
+++ b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
similarity index 97%
rename from assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
rename to assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index 6c7c4dc7..ae106b5c 100644
--- a/assets/fr/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
+++ b/assets/fr/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -2,18 +2,18 @@
 
 import regex as re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
similarity index 96%
rename from assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
rename to assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index 9103c03c..711ce596 100644
--- a/assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
+++ b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
similarity index 97%
rename from assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
rename to assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index 2b782ca3..8359def4 100644
--- a/assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
+++ b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
similarity index 97%
rename from assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
rename to assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index 511ca715..8e939b86 100644
--- a/assets/it/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
+++ b/assets/it/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -2,18 +2,18 @@
 
 import regex as re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py b/assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
similarity index 92%
rename from assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
rename to assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
index 06793d52..c373d29d 100644
--- a/assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
+++ b/assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py b/assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
similarity index 96%
rename from assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
rename to assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
index 7f241509..a7677af1 100644
--- a/assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
+++ b/assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py b/assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
similarity index 95%
rename from assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
rename to assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
index f52b350c..4caf1405 100644
--- a/assets/nl/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
+++ b/assets/nl/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
similarity index 96%
rename from assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
rename to assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index 4fca8321..b8cf551e 100644
--- a/assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
+++ b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
similarity index 97%
rename from assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
rename to assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index d9447aaa..de992641 100644
--- a/assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
+++ b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
similarity index 97%
rename from assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
rename to assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index cc0a834b..1d7886ea 100644
--- a/assets/pl/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
+++ b/assets/pl/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -2,18 +2,18 @@
 
 import regex as re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
similarity index 96%
rename from assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
rename to assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
index 9b3114e6..2687affd 100644
--- a/assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_BLOOMZ_ZeroShot.py
+++ b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_BLOOMZ_ZeroShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import BLOOMPetalModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": BLOOMPetalModel,
         "model_args": {
diff --git a/assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
similarity index 97%
rename from assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
rename to assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
index d4abcaac..5b6551b8 100644
--- a/assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_FewShot.py
+++ b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_FewShot.py
@@ -1,18 +1,18 @@
 import os
 import re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
similarity index 97%
rename from assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
rename to assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
index e2557173..9633b020 100644
--- a/assets/ru/factuality_disinformation_harmful_content/propaganda/PropMultilabel_GPT4_ZeroShot.py
+++ b/assets/ru/factuality_disinformation_harmful_content/propaganda/SemEval23T3Propaganda_GPT4_ZeroShot.py
@@ -2,18 +2,18 @@
 
 import regex as re
 
-from llmebench.datasets import PropagandaSemEval23Dataset
+from llmebench.datasets import SemEval23T3PropagandaDataset
 from llmebench.models import GPTChatCompletionModel
-from llmebench.tasks import PropagandaMultilabelSemEval23Task
+from llmebench.tasks import SemEval23T3PropagandaTask
 
 
 def config():
     return {
-        "dataset": PropagandaSemEval23Dataset,
+        "dataset": SemEval23T3PropagandaDataset,
         "dataset_args": {
             "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
         },
-        "task": PropagandaMultilabelSemEval23Task,
+        "task": SemEval23T3PropagandaTask,
         "task_args": {},
         "model": GPTChatCompletionModel,
         "model_args": {
diff --git a/assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py b/assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
similarity index 91%
rename from assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
rename to assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
index 61338f04..a3c08771 100644
--- a/assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_BLOOMZ_ZeroShot.py
+++ b/assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_BLOOMZ_ZeroShot.py
@@ -1,13 +1,13 @@
 import os
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import BLOOMPetalModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py b/assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
similarity index 96%
rename from assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
rename to assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
index c1ae1b57..c940592f 100644
--- a/assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_FewShot.py
+++ b/assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_FewShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py b/assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
similarity index 95%
rename from assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
rename to assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
index 14ebc259..5041a952 100644
--- a/assets/tr/factuality_disinformation_harmful_content/checkworthyness/Checkworthiness_GPT4_ZeroShot.py
+++ b/assets/tr/factuality_disinformation_harmful_content/checkworthyness/CT22Checkworthiness_GPT4_ZeroShot.py
@@ -1,14 +1,14 @@
 import os
 import re
 
-from llmebench.datasets import CheckworthinessDataset
+from llmebench.datasets import CT22CheckworthinessDataset
 from llmebench.models import GPTChatCompletionModel
 from llmebench.tasks import CheckworthinessTask
 
 
 def config():
     return {
-        "dataset": CheckworthinessDataset,
+        "dataset": CT22CheckworthinessDataset,
         "dataset_args": {},
         "task": CheckworthinessTask,
         "task_args": {},
diff --git a/llmebench/datasets/FactualityCOVID19.py b/llmebench/datasets/COVID19Factuality.py
similarity index 100%
rename from llmebench/datasets/FactualityCOVID19.py
rename to llmebench/datasets/COVID19Factuality.py
diff --git a/llmebench/datasets/Attentionworthy.py b/llmebench/datasets/CT22Attentionworthy.py
similarity index 96%
rename from llmebench/datasets/Attentionworthy.py
rename to llmebench/datasets/CT22Attentionworthy.py
index f8978a5b..90123c31 100644
--- a/llmebench/datasets/Attentionworthy.py
+++ b/llmebench/datasets/CT22Attentionworthy.py
@@ -12,7 +12,7 @@ def get_data_sample(self):
 
     def metadata():
         return {
-            "language": "ar",
+            "language": ["ar", "bg", "nl", "en", "tr"],
             "citation": """@InProceedings{clef-checkthat:2022:task1,
                 author = {Nakov, Preslav and Barr\\'{o}n-Cede\\~{n}o, Alberto and Da San Martino, Giovanni and Alam, Firoj and M\\'{\\i}guez, Rub\'{e}n and Caselli, Tommaso and Kutlu, Mucahid and Zaghouani, Wajdi and Li, Chengkai and Shaar, Shaden and Mubarak, Hamdy and Nikolov, Alex and Kartal, Yavuz Selim and Beltr\\'{a}n, Javier},
                 title = "Overview of the {CLEF}-2022 {CheckThat}! Lab Task 1 on Identifying Relevant Claims in Tweets",
diff --git a/llmebench/datasets/Checkworthiness.py b/llmebench/datasets/CT22Checkworthiness.py
similarity index 97%
rename from llmebench/datasets/Checkworthiness.py
rename to llmebench/datasets/CT22Checkworthiness.py
index 9556d768..d257b219 100644
--- a/llmebench/datasets/Checkworthiness.py
+++ b/llmebench/datasets/CT22Checkworthiness.py
@@ -17,7 +17,7 @@ def get_data_sample(self):
 
     def metadata():
         return {
-            "language": "ar",
+            "language": ["ar", "bg", "nl", "en", "tr"],
             "citation": """@inproceedings{nakov2022overview,
                   title={Overview of the clef--2022 checkthat! lab on fighting the covid-19 infodemic and fake news detection},
                   author={Nakov, Preslav and Barr{\\'o}n-Cede{\\~n}o, Alberto and da San Martino, Giovanni and Alam, Firoj and Stru{\\ss}, Julia Maria and Mandl, Thomas and M{\\'\\i}guez, Rub{\\'e}n and Caselli, Tommaso and Kutlu, Mucahid and Zaghouani, Wajdi and others},
diff --git a/llmebench/datasets/Claim.py b/llmebench/datasets/CT22Claim.py
similarity index 90%
rename from llmebench/datasets/Claim.py
rename to llmebench/datasets/CT22Claim.py
index 923ad528..83e8faf2 100644
--- a/llmebench/datasets/Claim.py
+++ b/llmebench/datasets/CT22Claim.py
@@ -1,15 +1,13 @@
-import pandas as pd
-
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class CovidClaimDataset(DatasetBase):
+class CT22ClaimDataset(DatasetBase):
     def __init__(self, **kwargs):
-        super(CovidClaimDataset, self).__init__(**kwargs)
+        super(CT22ClaimDataset, self).__init__(**kwargs)
 
     def metadata():
         return {
-            "language": "ar",
+            "language": ["ar", "bg", "nl", "en", "tr"],
             "citation": """@inproceedings{nakov2022overview,
                 title={Overview of the CLEF-2022 CheckThat! lab task 1 on identifying relevant claims in tweets},
                 author={Nakov, Preslav and Barr{\\o}n-Cede{\\~n}o, Alberto and Da San Martino, Giovanni and Alam, Firoj and Kutlu, Mucahid and Zaghouani, Wajdi and Li, Chengkai and Shaar, Shaden and Mubarak, Hamdy and Nikolov, Alex},
diff --git a/llmebench/datasets/Harmful.py b/llmebench/datasets/CT22Harmful.py
similarity index 100%
rename from llmebench/datasets/Harmful.py
rename to llmebench/datasets/CT22Harmful.py
diff --git a/llmebench/datasets/Subjectivity.py b/llmebench/datasets/CT23Subjectivity.py
similarity index 100%
rename from llmebench/datasets/Subjectivity.py
rename to llmebench/datasets/CT23Subjectivity.py
diff --git a/llmebench/datasets/PropagandaSemEval23.py b/llmebench/datasets/SemEval23T3Propaganda.py
similarity index 95%
rename from llmebench/datasets/PropagandaSemEval23.py
rename to llmebench/datasets/SemEval23T3Propaganda.py
index cc130442..b0e13173 100644
--- a/llmebench/datasets/PropagandaSemEval23.py
+++ b/llmebench/datasets/SemEval23T3Propaganda.py
@@ -1,19 +1,18 @@
 import json
-import os
 from pathlib import Path
 
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class PropagandaSemEval23Dataset(DatasetBase):
+class SemEval23T3PropagandaDataset(DatasetBase):
     def __init__(self, techniques_path=None, **kwargs):
         # Get the path to the file listing the target techniques
         self.techniques_path = Path(techniques_path) if techniques_path else None
-        super(PropagandaSemEval23Dataset, self).__init__(**kwargs)
+        super(SemEval23T3PropagandaDataset, self).__init__(**kwargs)
 
     def metadata():
         return {
-            "language": "multilingual",
+            "language": ["en", "es", "fr", "de", "el", "it", "ka", "pl", "ru"],
             "citation": """@inproceedings{piskorski-etal-2023-semeval,
                 title = "{S}em{E}val-2023 Task 3: Detecting the Category, the Framing, and the Persuasion Techniques in Online News in a Multi-lingual Setup",
                 author = "Piskorski, Jakub  and
diff --git a/llmebench/datasets/Propaganda.py b/llmebench/datasets/WANLP22T3Propaganda.py
similarity index 76%
rename from llmebench/datasets/Propaganda.py
rename to llmebench/datasets/WANLP22T3Propaganda.py
index e3e7074f..fc623ca8 100644
--- a/llmebench/datasets/Propaganda.py
+++ b/llmebench/datasets/WANLP22T3Propaganda.py
@@ -5,17 +5,22 @@
 from llmebench.datasets.dataset_base import DatasetBase
 
 
-class PropagandaTweetDataset(DatasetBase):
+class WANLP22T3PropagandaDataset(DatasetBase):
     def __init__(self, techniques_path=None, **kwargs):
         # Get the path to the file listing the target techniques
         self.techniques_path = Path(techniques_path) if techniques_path else None
-        super(PropagandaTweetDataset, self).__init__(**kwargs)
+        super(WANLP22T3PropagandaDataset, self).__init__(**kwargs)
 
     def metadata():
         return {
             "language": "ar",
-            "citation": """@article{wanlp2023,
-                year={2023}
+            "citation": """@inproceedings{alam2022overview,
+              title={Overview of the {WANLP} 2022 Shared Task on Propaganda Detection in {A}rabic},
+              author={Alam, Firoj and Mubarak, Hamdy and Zaghouani, Wajdi and Da San Martino, Giovanni and Nakov, Preslav and others},
+              booktitle={Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)},
+              pages={108--118},
+              year={2022},
+              organization={Association for Computational Linguistics}
             }""",
         }
 
diff --git a/llmebench/tasks/PropagandaMultilabelSemEval23.py b/llmebench/tasks/MultilabelPropaganda.py
similarity index 86%
rename from llmebench/tasks/PropagandaMultilabelSemEval23.py
rename to llmebench/tasks/MultilabelPropaganda.py
index 5e9b6dcd..e0a33eed 100644
--- a/llmebench/tasks/PropagandaMultilabelSemEval23.py
+++ b/llmebench/tasks/MultilabelPropaganda.py
@@ -1,14 +1,12 @@
-import itertools
-
 from sklearn import preprocessing
 from sklearn.metrics import f1_score
 
 from llmebench.tasks.task_base import TaskBase
 
 
-class PropagandaMultilabelSemEval23Task(TaskBase):
+class SemEval23T3PropagandaTask(TaskBase):
     def __init__(self, **kwargs):
-        super(PropagandaMultilabelSemEval23Task, self).__init__(**kwargs)
+        super(SemEval23T3PropagandaTask, self).__init__(**kwargs)
 
     def evaluate(self, true_labels, predicted_labels):
         # Handle cases when model fails!
diff --git a/llmebench/tasks/PropagandaMultilabel.py b/llmebench/tasks/PropagandaMultilabel.py
deleted file mode 100644
index c661a2a1..00000000
--- a/llmebench/tasks/PropagandaMultilabel.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import itertools
-
-from sklearn import preprocessing
-from sklearn.metrics import f1_score
-
-from llmebench.tasks.task_base import TaskBase
-
-
-class PropagandaMultilabelTask(TaskBase):
-    def __init__(self, **kwargs):
-        super(PropagandaMultilabelTask, self).__init__(**kwargs)
-
-    def evaluate(self, true_labels, predicted_labels):
-        # Handle cases when model fails!
-        # Flatten true labels as it is a list of lists
-        predicted_labels = [p if p else ["no technique"] for p in predicted_labels]
-
-        # Need the pre-defined list of techniques
-        techniques = self.dataset.get_predefined_techniques()
-
-        # Binarize labels and use them for multi-label evaluation
-        mlb = preprocessing.MultiLabelBinarizer()
-        mlb.fit([techniques])
-        gold = mlb.transform(true_labels)
-        pred = mlb.transform(predicted_labels)
-
-        micro_f1 = f1_score(gold, pred, average="micro")
-
-        return {"Micro F1": micro_f1}
diff --git a/llmebench/tasks/Subjectivity.py b/llmebench/tasks/Subjectivity.py
index 96272969..e88e3e63 100644
--- a/llmebench/tasks/Subjectivity.py
+++ b/llmebench/tasks/Subjectivity.py
@@ -9,9 +9,9 @@
 from llmebench.tasks.task_base import TaskBase
 
 
-class SubjectivityTask(TaskBase):
+class CT23SubjectivityTask(TaskBase):
     def __init__(self, **kwargs):
-        super(SubjectivityTask, self).__init__(**kwargs)
+        super(CT23SubjectivityTask, self).__init__(**kwargs)
 
     def evaluate(self, gold_labels, pred_labels):
         pred_labels = [