Add Random model and assets for all tasks (#232)

This commit introduces a RandomModel that handles multiple task types and returns randomized results, which can be used to construct a useful baseline. The commit also adds Random baselines for almost all tasks except machine translation.
* Fix incorrect task imports
* Add Random Model
* Minor fixes to existing datasets/assets
* Add Random assets for all existing tasks
qcri · Sep 18, 2023 · 4464309 · 4464309
1 parent f4ec529
commit 4464309
Show file tree

Hide file tree

Showing 68 changed files with 2,342 additions and 19 deletions.
diff --git a/assets/ar/QA/ARCD_Random.py b/assets/ar/QA/ARCD_Random.py
@@ -0,0 +1,30 @@
+import random
+
+from llmebench.datasets import ARCDDataset
+from llmebench.models import RandomModel
+from llmebench.tasks import QATask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing ARCDDataset and QATask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``.
+    """
+    model_args = {"task_type": TaskType.QuestionAnswering}
+    asset_config = {
+        "dataset": ARCDDataset,
+        "dataset_args": {},
+        "task": QATask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Pick a random contiguous span of the context as the predicted answer.
+
+    The text at ``response["random_response"]["context"]`` is split on single
+    spaces. A start token is chosen uniformly at random, followed by a span
+    length chosen uniformly from 1 up to the number of tokens remaining.
+
+    Returns:
+        The selected tokens joined back together with single spaces.
+    """
+    tokens = response["random_response"]["context"].split(" ")
+
+    start_idx = random.randrange(len(tokens))
+    # Draw the length from 1..remaining (inclusive): a zero-length span would
+    # yield an empty answer, and excluding the upper bound would make the
+    # final context token unreachable.
+    answer_length = random.randint(1, len(tokens) - start_idx)
+
+    return " ".join(tokens[start_idx : start_idx + answer_length])
diff --git a/assets/ar/QA/MLQA_Random.py b/assets/ar/QA/MLQA_Random.py
@@ -0,0 +1,30 @@
+import random
+
+from llmebench.datasets import MLQADataset
+from llmebench.models import RandomModel
+from llmebench.tasks import QATask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing MLQADataset and QATask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``.
+    """
+    model_args = {"task_type": TaskType.QuestionAnswering}
+    asset_config = {
+        "dataset": MLQADataset,
+        "dataset_args": {},
+        "task": QATask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Pick a random contiguous span of the context as the predicted answer.
+
+    The text at ``response["random_response"]["context"]`` is split on single
+    spaces. A start token is chosen uniformly at random, followed by a span
+    length chosen uniformly from 1 up to the number of tokens remaining.
+
+    Returns:
+        The selected tokens joined back together with single spaces.
+    """
+    tokens = response["random_response"]["context"].split(" ")
+
+    start_idx = random.randrange(len(tokens))
+    # Draw the length from 1..remaining (inclusive): a zero-length span would
+    # yield an empty answer, and excluding the upper bound would make the
+    # final context token unreachable.
+    answer_length = random.randint(1, len(tokens) - start_idx)
+
+    return " ".join(tokens[start_idx : start_idx + answer_length])
diff --git a/assets/ar/QA/TyDiQA_Random.py b/assets/ar/QA/TyDiQA_Random.py
@@ -0,0 +1,30 @@
+import random
+
+from llmebench.datasets import TyDiQADataset
+from llmebench.models import RandomModel
+from llmebench.tasks import QATask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing TyDiQADataset and QATask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args`` (which sets ``test_split`` to ``"dev"``).
+    """
+    model_args = {"task_type": TaskType.QuestionAnswering}
+    general_args = {"test_split": "dev"}
+    asset_config = {
+        "dataset": TyDiQADataset,
+        "dataset_args": {},
+        "task": QATask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": general_args,
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Pick a random contiguous span of the context as the predicted answer.
+
+    The text at ``response["random_response"]["context"]`` is split on single
+    spaces. A start token is chosen uniformly at random, followed by a span
+    length chosen uniformly from 1 up to the number of tokens remaining.
+
+    Returns:
+        The selected tokens joined back together with single spaces.
+    """
+    tokens = response["random_response"]["context"].split(" ")
+
+    start_idx = random.randrange(len(tokens))
+    # Draw the length from 1..remaining (inclusive): a zero-length span would
+    # yield an empty answer, and excluding the upper bound would make the
+    # final context token unreachable.
+    answer_length = random.randint(1, len(tokens) - start_idx)
+
+    return " ".join(tokens[start_idx : start_idx + answer_length])
diff --git a/assets/ar/QA/XQuAD_Random.py b/assets/ar/QA/XQuAD_Random.py
@@ -0,0 +1,30 @@
+import random
+
+from llmebench.datasets import XQuADDataset
+from llmebench.models import RandomModel
+from llmebench.tasks import QATask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing XQuADDataset and QATask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``.
+    """
+    model_args = {"task_type": TaskType.QuestionAnswering}
+    asset_config = {
+        "dataset": XQuADDataset,
+        "dataset_args": {},
+        "task": QATask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Pick a random contiguous span of the context as the predicted answer.
+
+    The text at ``response["random_response"]["context"]`` is split on single
+    spaces. A start token is chosen uniformly at random, followed by a span
+    length chosen uniformly from 1 up to the number of tokens remaining.
+
+    Returns:
+        The selected tokens joined back together with single spaces.
+    """
+    tokens = response["random_response"]["context"].split(" ")
+
+    start_idx = random.randrange(len(tokens))
+    # Draw the length from 1..remaining (inclusive): a zero-length span would
+    # yield an empty answer, and excluding the upper bound would make the
+    # final context token unreachable.
+    answer_length = random.randint(1, len(tokens) - start_idx)
+
+    return " ".join(tokens[start_idx : start_idx + answer_length])
diff --git a/assets/ar/demographic_attributes/gender/ArabGend_Random.py b/assets/ar/demographic_attributes/gender/ArabGend_Random.py
@@ -0,0 +1,26 @@
+from llmebench.datasets import ArabGendDataset
+from llmebench.models import RandomModel
+from llmebench.tasks import DemographyGenderTask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing ArabGendDataset and DemographyGenderTask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``. The model arguments carry the
+        classification task type and the two labels ``"m"`` and ``"f"``.
+    """
+    class_labels = ["m", "f"]
+    model_args = {
+        "task_type": TaskType.Classification,
+        "class_labels": class_labels,
+    }
+    asset_config = {
+        "dataset": ArabGendDataset,
+        "dataset_args": {},
+        "task": DemographyGenderTask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Return the value stored under ``"random_response"`` in ``response``."""
+    prediction = response["random_response"]
+    return prediction
diff --git a/assets/ar/demographic_attributes/gender/ArapTweet_Random.py b/assets/ar/demographic_attributes/gender/ArapTweet_Random.py
@@ -0,0 +1,26 @@
+from llmebench.datasets import ArapTweetDataset
+from llmebench.models import RandomModel
+from llmebench.tasks import DemographyGenderTask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing ArapTweetDataset and DemographyGenderTask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``. The model arguments carry the
+        classification task type and the labels ``"Female"`` and ``"Male"``.
+    """
+    class_labels = ["Female", "Male"]
+    model_args = {
+        "task_type": TaskType.Classification,
+        "class_labels": class_labels,
+    }
+    asset_config = {
+        "dataset": ArapTweetDataset,
+        "dataset_args": {},
+        "task": DemographyGenderTask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Return the value stored under ``"random_response"`` in ``response``."""
+    prediction = response["random_response"]
+    return prediction
diff --git a/assets/ar/demographic_attributes/location/Location_Random.py b/assets/ar/demographic_attributes/location/Location_Random.py
@@ -0,0 +1,49 @@
+from llmebench.datasets import LocationDataset
+from llmebench.models import RandomModel
+from llmebench.tasks import DemographyLocationTask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing LocationDataset and DemographyLocationTask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``. The model arguments carry the
+        classification task type and the list of class labels.
+    """
+    # Label order is kept exactly as given; presumably these are country
+    # codes plus "OTHERS"/"UNK" catch-alls — verify against the dataset.
+    class_labels = [
+        "ae",
+        "OTHERS",
+        "bh",
+        "dz",
+        "eg",
+        "iq",
+        "jo",
+        "kw",
+        "lb",
+        "ly",
+        "ma",
+        "om",
+        "ps",
+        "qa",
+        "sa",
+        "sd",
+        "so",
+        "sy",
+        "tn",
+        "UNK",
+        "ye",
+        "mr",
+    ]
+    model_args = {
+        "task_type": TaskType.Classification,
+        "class_labels": class_labels,
+    }
+    asset_config = {
+        "dataset": LocationDataset,
+        "dataset_args": {},
+        "task": DemographyLocationTask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Return the value stored under ``"random_response"`` in ``response``."""
+    prediction = response["random_response"]
+    return prediction
diff --git a/assets/ar/demographic_attributes/name_info/NameInfo_Random.py b/assets/ar/demographic_attributes/name_info/NameInfo_Random.py
@@ -0,0 +1,127 @@
+from llmebench.datasets import NameInfoDataset
+from llmebench.models import RandomModel
+from llmebench.tasks import DemographyNameInfoTask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing NameInfoDataset and DemographyNameInfoTask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``. The model arguments carry the
+        classification task type and the list of class labels.
+    """
+    # Label order is kept exactly as given; presumably two-letter country
+    # codes — verify against the dataset.
+    class_labels = [
+        "gb",
+        "us",
+        "cl",
+        "fr",
+        "ru",
+        "pl",
+        "in",
+        "it",
+        "kr",
+        "gh",
+        "ca",
+        "sa",
+        "at",
+        "de",
+        "cn",
+        "br",
+        "dk",
+        "se",
+        "bd",
+        "cu",
+        "jp",
+        "be",
+        "es",
+        "co",
+        "id",
+        "iq",
+        "pk",
+        "tr",
+        "il",
+        "ch",
+        "ar",
+        "ro",
+        "nl",
+        "ps",
+        "ug",
+        "ir",
+        "cg",
+        "do",
+        "ee",
+        "tn",
+        "gr",
+        "np",
+        "ie",
+        "sy",
+        "hu",
+        "eg",
+        "ma",
+        "ve",
+        "ph",
+        "no",
+        "bg",
+        "si",
+        "ke",
+        "au",
+        "et",
+        "py",
+        "af",
+        "pt",
+        "th",
+        "bo",
+        "mx",
+        "lb",
+        "za",
+        "fi",
+        "hr",
+        "vn",
+        "ly",
+        "nz",
+        "qa",
+        "kh",
+        "ci",
+        "ng",
+        "sg",
+        "cm",
+        "dz",
+        "tz",
+        "ae",
+        "pe",
+        "az",
+        "lu",
+        "ec",
+        "cz",
+        "ua",
+        "uy",
+        "sd",
+        "ao",
+        "my",
+        "lv",
+        "kw",
+        "tw",
+        "bh",
+        "lk",
+        "ye",
+        "cr",
+        "jo",
+        "pa",
+        "om",
+        "uz",
+        "by",
+        "kz",
+    ]
+    model_args = {
+        "task_type": TaskType.Classification,
+        "class_labels": class_labels,
+    }
+    asset_config = {
+        "dataset": NameInfoDataset,
+        "dataset_args": {},
+        "task": DemographyNameInfoTask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Return the value stored under ``"random_response"`` in ``response``."""
+    prediction = response["random_response"]
+    return prediction
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Random.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Random.py
@@ -0,0 +1,26 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import RandomModel
+from llmebench.tasks import AdultTask, TaskType
+
+
+def config():
+    """Build the asset configuration pairing AdultDataset and AdultTask with RandomModel.
+
+    Returns:
+        A dict holding the dataset, task and model classes, their argument
+        dicts, and ``general_args``. The model arguments carry the
+        classification task type and the labels ``"ADULT"`` and ``"NOT_ADULT"``.
+    """
+    class_labels = ["ADULT", "NOT_ADULT"]
+    model_args = {
+        "task_type": TaskType.Classification,
+        "class_labels": class_labels,
+    }
+    asset_config = {
+        "dataset": AdultDataset,
+        "dataset_args": {},
+        "task": AdultTask,
+        "task_args": {},
+        "model": RandomModel,
+        "model_args": model_args,
+        "general_args": {},
+    }
+    return asset_config
+
+
+def prompt(input_sample):
+    """Return ``input_sample`` unchanged; no prompt formatting is applied here."""
+    model_input = input_sample
+    return model_input
+
+
+def post_process(response):
+    """Return the value stored under ``"random_response"`` in ``response``."""
+    prediction = response["random_response"]
+    return prediction