Add Random model and assets for all tasks (#232)
This commit introduces a RandomModel that handles multiple task types and returns randomized results, which can be used to construct a useful baseline. The commit also adds Random baselines for all existing tasks except machine translation.

* Fix incorrect task imports

* Add Random Model

* Minor fixes to existing datasets/assets

* Add Random assets for all existing tasks
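
The RandomModel implementation itself is not part of the diff excerpt below, but the asset files imply its interface: classification assets read a randomly chosen class label from response["random_response"], while QA assets read response["random_response"]["context"] and cut a random answer span out of it. A minimal sketch along those lines (the class name, constructor signature, and predict entry point are assumptions for illustration, not the actual llmebench code) could look like this:

import random


class RandomModelSketch:
    # Hypothetical stand-in for llmebench.models.RandomModel, inferred from
    # the "model_args" and post_process code in the assets below.
    def __init__(self, task_type, class_labels=None, **kwargs):
        self.task_type = task_type
        self.class_labels = class_labels

    def predict(self, input_sample):
        # Classification-style tasks: return one of the configured labels,
        # chosen uniformly at random.
        if self.class_labels:
            return {"random_response": random.choice(self.class_labels)}
        # QA-style tasks: echo the sample's context (assumed to live under a
        # "context" key) so the asset's post_process can sample a random
        # answer span from it.
        return {"random_response": {"context": input_sample["context"]}}

With a response of this shape, the QA assets below only need the context string, and the classification assets can return random_response as-is.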
fdalvi authored Sep 18, 2023
1 parent f4ec529 commit 4464309
Showing 68 changed files with 2,342 additions and 19 deletions.
30 changes: 30 additions & 0 deletions assets/ar/QA/ARCD_Random.py
@@ -0,0 +1,30 @@
import random

from llmebench.datasets import ARCDDataset
from llmebench.models import RandomModel
from llmebench.tasks import QATask, TaskType


def config():
    return {
        "dataset": ARCDDataset,
        "dataset_args": {},
        "task": QATask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {"task_type": TaskType.QuestionAnswering},
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    # The random response for QA tasks carries the sample's context;
    # tokenize it on whitespace.
    tokens = response["random_response"]["context"].split(" ")

    # Pick a random start token and a random span length that stays within
    # the context, then return that contiguous slice as the answer.
    start_idx = random.choice(range(len(tokens)))
    answer_length = random.choice(range(len(tokens) - start_idx))

    return " ".join(tokens[start_idx : start_idx + answer_length])
30 changes: 30 additions & 0 deletions assets/ar/QA/MLQA_Random.py
@@ -0,0 +1,30 @@
import random

from llmebench.datasets import MLQADataset
from llmebench.models import RandomModel
from llmebench.tasks import QATask, TaskType


def config():
    return {
        "dataset": MLQADataset,
        "dataset_args": {},
        "task": QATask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {"task_type": TaskType.QuestionAnswering},
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    tokens = response["random_response"]["context"].split(" ")

    start_idx = random.choice(range(len(tokens)))
    answer_length = random.choice(range(len(tokens) - start_idx))

    return " ".join(tokens[start_idx : start_idx + answer_length])
30 changes: 30 additions & 0 deletions assets/ar/QA/TyDiQA_Random.py
@@ -0,0 +1,30 @@
import random

from llmebench.datasets import TyDiQADataset
from llmebench.models import RandomModel
from llmebench.tasks import QATask, TaskType


def config():
    return {
        "dataset": TyDiQADataset,
        "dataset_args": {},
        "task": QATask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {"task_type": TaskType.QuestionAnswering},
        "general_args": {"test_split": "dev"},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    tokens = response["random_response"]["context"].split(" ")

    start_idx = random.choice(range(len(tokens)))
    answer_length = random.choice(range(len(tokens) - start_idx))

    return " ".join(tokens[start_idx : start_idx + answer_length])
30 changes: 30 additions & 0 deletions assets/ar/QA/XQuAD_Random.py
@@ -0,0 +1,30 @@
import random

from llmebench.datasets import XQuADDataset
from llmebench.models import RandomModel
from llmebench.tasks import QATask, TaskType


def config():
    return {
        "dataset": XQuADDataset,
        "dataset_args": {},
        "task": QATask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {"task_type": TaskType.QuestionAnswering},
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    tokens = response["random_response"]["context"].split(" ")

    start_idx = random.choice(range(len(tokens)))
    answer_length = random.choice(range(len(tokens) - start_idx))

    return " ".join(tokens[start_idx : start_idx + answer_length])
26 changes: 26 additions & 0 deletions assets/ar/demographic_attributes/gender/ArabGend_Random.py
@@ -0,0 +1,26 @@
from llmebench.datasets import ArabGendDataset
from llmebench.models import RandomModel
from llmebench.tasks import DemographyGenderTask, TaskType


def config():
    return {
        "dataset": ArabGendDataset,
        "dataset_args": {},
        "task": DemographyGenderTask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {
            "task_type": TaskType.Classification,
            "class_labels": ["m", "f"],
        },
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    return response["random_response"]
26 changes: 26 additions & 0 deletions assets/ar/demographic_attributes/gender/ArapTweet_Random.py
@@ -0,0 +1,26 @@
from llmebench.datasets import ArapTweetDataset
from llmebench.models import RandomModel
from llmebench.tasks import DemographyGenderTask, TaskType


def config():
    return {
        "dataset": ArapTweetDataset,
        "dataset_args": {},
        "task": DemographyGenderTask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {
            "task_type": TaskType.Classification,
            "class_labels": ["Female", "Male"],
        },
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    return response["random_response"]
49 changes: 49 additions & 0 deletions assets/ar/demographic_attributes/location/Location_Random.py
@@ -0,0 +1,49 @@
from llmebench.datasets import LocationDataset
from llmebench.models import RandomModel
from llmebench.tasks import DemographyLocationTask, TaskType


def config():
    return {
        "dataset": LocationDataset,
        "dataset_args": {},
        "task": DemographyLocationTask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {
            "task_type": TaskType.Classification,
            "class_labels": [
                "ae",
                "OTHERS",
                "bh",
                "dz",
                "eg",
                "iq",
                "jo",
                "kw",
                "lb",
                "ly",
                "ma",
                "om",
                "ps",
                "qa",
                "sa",
                "sd",
                "so",
                "sy",
                "tn",
                "UNK",
                "ye",
                "mr",
            ],
        },
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    return response["random_response"]
127 changes: 127 additions & 0 deletions assets/ar/demographic_attributes/name_info/NameInfo_Random.py
@@ -0,0 +1,127 @@
from llmebench.datasets import NameInfoDataset
from llmebench.models import RandomModel
from llmebench.tasks import DemographyNameInfoTask, TaskType


def config():
    return {
        "dataset": NameInfoDataset,
        "dataset_args": {},
        "task": DemographyNameInfoTask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {
            "task_type": TaskType.Classification,
            "class_labels": [
                "gb",
                "us",
                "cl",
                "fr",
                "ru",
                "pl",
                "in",
                "it",
                "kr",
                "gh",
                "ca",
                "sa",
                "at",
                "de",
                "cn",
                "br",
                "dk",
                "se",
                "bd",
                "cu",
                "jp",
                "be",
                "es",
                "co",
                "id",
                "iq",
                "pk",
                "tr",
                "il",
                "ch",
                "ar",
                "ro",
                "nl",
                "ps",
                "ug",
                "ir",
                "cg",
                "do",
                "ee",
                "tn",
                "gr",
                "np",
                "ie",
                "sy",
                "hu",
                "eg",
                "ma",
                "ve",
                "ph",
                "no",
                "bg",
                "si",
                "ke",
                "au",
                "et",
                "py",
                "af",
                "pt",
                "th",
                "bo",
                "mx",
                "lb",
                "za",
                "fi",
                "hr",
                "vn",
                "ly",
                "nz",
                "qa",
                "kh",
                "ci",
                "ng",
                "sg",
                "cm",
                "dz",
                "tz",
                "ae",
                "pe",
                "az",
                "lu",
                "ec",
                "cz",
                "ua",
                "uy",
                "sd",
                "ao",
                "my",
                "lv",
                "kw",
                "tw",
                "bh",
                "lk",
                "ye",
                "cr",
                "jo",
                "pa",
                "om",
                "uz",
                "by",
                "kz",
            ],
        },
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    return response["random_response"]
@@ -0,0 +1,26 @@
from llmebench.datasets import AdultDataset
from llmebench.models import RandomModel
from llmebench.tasks import AdultTask, TaskType


def config():
    return {
        "dataset": AdultDataset,
        "dataset_args": {},
        "task": AdultTask,
        "task_args": {},
        "model": RandomModel,
        "model_args": {
            "task_type": TaskType.Classification,
            "class_labels": ["ADULT", "NOT_ADULT"],
        },
        "general_args": {},
    }


def prompt(input_sample):
    return input_sample


def post_process(response):
    return response["random_response"]
