-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Random model and assets for all tasks (#232)
This commit introduces a RandomModel that handles multiple task types and returns randomized results. This can be used to construct a useful baseline. The commit also adds Random baselines for all tasks except machine translation. Changes: * Fix incorrect task imports * Add Random Model * Minor fixes to existing datasets/assets * Add Random assets for all existing tasks
- Loading branch information
Showing
68 changed files
with
2,342 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import random | ||
|
||
from llmebench.datasets import ARCDDataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import QATask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the ARCD random-baseline asset.

    Pairs ``ARCDDataset`` with ``QATask`` and runs ``RandomModel`` configured
    for the question-answering task type. Every other argument group is left
    empty.
    """
    model_args = {"task_type": TaskType.QuestionAnswering}
    settings = dict(
        dataset=ARCDDataset,
        dataset_args={},
        task=QATask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Pick a random contiguous span of the context as the predicted answer.

    Args:
        response: Mapping whose ``["random_response"]["context"]`` entry is
            the passage text to sample from.

    Returns:
        A space-joined run of one or more consecutive tokens taken from the
        context (tokens are obtained by splitting on single spaces).
    """
    tokens = response["random_response"]["context"].split(" ")

    start_idx = random.randrange(len(tokens))
    # Span lengths run from 1 up to the number of tokens remaining, so the
    # answer is never an empty span and may reach the final token. The earlier
    # `choice(range(remaining))` allowed a length of 0 and always stopped
    # short of the last token.
    answer_length = random.randint(1, len(tokens) - start_idx)

    return " ".join(tokens[start_idx : start_idx + answer_length])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import random | ||
|
||
from llmebench.datasets import MLQADataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import QATask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the MLQA random-baseline asset.

    Pairs ``MLQADataset`` with ``QATask`` and runs ``RandomModel`` configured
    for the question-answering task type. Every other argument group is left
    empty.
    """
    model_args = {"task_type": TaskType.QuestionAnswering}
    settings = dict(
        dataset=MLQADataset,
        dataset_args={},
        task=QATask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Pick a random contiguous span of the context as the predicted answer.

    Args:
        response: Mapping whose ``["random_response"]["context"]`` entry is
            the passage text to sample from.

    Returns:
        A space-joined run of one or more consecutive tokens taken from the
        context (tokens are obtained by splitting on single spaces).
    """
    tokens = response["random_response"]["context"].split(" ")

    start_idx = random.randrange(len(tokens))
    # Span lengths run from 1 up to the number of tokens remaining, so the
    # answer is never an empty span and may reach the final token. The earlier
    # `choice(range(remaining))` allowed a length of 0 and always stopped
    # short of the last token.
    answer_length = random.randint(1, len(tokens) - start_idx)

    return " ".join(tokens[start_idx : start_idx + answer_length])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import random | ||
|
||
from llmebench.datasets import TyDiQADataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import QATask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the TyDiQA random-baseline asset.

    Pairs ``TyDiQADataset`` with ``QATask`` and runs ``RandomModel``
    configured for the question-answering task type. The general arguments
    select the ``"dev"`` split as the test split.
    """
    model_args = {"task_type": TaskType.QuestionAnswering}
    general_args = {"test_split": "dev"}
    settings = dict(
        dataset=TyDiQADataset,
        dataset_args={},
        task=QATask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args=general_args,
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Pick a random contiguous span of the context as the predicted answer.

    Args:
        response: Mapping whose ``["random_response"]["context"]`` entry is
            the passage text to sample from.

    Returns:
        A space-joined run of one or more consecutive tokens taken from the
        context (tokens are obtained by splitting on single spaces).
    """
    tokens = response["random_response"]["context"].split(" ")

    start_idx = random.randrange(len(tokens))
    # Span lengths run from 1 up to the number of tokens remaining, so the
    # answer is never an empty span and may reach the final token. The earlier
    # `choice(range(remaining))` allowed a length of 0 and always stopped
    # short of the last token.
    answer_length = random.randint(1, len(tokens) - start_idx)

    return " ".join(tokens[start_idx : start_idx + answer_length])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import random | ||
|
||
from llmebench.datasets import XQuADDataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import QATask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the XQuAD random-baseline asset.

    Pairs ``XQuADDataset`` with ``QATask`` and runs ``RandomModel`` configured
    for the question-answering task type. Every other argument group is left
    empty.
    """
    model_args = {"task_type": TaskType.QuestionAnswering}
    settings = dict(
        dataset=XQuADDataset,
        dataset_args={},
        task=QATask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Pick a random contiguous span of the context as the predicted answer.

    Args:
        response: Mapping whose ``["random_response"]["context"]`` entry is
            the passage text to sample from.

    Returns:
        A space-joined run of one or more consecutive tokens taken from the
        context (tokens are obtained by splitting on single spaces).
    """
    tokens = response["random_response"]["context"].split(" ")

    start_idx = random.randrange(len(tokens))
    # Span lengths run from 1 up to the number of tokens remaining, so the
    # answer is never an empty span and may reach the final token. The earlier
    # `choice(range(remaining))` allowed a length of 0 and always stopped
    # short of the last token.
    answer_length = random.randint(1, len(tokens) - start_idx)

    return " ".join(tokens[start_idx : start_idx + answer_length])
26 changes: 26 additions & 0 deletions
26
assets/ar/demographic_attributes/gender/ArabGend_Random.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from llmebench.datasets import ArabGendDataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import DemographyGenderTask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the ArabGend random-baseline asset.

    Pairs ``ArabGendDataset`` with ``DemographyGenderTask`` and runs
    ``RandomModel`` as a classifier over the labels ``"m"`` and ``"f"``.
    """
    class_labels = ["m", "f"]
    model_args = {
        "task_type": TaskType.Classification,
        "class_labels": class_labels,
    }
    settings = dict(
        dataset=ArabGendDataset,
        dataset_args={},
        task=DemographyGenderTask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Return the value stored under ``"random_response"`` as the prediction."""
    predicted_label = response["random_response"]
    return predicted_label
26 changes: 26 additions & 0 deletions
26
assets/ar/demographic_attributes/gender/ArapTweet_Random.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from llmebench.datasets import ArapTweetDataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import DemographyGenderTask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the ArapTweet random-baseline asset.

    Pairs ``ArapTweetDataset`` with ``DemographyGenderTask`` and runs
    ``RandomModel`` as a classifier over the labels ``"Female"`` and
    ``"Male"``.
    """
    class_labels = ["Female", "Male"]
    model_args = {
        "task_type": TaskType.Classification,
        "class_labels": class_labels,
    }
    settings = dict(
        dataset=ArapTweetDataset,
        dataset_args={},
        task=DemographyGenderTask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Return the value stored under ``"random_response"`` as the prediction."""
    predicted_label = response["random_response"]
    return predicted_label
49 changes: 49 additions & 0 deletions
49
assets/ar/demographic_attributes/location/Location_Random.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from llmebench.datasets import LocationDataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import DemographyLocationTask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the Location random-baseline asset.

    Pairs ``LocationDataset`` with ``DemographyLocationTask`` and runs
    ``RandomModel`` as a classifier over the location labels listed below
    (two-letter codes plus ``"OTHERS"`` and ``"UNK"``).
    """
    # Label order is kept exactly as in the original asset.
    class_labels = [
        "ae", "OTHERS", "bh", "dz", "eg", "iq", "jo", "kw",
        "lb", "ly", "ma", "om", "ps", "qa", "sa", "sd",
        "so", "sy", "tn", "UNK", "ye", "mr",
    ]
    model_args = {
        "task_type": TaskType.Classification,
        "class_labels": class_labels,
    }
    settings = dict(
        dataset=LocationDataset,
        dataset_args={},
        task=DemographyLocationTask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Return the value stored under ``"random_response"`` as the prediction."""
    predicted_label = response["random_response"]
    return predicted_label
127 changes: 127 additions & 0 deletions
127
assets/ar/demographic_attributes/name_info/NameInfo_Random.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
from llmebench.datasets import NameInfoDataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import DemographyNameInfoTask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the NameInfo random-baseline asset.

    Pairs ``NameInfoDataset`` with ``DemographyNameInfoTask`` and runs
    ``RandomModel`` as a classifier over the two-letter labels listed below.
    """
    # Label order is kept exactly as in the original asset.
    class_labels = [
        "gb", "us", "cl", "fr", "ru", "pl", "in", "it", "kr", "gh",
        "ca", "sa", "at", "de", "cn", "br", "dk", "se", "bd", "cu",
        "jp", "be", "es", "co", "id", "iq", "pk", "tr", "il", "ch",
        "ar", "ro", "nl", "ps", "ug", "ir", "cg", "do", "ee", "tn",
        "gr", "np", "ie", "sy", "hu", "eg", "ma", "ve", "ph", "no",
        "bg", "si", "ke", "au", "et", "py", "af", "pt", "th", "bo",
        "mx", "lb", "za", "fi", "hr", "vn", "ly", "nz", "qa", "kh",
        "ci", "ng", "sg", "cm", "dz", "tz", "ae", "pe", "az", "lu",
        "ec", "cz", "ua", "uy", "sd", "ao", "my", "lv", "kw", "tw",
        "bh", "lk", "ye", "cr", "jo", "pa", "om", "uz", "by", "kz",
    ]
    model_args = {
        "task_type": TaskType.Classification,
        "class_labels": class_labels,
    }
    settings = dict(
        dataset=NameInfoDataset,
        dataset_args={},
        task=DemographyNameInfoTask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Return the value stored under ``"random_response"`` as the prediction."""
    predicted_label = response["random_response"]
    return predicted_label
26 changes: 26 additions & 0 deletions
26
assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Random.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from llmebench.datasets import AdultDataset | ||
from llmebench.models import RandomModel | ||
from llmebench.tasks import AdultTask, TaskType | ||
|
||
|
||
def config():
    """Return the settings for the Adult-content random-baseline asset.

    Pairs ``AdultDataset`` with ``AdultTask`` and runs ``RandomModel`` as a
    classifier over the labels ``"ADULT"`` and ``"NOT_ADULT"``.
    """
    class_labels = ["ADULT", "NOT_ADULT"]
    model_args = {
        "task_type": TaskType.Classification,
        "class_labels": class_labels,
    }
    settings = dict(
        dataset=AdultDataset,
        dataset_args={},
        task=AdultTask,
        task_args={},
        model=RandomModel,
        model_args=model_args,
        general_args={},
    )
    return settings
|
||
|
||
def prompt(input_sample):
    """Return the dataset sample unchanged; no prompt text is built here."""
    model_input = input_sample
    return model_input
|
||
|
||
def post_process(response):
    """Return the value stored under ``"random_response"`` as the prediction."""
    predicted_label = response["random_response"]
    return predicted_label
Oops, something went wrong.