Skip to content

Commit

Permalink
fixed data loading issue
Browse files Browse the repository at this point in the history
  • Loading branch information
bsabri committed Oct 14, 2023
1 parent 9ccadfa commit 10664ca
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 19 deletions.
8 changes: 2 additions & 6 deletions assets/en/PIQA_Phi_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,14 @@
def metadata():
    """Return descriptive metadata for this benchmark asset.

    Returns:
        dict: author attribution, a link to the model card, and a short
        description of how the model is served.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        # Point at the hosted model card (was the bare name "phi-1.5");
        # the diff residue had left both the old and new "model" entries in
        # the dict, so the duplicate key is removed here.
        "model": "https://huggingface.co/microsoft/phi-1_5",
        "description": "Locally hosted Phi-1.5b model using FastChat.",
    }


def config():
return {
"dataset": PIQADataset,
"dataset_args": {
"src_lang": "ar",
"tgt_lang": "en",
},
"task": PIQATask,
"model": FastChatModel,
"model_args": {
Expand All @@ -34,7 +30,7 @@ def prompt(input_sample):
},
{
"role": "user",
"content": f'Question: {input_sample["goal"]},\nA. {input_sample["sol1"]}\nB. {input_sample["sol2"]}\nAnswer: ',
"content": f'Question: {input_sample["goal"]},\nA. {input_sample["sol1"]}\nB. {input_sample["sol2"]} \nAnswer: ',
},
]

Expand Down
29 changes: 16 additions & 13 deletions llmebench/datasets/PIQA.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import json

import pandas as pd

from llmebench.datasets.dataset_base import DatasetBase
Expand Down Expand Up @@ -28,7 +27,7 @@ def metadata():
"download_url": "https://yonatanbisk.com/piqa/data/",
"splits": {
"train": "train",
"dev": "dev",
"test": "dev",
},
"task_type": TaskType.Classification,
"class_labels": ["0", "1"],
Expand All @@ -37,27 +36,31 @@ def metadata():
@staticmethod
def get_data_sample():
return {
"input": "When boiling butter, when it's ready, you can",
"sol1": "Pour it onto a plate",
"sol2": "Pour it into a jar",
"input": {
"goal": "When boiling butter, when it's ready, you can",
"sol1": "Pour it onto a plate",
"sol2": "Pour it into a jar",
},
"label": "1",
}

def load_data(self, data_path, no_labels=False):
data_path = self.resolve_path(data_path + ".jsonl")
label_path = self.resolve_path(data_path + "-labels.lst")
data_file = self.resolve_path(str(data_path) + ".jsonl")
label_path = self.resolve_path(str(data_path) + "-labels.lst")
data = []
label_data = pd.read_csv(label_path, sep="\t", header=None)
label_file = pd.read_csv(label_path, sep="\t", header=None)

with open(data_path, "r", encoding="utf-8") as json_file:
with open(data_file, "r", encoding="utf-8") as json_file:
for index, line in enumerate(json_file):
json_obj = json.loads(line)
label = label_data.loc[index]
label = label_file.loc[index]
data.append(
{
"input": json_obj["goal"],
"sol1": json_obj["sol1"],
"sol2": json_obj["sol2"],
"input": {
"goal": json_obj["goal"],
"sol1": json_obj["sol1"],
"sol2": json_obj["sol2"],
},
"label": label,
}
)
Expand Down

0 comments on commit 10664ca

Please sign in to comment.