From 10664ca7977d5aa8d6fa068f66d3e9fdbc65ebba Mon Sep 17 00:00:00 2001 From: bsabri Date: Sat, 14 Oct 2023 20:54:58 +0300 Subject: [PATCH] fixed data loading issue --- assets/en/PIQA_Phi_ZeroShot.py | 8 ++------ llmebench/datasets/PIQA.py | 29 ++++++++++++++++------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/assets/en/PIQA_Phi_ZeroShot.py b/assets/en/PIQA_Phi_ZeroShot.py index 548c05f5..aa1a2a7c 100644 --- a/assets/en/PIQA_Phi_ZeroShot.py +++ b/assets/en/PIQA_Phi_ZeroShot.py @@ -6,7 +6,7 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "phi-1.5", + "model": "https://huggingface.co/microsoft/phi-1_5", "description": "Locally hosted Phi-1.5b model using FastChat.", } @@ -14,10 +14,6 @@ def metadata(): def config(): return { "dataset": PIQADataset, - "dataset_args": { - "src_lang": "ar", - "tgt_lang": "en", - }, "task": PIQATask, "model": FastChatModel, "model_args": { @@ -34,7 +30,7 @@ def prompt(input_sample): }, { "role": "user", - "content": f'Question: {input_sample["goal"]},\nA. {input_sample["sol1"]}\nB. {input_sample["sol2"]}\nAnswer: ', + "content": f'Question: {input_sample["goal"]},\nA. {input_sample["sol1"]}\nB. {input_sample["sol2"]} \nAnswer: ', }, ] diff --git a/llmebench/datasets/PIQA.py b/llmebench/datasets/PIQA.py index 3112ee59..481d9721 100644 --- a/llmebench/datasets/PIQA.py +++ b/llmebench/datasets/PIQA.py @@ -1,5 +1,4 @@ import json - import pandas as pd from llmebench.datasets.dataset_base import DatasetBase @@ -28,7 +27,7 @@ def metadata(): "download_url": "https://yonatanbisk.com/piqa/data/", "splits": { "train": "train", - "dev": "dev", + "test": "dev", }, "task_type": TaskType.Classification, "class_labels": ["0", "1"], @@ -37,27 +36,31 @@ def metadata(): @staticmethod def get_data_sample(): return { - "input": "When boiling butter, when it's ready, you can", - "sol1": "Pour it onto a plate", - "sol2": "Pour it into a jar", + "input": { + "goal": "When boiling butter, when it's ready, you can", + "sol1": "Pour it onto a plate", + "sol2": "Pour it into a jar", + }, "label": "1", } def load_data(self, data_path, no_labels=False): - data_path = self.resolve_path(data_path + ".jsonl") - label_path = self.resolve_path(data_path + "-labels.lst") + data_file = self.resolve_path(str(data_path) + ".jsonl") + label_path = self.resolve_path(str(data_path) + "-labels.lst") data = [] - label_data = pd.read_csv(label_path, sep="\t", header=None) + label_file = pd.read_csv(label_path, sep="\t", header=None) - with open(data_path, "r", encoding="utf-8") as json_file: + with open(data_file, "r", encoding="utf-8") as json_file: for index, line in enumerate(json_file): json_obj = json.loads(line) - label = label_data.loc[index] + label = label_file.loc[index] data.append( { - "input": json_obj["goal"], - "sol1": json_obj["sol1"], - "sol2": json_obj["sol2"], + "input": { + "goal": json_obj["goal"], + "sol1": json_obj["sol1"], + "sol2": json_obj["sol2"], + }, "label": label, } )