Skip to content

Commit

Permalink
Fix Lemmatization dataset name and citation
Browse files (browse the repository at this point in the history)
  • Loading branch information
fdalvi committed Sep 5, 2023
1 parent f78c7f9 commit db99243
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import os

from llmebench.datasets import LemmatizationDataset
from llmebench.datasets import WikiNewsLemmatizationDataset
from llmebench.models import BLOOMPetalModel
from llmebench.tasks import LemmatizationTask


def config():
return {
"dataset": LemmatizationDataset,
"dataset": WikiNewsLemmatizationDataset,
"dataset_args": {},
"task": LemmatizationTask,
"task_args": {},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import os

from llmebench.datasets import LemmatizationDataset
from llmebench.datasets import WikiNewsLemmatizationDataset
from llmebench.models import GPTModel, RandomGPTModel
from llmebench.tasks import LemmatizationTask


def config():
return {
"dataset": LemmatizationDataset,
"dataset": WikiNewsLemmatizationDataset,
"dataset_args": {},
"task": LemmatizationTask,
"task_args": {},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import os

from llmebench.datasets import LemmatizationDataset
from llmebench.datasets import WikiNewsLemmatizationDataset
from llmebench.models import GPTChatCompletionModel
from llmebench.tasks import LemmatizationTask


def config():
return {
"dataset": LemmatizationDataset,
"dataset": WikiNewsLemmatizationDataset,
"dataset_args": {},
"task": LemmatizationTask,
"task_args": {},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
from llmebench.datasets.dataset_base import DatasetBase


class LemmatizationDataset(DatasetBase):
class WikiNewsLemmatizationDataset(DatasetBase):
def __init__(self, **kwargs):
super(LemmatizationDataset, self).__init__(**kwargs)
super(WikiNewsLemmatizationDataset, self).__init__(**kwargs)

def metadata():
return {
"language": "ar",
"citation": """@inproceedings{mubarak2018build,
title={Build Fast and Accurate Lemmatization for Arabic},
author={Mubarak, Hamdy},
booktitle={Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
year={2018}
"citation": """@inproceedings{mubarak-2018-build,
title = "Build Fast and Accurate Lemmatization for {A}rabic",
author = "Mubarak, Hamdy",
booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
month = may,
year = "2018",
address = "Miyazaki, Japan",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L18-1181",
}""",
}

Expand All @@ -23,7 +27,6 @@ def get_data_sample(self):
}

def load_data(self, data_path, no_labels=False):
# TODO: modify to iterator
# Format: words \t lemmas
data = []
with open(data_path, "r") as fp:
Expand Down
2 changes: 1 addition & 1 deletion llmebench/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from .Emotion import EmotionDataset
from .Khouja20Factuality import Khouja20FactualityDataset
from .UnifiedFCFactuality import UnifiedFCFactualityDataset
from .Lemmatization import LemmatizationDataset
from .Location import LocationDataset
from .MGBWords import MGBWordsDataset
from .MLQA import MLQADataset
Expand All @@ -43,5 +42,6 @@
from .STSQ2Q import Q2QSimDataset
from .TyDiQA import TyDiQADataset
from .WANLP22T3Propaganda import WANLP22T3PropagandaDataset
from .WikiNewsLemmatization import WikiNewsLemmatizationDataset
from .XNLI import XNLIDataset
from .XQuAD import XQuADDataset

0 comments on commit db99243

Please sign in to comment.