Skip to content

Commit

Permalink
branch rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
bsabri committed Sep 10, 2023
2 parents 060609e + 421839f commit 8be02c5
Show file tree
Hide file tree
Showing 289 changed files with 3,511 additions and 3,060 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ data

# Temporary
tmp

# Model configs
envs
9 changes: 9 additions & 0 deletions CITATION.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@article{dalvi2023llmebench,
title={LLMeBench: A Flexible Framework for Accelerating LLMs Benchmarking},
author={Fahim Dalvi and Maram Hasanain and Sabri Boughorbel and Basel Mousi and Samir Abdaljalil and Nizi Nazar and Ahmed Abdelali and Shammur Absar Chowdhury and Hamdy Mubarak and Ahmed Ali and Majd Hawasly and Nadir Durrani and Firoj Alam},
year={2023},
eprint={2308.04945},
journal={arXiv:2308.04945},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2308.04945}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import AraBenchDataset
-from llmebench.models import BLOOMPetalModel
+from llmebench.models import PetalsModel
from llmebench.tasks import MachineTranslationTask


Expand Down Expand Up @@ -59,9 +57,8 @@ def config():
},
"task": MachineTranslationTask,
"task_args": {},
-"model": BLOOMPetalModel,
+"model": PetalsModel,
"model_args": {
-"api_url": os.environ["API_URL"],
"max_tries": 3,
},
"general_args": {"data_path": "data/MT/"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import AraBenchDataset
-from llmebench.models import GPTModel
+from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import MachineTranslationTask


Expand Down Expand Up @@ -59,13 +57,8 @@ def config():
},
"task": MachineTranslationTask,
"task_args": {},
-"model": GPTModel,
+"model": LegacyOpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 5,
},
"general_args": {"data_path": "data/MT/"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import AraBenchDataset
-from llmebench.models import GPTChatCompletionModel
+from llmebench.models import OpenAIModel
from llmebench.tasks import MachineTranslationTask


Expand Down Expand Up @@ -58,13 +56,8 @@ def config():
},
"task": MachineTranslationTask,
"task_args": {},
-"model": GPTChatCompletionModel,
+"model": OpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 5,
},
"general_args": {"data_path": "data/MT/"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import ARCDDataset
-from llmebench.models import BLOOMPetalModel
+from llmebench.models import PetalsModel
from llmebench.tasks import QATask


Expand All @@ -11,9 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": BLOOMPetalModel,
+"model": PetalsModel,
"model_args": {
-"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {"data_path": "data/QA/ARCD/arcd-test.json"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import ARCDDataset
-from llmebench.models import GPTModel, RandomGPTModel
+from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import QATask


Expand All @@ -11,13 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTModel,
+"model": LegacyOpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 3,
},
"general_args": {"data_path": "data/QA/ARCD/arcd-test.json"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
-import os
import random

from llmebench.datasets import ARCDDataset
-from llmebench.models import GPTChatCompletionModel
+from llmebench.models import OpenAIModel
from llmebench.tasks import QATask

random.seed(3333)
Expand All @@ -14,14 +13,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTChatCompletionModel,
+"model": OpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
-"class_labels": "NA",
"max_tries": 30,
},
"general_args": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import ARCDDataset
-from llmebench.models import GPTChatCompletionModel
+from llmebench.models import OpenAIModel
from llmebench.tasks import QATask


Expand All @@ -11,14 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTChatCompletionModel,
+"model": OpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
-"class_labels": "NA",
"max_tries": 50,
},
"general_args": {"data_path": "data/QA/arcd/arcd-test.json"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import MLQADataset
-from llmebench.models import BLOOMPetalModel
+from llmebench.models import PetalsModel
from llmebench.tasks import QATask


Expand All @@ -11,9 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": BLOOMPetalModel,
+"model": PetalsModel,
"model_args": {
-"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import MLQADataset
-from llmebench.models import GPTModel, RandomGPTModel
+from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import QATask


Expand All @@ -11,13 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTModel,
+"model": LegacyOpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 3,
},
"general_args": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
-import os
import random

from llmebench.datasets import MLQADataset
-from llmebench.models import GPTChatCompletionModel
+from llmebench.models import OpenAIModel
from llmebench.tasks import QATask

random.seed(3333)
Expand All @@ -14,14 +13,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTChatCompletionModel,
+"model": OpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
-"class_labels": "NA",
"max_tries": 30,
},
"general_args": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import MLQADataset
-from llmebench.models import GPTChatCompletionModel
+from llmebench.models import OpenAIModel
from llmebench.tasks import QATask


Expand All @@ -11,14 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTChatCompletionModel,
+"model": OpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
-"class_labels": "NA",
"max_tries": 50,
},
"general_args": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import TyDiQADataset
-from llmebench.models import BLOOMPetalModel
+from llmebench.models import PetalsModel
from llmebench.tasks import QATask


Expand All @@ -11,9 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": BLOOMPetalModel,
+"model": PetalsModel,
"model_args": {
-"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import TyDiQADataset
-from llmebench.models import GPTModel, RandomGPTModel
+from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import QATask


Expand All @@ -11,13 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTModel,
+"model": LegacyOpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 3,
},
"general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
-import os
import random

from llmebench.datasets import TyDiQADataset
-from llmebench.models import GPTChatCompletionModel
+from llmebench.models import OpenAIModel
from llmebench.tasks import QATask

random.seed(3333)
Expand All @@ -14,14 +13,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTChatCompletionModel,
+"model": OpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
-"class_labels": "NA",
"max_tries": 30,
},
"general_args": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import TyDiQADataset
-from llmebench.models import GPTChatCompletionModel
+from llmebench.models import OpenAIModel
from llmebench.tasks import QATask


Expand All @@ -11,14 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": GPTChatCompletionModel,
+"model": OpenAIModel,
"model_args": {
-"api_type": "azure",
-"api_version": "2023-03-15-preview",
-"api_base": os.environ["AZURE_API_URL"],
-"api_key": os.environ["AZURE_API_KEY"],
-"engine_name": os.environ["ENGINE_NAME"],
-"class_labels": "NA",
"max_tries": 50,
},
"general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
-import os
-
from llmebench.datasets import XQuADDataset
-from llmebench.models import BLOOMPetalModel
+from llmebench.models import PetalsModel
from llmebench.tasks import QATask


Expand All @@ -11,9 +9,8 @@ def config():
"dataset_args": {},
"task": QATask,
"task_args": {},
-"model": BLOOMPetalModel,
+"model": PetalsModel,
"model_args": {
-"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {"data_path": "data/QA/xquad/xquad.ar.json"},
Expand Down
Loading

0 comments on commit 8be02c5

Please sign in to comment.