Skip to content

Commit

Permalink
Merge branch 'main' into feat/graceful_benchmark_failure
Browse files Browse the repository at this point in the history
  • Loading branch information
fdalvi committed Sep 10, 2023
2 parents 7dbf97d + 469b1c7 commit 6cef63c
Show file tree
Hide file tree
Showing 222 changed files with 249 additions and 1,322 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ data

# Temporary
tmp

# Model configs
envs
3 changes: 0 additions & 3 deletions assets/ar/MT/AraBench_ar2en_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import AraBenchDataset
from llmebench.models import PetalsModel
from llmebench.tasks import MachineTranslationTask
Expand Down Expand Up @@ -61,7 +59,6 @@ def config():
"task_args": {},
"model": PetalsModel,
"model_args": {
"api_url": os.environ["API_URL"],
"max_tries": 3,
},
"general_args": {"data_path": "data/MT/"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/MT/AraBench_ar2en_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import AraBenchDataset
from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import MachineTranslationTask
Expand Down Expand Up @@ -61,11 +59,6 @@ def config():
"task_args": {},
"model": LegacyOpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 5,
},
"general_args": {"data_path": "data/MT/"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/MT/AraBench_ar2en_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import AraBenchDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import MachineTranslationTask
Expand Down Expand Up @@ -60,11 +58,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 5,
},
"general_args": {"data_path": "data/MT/"},
Expand Down
3 changes: 0 additions & 3 deletions assets/ar/QA/ARCD_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import ARCDDataset
from llmebench.models import PetalsModel
from llmebench.tasks import QATask
Expand All @@ -13,7 +11,6 @@ def config():
"task_args": {},
"model": PetalsModel,
"model_args": {
"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {"data_path": "data/QA/ARCD/arcd-test.json"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/ARCD_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import ARCDDataset
from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,11 +11,6 @@ def config():
"task_args": {},
"model": LegacyOpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 3,
},
"general_args": {"data_path": "data/QA/ARCD/arcd-test.json"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/ARCD_GPT4_FewShot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import random

from llmebench.datasets import ARCDDataset
Expand All @@ -16,12 +15,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 30,
},
"general_args": {
Expand Down
8 changes: 0 additions & 8 deletions assets/ar/QA/ARCD_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import ARCDDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,12 +11,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 50,
},
"general_args": {"data_path": "data/QA/arcd/arcd-test.json"},
Expand Down
3 changes: 0 additions & 3 deletions assets/ar/QA/MLQA_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import MLQADataset
from llmebench.models import PetalsModel
from llmebench.tasks import QATask
Expand All @@ -13,7 +11,6 @@ def config():
"task_args": {},
"model": PetalsModel,
"model_args": {
"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/MLQA_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import MLQADataset
from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,11 +11,6 @@ def config():
"task_args": {},
"model": LegacyOpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 3,
},
"general_args": {
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/MLQA_GPT4_FewShot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import random

from llmebench.datasets import MLQADataset
Expand All @@ -16,12 +15,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 30,
},
"general_args": {
Expand Down
8 changes: 0 additions & 8 deletions assets/ar/QA/MLQA_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import MLQADataset
from llmebench.models import OpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,12 +11,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 50,
},
"general_args": {
Expand Down
3 changes: 0 additions & 3 deletions assets/ar/QA/TyDiQA_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import TyDiQADataset
from llmebench.models import PetalsModel
from llmebench.tasks import QATask
Expand All @@ -13,7 +11,6 @@ def config():
"task_args": {},
"model": PetalsModel,
"model_args": {
"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/TyDiQA_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import TyDiQADataset
from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,11 +11,6 @@ def config():
"task_args": {},
"model": LegacyOpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 3,
},
"general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/TyDiQA_GPT4_FewShot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import random

from llmebench.datasets import TyDiQADataset
Expand All @@ -16,12 +15,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 30,
},
"general_args": {
Expand Down
8 changes: 0 additions & 8 deletions assets/ar/QA/TydiQA_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import TyDiQADataset
from llmebench.models import OpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,12 +11,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 50,
},
"general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},
Expand Down
3 changes: 0 additions & 3 deletions assets/ar/QA/XQuAD_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import XQuADDataset
from llmebench.models import PetalsModel
from llmebench.tasks import QATask
Expand All @@ -13,7 +11,6 @@ def config():
"task_args": {},
"model": PetalsModel,
"model_args": {
"api_url": os.environ["API_URL"],
"max_tries": 5,
},
"general_args": {"data_path": "data/QA/xquad/xquad.ar.json"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/XQuAD_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import XQuADDataset
from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,11 +11,6 @@ def config():
"task_args": {},
"model": LegacyOpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"max_tries": 3,
},
"general_args": {"data_path": "data/QA/xquad/xquad.ar.json"},
Expand Down
8 changes: 0 additions & 8 deletions assets/ar/QA/XQuAD_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import XQuADDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import QATask
Expand All @@ -13,12 +11,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 50,
},
"general_args": {"data_path": "data/QA/xquad/xquad.ar.json"},
Expand Down
7 changes: 0 additions & 7 deletions assets/ar/QA/XQuaD_GPT4_FewShot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import random

from llmebench.datasets import XQuADDataset
Expand All @@ -16,12 +15,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": "NA",
"max_tries": 30,
},
"general_args": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import ArabGendDataset
from llmebench.models import PetalsModel
from llmebench.tasks import DemographyGenderTask
Expand All @@ -13,7 +11,6 @@ def config():
"task_args": {},
"model": PetalsModel,
"model_args": {
"api_url": os.environ["API_URL"],
"class_labels": ["m", "f"],
"max_tries": 3,
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import ArabGendDataset
from llmebench.models import LegacyOpenAIModel
from llmebench.tasks import DemographyGenderTask
Expand All @@ -13,11 +11,6 @@ def config():
"task_args": {},
"model": LegacyOpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": ["m", "f"],
"max_tries": 3,
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import ArabGendDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import DemographyGenderTask
Expand All @@ -13,11 +11,6 @@ def config():
"task_args": {},
"model": OpenAIModel,
"model_args": {
"api_type": "azure",
"api_version": "2023-03-15-preview",
"api_base": os.environ["AZURE_API_URL"],
"api_key": os.environ["AZURE_API_KEY"],
"engine_name": os.environ["ENGINE_NAME"],
"class_labels": ["m", "f"],
"max_tries": 3,
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

from llmebench.datasets import ArapTweetDataset
from llmebench.models import PetalsModel
from llmebench.tasks import DemographyGenderTask
Expand All @@ -13,7 +11,6 @@ def config():
"task_args": {},
"model": PetalsModel,
"model_args": {
"api_url": os.environ["API_URL"],
"class_labels": ["Female", "Male"],
"max_tries": 3,
},
Expand Down
Loading

0 comments on commit 6cef63c

Please sign in to comment.