Add support for env files for model configuration (#204)

This commit makes the models read environment variables directly to pick up their configuration, instead of relying on the assets to pass the values in. Specific settings can still be passed in the assets as before, and they take priority over environment variables. A side effect of this is that OpenAI models now work as well, not just Azure-based ones. Finally, a new --env option enables loading environment variables from .env files, and sample .env files are provided in the repo.

* Add support for .env files
* Add support for environment variables based configuration for OpenAI model
* Remove all hardcoded model config options from assets
* Add support for environment variable based configuration for Petals model
* Remove all hardcoded model config options from BLOOMZ assets
* Simplify tests as assets do not read environment variables by default
* Fix missing check for model parameters in OpenAI model
* Add sample env files
* Fix model param reading bug
* Update new assets
* Add tests for OpenAI model configuration
* Add tests for Petals model configuration
* Remove unused os import from assets
* Removed spurious class labels
qcri · Sep 10, 2023 · 469b1c7 · 469b1c7
1 parent d981265
commit 469b1c7
Show file tree

Hide file tree

Showing 222 changed files with 249 additions and 1,322 deletions.
diff --git a/.gitignore b/.gitignore
@@ -21,3 +21,6 @@ data
 
 # Temporary
 tmp
+
+# Model configs
+envs
diff --git a/assets/ar/MT/AraBench_ar2en_BLOOMZ_ZeroShot.py b/assets/ar/MT/AraBench_ar2en_BLOOMZ_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import AraBenchDataset
 from llmebench.models import PetalsModel
 from llmebench.tasks import MachineTranslationTask
@@ -61,7 +59,6 @@ def config():
                     "task_args": {},
                     "model": PetalsModel,
                     "model_args": {
-                        "api_url": os.environ["API_URL"],
                         "max_tries": 3,
                     },
                     "general_args": {"data_path": "data/MT/"},

diff --git a/assets/ar/MT/AraBench_ar2en_GPT35_ZeroShot.py b/assets/ar/MT/AraBench_ar2en_GPT35_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import AraBenchDataset
 from llmebench.models import LegacyOpenAIModel
 from llmebench.tasks import MachineTranslationTask
@@ -61,11 +59,6 @@ def config():
                     "task_args": {},
                     "model": LegacyOpenAIModel,
                     "model_args": {
-                        "api_type": "azure",
-                        "api_version": "2023-03-15-preview",
-                        "api_base": os.environ["AZURE_API_URL"],
-                        "api_key": os.environ["AZURE_API_KEY"],
-                        "engine_name": os.environ["ENGINE_NAME"],
                         "max_tries": 5,
                     },
                     "general_args": {"data_path": "data/MT/"},

diff --git a/assets/ar/MT/AraBench_ar2en_GPT4_ZeroShot.py b/assets/ar/MT/AraBench_ar2en_GPT4_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import AraBenchDataset
 from llmebench.models import OpenAIModel
 from llmebench.tasks import MachineTranslationTask
@@ -60,11 +58,6 @@ def config():
                     "task_args": {},
                     "model": OpenAIModel,
                     "model_args": {
-                        "api_type": "azure",
-                        "api_version": "2023-03-15-preview",
-                        "api_base": os.environ["AZURE_API_URL"],
-                        "api_key": os.environ["AZURE_API_KEY"],
-                        "engine_name": os.environ["ENGINE_NAME"],
                         "max_tries": 5,
                     },
                     "general_args": {"data_path": "data/MT/"},

diff --git a/assets/ar/QA/ARCD_BLOOMZ_ZeroShot.py b/assets/ar/QA/ARCD_BLOOMZ_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import ARCDDataset
 from llmebench.models import PetalsModel
 from llmebench.tasks import QATask
@@ -13,7 +11,6 @@ def config():
         "task_args": {},
         "model": PetalsModel,
         "model_args": {
-            "api_url": os.environ["API_URL"],
             "max_tries": 5,
         },
         "general_args": {"data_path": "data/QA/ARCD/arcd-test.json"},

diff --git a/assets/ar/QA/ARCD_GPT35_ZeroShot.py b/assets/ar/QA/ARCD_GPT35_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import ARCDDataset
 from llmebench.models import LegacyOpenAIModel
 from llmebench.tasks import QATask
@@ -13,11 +11,6 @@ def config():
         "task_args": {},
         "model": LegacyOpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
             "max_tries": 3,
         },
         "general_args": {"data_path": "data/QA/ARCD/arcd-test.json"},

diff --git a/assets/ar/QA/ARCD_GPT4_FewShot.py b/assets/ar/QA/ARCD_GPT4_FewShot.py
@@ -1,4 +1,3 @@
-import os
 import random
 
 from llmebench.datasets import ARCDDataset
@@ -16,12 +15,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 30,
         },
         "general_args": {

diff --git a/assets/ar/QA/ARCD_GPT4_ZeroShot.py b/assets/ar/QA/ARCD_GPT4_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import ARCDDataset
 from llmebench.models import OpenAIModel
 from llmebench.tasks import QATask
@@ -13,12 +11,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 50,
         },
         "general_args": {"data_path": "data/QA/arcd/arcd-test.json"},

diff --git a/assets/ar/QA/MLQA_BLOOMZ_ZeroShot.py b/assets/ar/QA/MLQA_BLOOMZ_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import MLQADataset
 from llmebench.models import PetalsModel
 from llmebench.tasks import QATask
@@ -13,7 +11,6 @@ def config():
         "task_args": {},
         "model": PetalsModel,
         "model_args": {
-            "api_url": os.environ["API_URL"],
             "max_tries": 5,
         },
         "general_args": {

diff --git a/assets/ar/QA/MLQA_GPT35_ZeroShot.py b/assets/ar/QA/MLQA_GPT35_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import MLQADataset
 from llmebench.models import LegacyOpenAIModel
 from llmebench.tasks import QATask
@@ -13,11 +11,6 @@ def config():
         "task_args": {},
         "model": LegacyOpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
             "max_tries": 3,
         },
         "general_args": {

diff --git a/assets/ar/QA/MLQA_GPT4_FewShot.py b/assets/ar/QA/MLQA_GPT4_FewShot.py
@@ -1,4 +1,3 @@
-import os
 import random
 
 from llmebench.datasets import MLQADataset
@@ -16,12 +15,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 30,
         },
         "general_args": {

diff --git a/assets/ar/QA/MLQA_GPT4_ZeroShot.py b/assets/ar/QA/MLQA_GPT4_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import MLQADataset
 from llmebench.models import OpenAIModel
 from llmebench.tasks import QATask
@@ -13,12 +11,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 50,
         },
         "general_args": {

diff --git a/assets/ar/QA/TyDiQA_BLOOMZ_ZeroShot.py b/assets/ar/QA/TyDiQA_BLOOMZ_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import TyDiQADataset
 from llmebench.models import PetalsModel
 from llmebench.tasks import QATask
@@ -13,7 +11,6 @@ def config():
         "task_args": {},
         "model": PetalsModel,
         "model_args": {
-            "api_url": os.environ["API_URL"],
             "max_tries": 5,
         },
         "general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},

diff --git a/assets/ar/QA/TyDiQA_GPT35_ZeroShot.py b/assets/ar/QA/TyDiQA_GPT35_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import TyDiQADataset
 from llmebench.models import LegacyOpenAIModel
 from llmebench.tasks import QATask
@@ -13,11 +11,6 @@ def config():
         "task_args": {},
         "model": LegacyOpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
             "max_tries": 3,
         },
         "general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},

diff --git a/assets/ar/QA/TyDiQA_GPT4_FewShot.py b/assets/ar/QA/TyDiQA_GPT4_FewShot.py
@@ -1,4 +1,3 @@
-import os
 import random
 
 from llmebench.datasets import TyDiQADataset
@@ -16,12 +15,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 30,
         },
         "general_args": {

diff --git a/assets/ar/QA/TydiQA_GPT4_ZeroShot.py b/assets/ar/QA/TydiQA_GPT4_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import TyDiQADataset
 from llmebench.models import OpenAIModel
 from llmebench.tasks import QATask
@@ -13,12 +11,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 50,
         },
         "general_args": {"data_path": "data/QA/tydiqa/tydiqa-goldp-dev-arabic.json"},

diff --git a/assets/ar/QA/XQuAD_BLOOMZ_ZeroShot.py b/assets/ar/QA/XQuAD_BLOOMZ_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import XQuADDataset
 from llmebench.models import PetalsModel
 from llmebench.tasks import QATask
@@ -13,7 +11,6 @@ def config():
         "task_args": {},
         "model": PetalsModel,
         "model_args": {
-            "api_url": os.environ["API_URL"],
             "max_tries": 5,
         },
         "general_args": {"data_path": "data/QA/xquad/xquad.ar.json"},

diff --git a/assets/ar/QA/XQuAD_GPT35_ZeroShot.py b/assets/ar/QA/XQuAD_GPT35_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import XQuADDataset
 from llmebench.models import LegacyOpenAIModel
 from llmebench.tasks import QATask
@@ -13,11 +11,6 @@ def config():
         "task_args": {},
         "model": LegacyOpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
             "max_tries": 3,
         },
         "general_args": {"data_path": "data/QA/xquad/xquad.ar.json"},

diff --git a/assets/ar/QA/XQuAD_GPT4_ZeroShot.py b/assets/ar/QA/XQuAD_GPT4_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import XQuADDataset
 from llmebench.models import OpenAIModel
 from llmebench.tasks import QATask
@@ -13,12 +11,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 50,
         },
         "general_args": {"data_path": "data/QA/xquad/xquad.ar.json"},

diff --git a/assets/ar/QA/XQuaD_GPT4_FewShot.py b/assets/ar/QA/XQuaD_GPT4_FewShot.py
@@ -1,4 +1,3 @@
-import os
 import random
 
 from llmebench.datasets import XQuADDataset
@@ -16,12 +15,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
-            "class_labels": "NA",
             "max_tries": 30,
         },
         "general_args": {

diff --git a/assets/ar/demographic_attributes/gender/ArabGend_BLOOMZ_ZeroShot.py b/assets/ar/demographic_attributes/gender/ArabGend_BLOOMZ_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import ArabGendDataset
 from llmebench.models import PetalsModel
 from llmebench.tasks import DemographyGenderTask
@@ -13,7 +11,6 @@ def config():
         "task_args": {},
         "model": PetalsModel,
         "model_args": {
-            "api_url": os.environ["API_URL"],
             "class_labels": ["m", "f"],
             "max_tries": 3,
         },

diff --git a/assets/ar/demographic_attributes/gender/ArabGend_GPT35_ZeroShot.py b/assets/ar/demographic_attributes/gender/ArabGend_GPT35_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import ArabGendDataset
 from llmebench.models import LegacyOpenAIModel
 from llmebench.tasks import DemographyGenderTask
@@ -13,11 +11,6 @@ def config():
         "task_args": {},
         "model": LegacyOpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
             "class_labels": ["m", "f"],
             "max_tries": 3,
         },

diff --git a/assets/ar/demographic_attributes/gender/ArabGend_GPT4_ZeroShot.py b/assets/ar/demographic_attributes/gender/ArabGend_GPT4_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import ArabGendDataset
 from llmebench.models import OpenAIModel
 from llmebench.tasks import DemographyGenderTask
@@ -13,11 +11,6 @@ def config():
         "task_args": {},
         "model": OpenAIModel,
         "model_args": {
-            "api_type": "azure",
-            "api_version": "2023-03-15-preview",
-            "api_base": os.environ["AZURE_API_URL"],
-            "api_key": os.environ["AZURE_API_KEY"],
-            "engine_name": os.environ["ENGINE_NAME"],
             "class_labels": ["m", "f"],
             "max_tries": 3,
         },

diff --git a/assets/ar/demographic_attributes/gender/ArapTweet_BLOOMZ_ZeroShot.py b/assets/ar/demographic_attributes/gender/ArapTweet_BLOOMZ_ZeroShot.py
@@ -1,5 +1,3 @@
-import os
-
 from llmebench.datasets import ArapTweetDataset
 from llmebench.models import PetalsModel
 from llmebench.tasks import DemographyGenderTask
@@ -13,7 +11,6 @@ def config():
         "task_args": {},
         "model": PetalsModel,
         "model_args": {
-            "api_url": os.environ["API_URL"],
             "class_labels": ["Female", "Male"],
             "max_tries": 3,
         },
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,3 +21,6 @@ data @@
     # Temporary
     tmp
+    # Model configs
+    envs