Skip to content

Commit

Permalink
Add metadata to all assets (#234)
Browse files Browse the repository at this point in the history
This commit adds metadata to all assets, including author and model information, descriptions and scores. Tests and docs updated to reflect the same.

* Add metadata to all assets

* Update asset tests for metadata

* Update asset tutorial
  • Loading branch information
fdalvi authored Sep 18, 2023
1 parent 5814882 commit f14121f
Show file tree
Hide file tree
Showing 286 changed files with 2,541 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
from llmebench.tasks import MachineTranslationTask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings for the asset.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Helsinki-NLP/opus-mt-ar-en",
        "description": "Sample HuggingFace Inference API asset for machine translation.",
    }


def config():
return {
"dataset": AraBenchDataset,
Expand Down
8 changes: 8 additions & 0 deletions assets/ar/MT/AraBench_ar2en_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
from llmebench.tasks import MachineTranslationTask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings for the asset.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "bloomz-176b (8bit quantized)",
        "description": "Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
    }


def config():
return {
"dataset": AraBenchDataset,
Expand Down
8 changes: 8 additions & 0 deletions assets/ar/MT/AraBench_ar2en_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
from llmebench.tasks import MachineTranslationTask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings for the asset.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-35-turbo (version 0301)",
        "description": "GPT35 model hosted on Azure, using the Completion API. API version '2023-03-15-preview'.",
    }


def config():
return {
"dataset": AraBenchDataset,
Expand Down
8 changes: 8 additions & 0 deletions assets/ar/MT/AraBench_ar2en_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
from llmebench.tasks import MachineTranslationTask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings for the asset.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
    }


def config():
return {
"dataset": AraBenchDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/ARCD_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "bloomz-176b (8bit quantized)",
        "description": "Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
        "scores": {"F1": "0.368"},
    }


def config():
return {
"dataset": ARCDDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/ARCD_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-35-turbo (version 0301)",
        "description": "GPT35 model hosted on Azure, using the Completion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.502"},
    }


def config():
return {
"dataset": ARCDDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/ARCD_GPT4_FewShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
random.seed(3333)


def metadata():
    """Return descriptive metadata for this few-shot asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
        "scores": {"F1": "0.704"},
    }


def config():
return {
"dataset": ARCDDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/ARCD_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.705"},
    }


def config():
return {
"dataset": ARCDDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/ARCD_Random.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
from llmebench.tasks import QATask, TaskType


def metadata():
    """Return descriptive metadata for this random-baseline asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Random",
        "description": "Random Baseline.",
        "scores": {"F1": "0.085"},
    }


def config():
return {
"dataset": ARCDDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/MLQA_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "bloomz-176b (8bit quantized)",
        "description": "Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
        "scores": {"F1": "0.377"},
    }


def config():
return {
"dataset": MLQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/MLQA_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-35-turbo (version 0301)",
        "description": "GPT35 model hosted on Azure, using the Completion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.376"},
    }


def config():
return {
"dataset": MLQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/MLQA_GPT4_FewShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
random.seed(3333)


def metadata():
    """Return descriptive metadata for this few-shot asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
        "scores": {"F1": "0.653"},
    }


def config():
return {
"dataset": MLQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/MLQA_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.620"},
    }


def config():
return {
"dataset": MLQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/MLQA_Random.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
from llmebench.tasks import QATask, TaskType


def metadata():
    """Return descriptive metadata for this random-baseline asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Random",
        "description": "Random Baseline.",
        "scores": {"F1": "0.066"},
    }


def config():
return {
"dataset": MLQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/MLQA_mdeberta_v3_base_squad2_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    No "scores" entry is reported: the value previously stored under "F1"
    was the asset path "ar/QA/MLQA" rather than a score.

    Returns:
        dict: The "author", "model" and "description" strings for the asset.
    """
    # TODO(review): add "scores": {"F1": "<value>"} once the measured F1 for
    # this model is known; the earlier placeholder was not a numeric score.
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "timpal0l/mdeberta-v3-base-squad2",
        "description": "Sample HuggingFace Inference API asset for question answering.",
    }


def config():
return {
"dataset": MLQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/TyDiQA_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "bloomz-176b (8bit quantized)",
        "description": "Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
        "scores": {"F1": "0.456"},
    }


def config():
return {
"dataset": TyDiQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/TyDiQA_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-35-turbo (version 0301)",
        "description": "GPT35 model hosted on Azure, using the Completion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.480"},
    }


def config():
return {
"dataset": TyDiQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/TyDiQA_GPT4_FewShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
random.seed(3333)


def metadata():
    """Return descriptive metadata for this few-shot asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
        "scores": {"F1": "0.739"},
    }


def config():
return {
"dataset": TyDiQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/TyDiQA_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.744"},
    }


def config():
return {
"dataset": TyDiQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/TyDiQA_Random.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
from llmebench.tasks import QATask, TaskType


def metadata():
    """Return descriptive metadata for this random-baseline asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Random",
        "description": "Random Baseline.",
        "scores": {"F1": "0.111"},
    }


def config():
return {
"dataset": TyDiQADataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/XQuAD_BLOOMZ_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "bloomz-176b (8bit quantized)",
        "description": "Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
        "scores": {"F1": "0.367"},
    }


def config():
return {
"dataset": XQuADDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/XQuAD_GPT35_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-35-turbo (version 0301)",
        "description": "GPT35 model hosted on Azure, using the Completion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.442"},
    }


def config():
return {
"dataset": XQuADDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/XQuAD_GPT4_FewShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
random.seed(3333)


def metadata():
    """Return descriptive metadata for this few-shot asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
        "scores": {"F1": "0.722"},
    }


def config():
return {
"dataset": XQuADDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/XQuAD_GPT4_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
        "scores": {"F1": "0.729"},
    }


def config():
return {
"dataset": XQuADDataset,
Expand Down
9 changes: 9 additions & 0 deletions assets/ar/QA/XQuAD_Random.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
from llmebench.tasks import QATask, TaskType


def metadata():
    """Return descriptive metadata for this random-baseline asset.

    Returns:
        dict: The "author", "model" and "description" strings, plus a
        "scores" dict mapping the metric name ("F1") to its value as a string.
    """
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Random",
        "description": "Random Baseline.",
        "scores": {"F1": "0.047"},
    }


def config():
return {
"dataset": XQuADDataset,
Expand Down
Loading

0 comments on commit f14121f

Please sign in to comment.