Skip to content

Commit

Permalink
Add Jais ZeroShot assets for Arabic QA tasks (#258)
Browse files Browse the repository at this point in the history
* Add JAIS13b for arabic QA

* Format code and minor metadata changes

---------

Co-authored-by: Fahim Imaduddin Dalvi <[email protected]>
  • Loading branch information
AridHasan and fdalvi authored Jan 31, 2024
1 parent bf938cc commit 891654d
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 0 deletions.
33 changes: 33 additions & 0 deletions assets/ar/QA/ARCD_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from llmebench.datasets import ARCDDataset
from llmebench.models import FastChatModel
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this benchmark asset (author and model)."""
    author = "Arabic Language Technologies, QCRI, HBKU"
    model_name = "Jais-13b-chat"
    return {
        "author": author,
        "model": model_name,
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    """Return the benchmark wiring for ARCD zero-shot QA with Jais-13b-chat.

    Returns:
        dict: dataset/task/model configuration consumed by the benchmark runner.
    """
    return {
        "dataset": ARCDDataset,
        "task": QATask,
        "model": FastChatModel,
        # Consistency fix: the sibling Jais-13b QA assets (MLQA/TyDiQA/XQuAD)
        # all retry the locally hosted FastChat endpoint up to 50 times; ARCD
        # previously omitted this and fell back to the model default.
        "model_args": {
            "max_tries": 50,
        },
    }


def prompt(input_sample):
base_prompt = f"مهمتك هي الإجابة على الأسئلة باللغة العربية بناءً على سياق معين.\nملاحظة: يجب أن تكون إجاباتك مستخرجة من السياق المحدد دون أي اضافات.\nلست بحاجة إلى تقديم إجابة كاملة.\nالسياق: {input_sample['context']}\n السؤال: {input_sample['question']}\n الجواب:"
return [
{
"role": "user",
"content": base_prompt,
},
]


def post_process(response):
    """Extract the assistant's reply text from a chat-completion response dict."""
    first_choice = response["choices"][0]
    return first_choice["message"]["content"]
36 changes: 36 additions & 0 deletions assets/ar/QA/MLQA_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from llmebench.datasets import MLQADataset
from llmebench.models import FastChatModel
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this benchmark asset (author and model)."""
    info = {}
    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
    info["model"] = "Jais-13b-chat"
    info["description"] = "Locally hosted Jais-13b-chat model using FastChat."
    return info


def config():
    """Return the benchmark wiring (dataset/task/model) for MLQA zero-shot QA."""
    settings = {
        "dataset": MLQADataset,
        "task": QATask,
        "model": FastChatModel,
    }
    # Retry the locally hosted FastChat endpoint up to 50 times.
    settings["model_args"] = {"max_tries": 50}
    return settings


def prompt(input_sample):
    """Build a single-turn zero-shot extractive-QA chat prompt (English instructions).

    Args:
        input_sample (dict): must provide ``context`` and ``question`` strings.

    Returns:
        list: one user message in chat-completion format.
    """
    task_description = (
        "Your task is to answer questions in Arabic based on a given context.\n"
        "Note: Your answers should be spans extracted from the given context "
        "without any illustrations.\n"
        "You don't need to provide a complete answer\n"
    )
    sample_text = (
        f"Context:{input_sample['context']}\n"
        f"Question:{input_sample['question']}\n"
        "Answer:"
    )
    return [{"role": "user", "content": task_description + sample_text}]


def post_process(response):
    """Extract the assistant's reply text from a chat-completion response dict."""
    message = response["choices"][0]["message"]
    return message["content"]
37 changes: 37 additions & 0 deletions assets/ar/QA/TyDiQA_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from llmebench.datasets import TyDiQADataset
from llmebench.models import FastChatModel
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this benchmark asset (author and model)."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
    )


def config():
    """Return the benchmark wiring (dataset/task/model) for TyDiQA zero-shot QA."""
    # Retry the locally hosted FastChat endpoint up to 50 times; TyDiQA is
    # evaluated on its dev split.
    retry_args = {"max_tries": 50}
    split_args = {"test_split": "dev"}
    return {
        "dataset": TyDiQADataset,
        "task": QATask,
        "model": FastChatModel,
        "model_args": retry_args,
        "general_args": split_args,
    }


def prompt(input_sample):
    """Build a single-turn zero-shot extractive-QA chat prompt (English instructions).

    Args:
        input_sample (dict): must provide ``context`` and ``question`` strings.

    Returns:
        list: one user message in chat-completion format.
    """
    lines = [
        "Your task is to answer questions in Arabic based on a given context.",
        "Note: Your answers should be spans extracted from the given context without any illustrations.",
        "You don't need to provide a complete answer",
        f"Context:{input_sample['context']}",
        f"Question:{input_sample['question']}",
        "Answer:",
    ]
    return [{"role": "user", "content": "\n".join(lines)}]


def post_process(response):
    """Extract the assistant's reply text from a chat-completion response dict."""
    choices = response["choices"]
    return choices[0]["message"]["content"]
36 changes: 36 additions & 0 deletions assets/ar/QA/XQuAD_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from llmebench.datasets import XQuADDataset
from llmebench.models import FastChatModel
from llmebench.tasks import QATask


def metadata():
    """Return descriptive metadata for this benchmark asset (author and model)."""
    description = "Locally hosted Jais-13b-chat model using FastChat."
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": description,
    }


def config():
    """Return the benchmark wiring (dataset/task/model) for XQuAD zero-shot QA."""
    # Retry the locally hosted FastChat endpoint up to 50 times.
    model_args = {"max_tries": 50}
    return {
        "dataset": XQuADDataset,
        "task": QATask,
        "model": FastChatModel,
        "model_args": model_args,
    }


def prompt(input_sample):
    """Build a single-turn zero-shot extractive-QA chat prompt (English instructions).

    Args:
        input_sample (dict): must provide ``context`` and ``question`` strings.

    Returns:
        list: one user message in chat-completion format.
    """
    context = input_sample["context"]
    question = input_sample["question"]
    content = (
        "Your task is to answer questions in Arabic based on a given context.\n"
        "Note: Your answers should be spans extracted from the given context "
        "without any illustrations.\n"
        "You don't need to provide a complete answer\n"
        f"Context:{context}\nQuestion:{question}\nAnswer:"
    )
    message = {"role": "user", "content": content}
    return [message]


def post_process(response):
    """Extract the assistant's reply text from a chat-completion response dict."""
    return response["choices"][0].get("message", {}).get("content") if False else response["choices"][0]["message"]["content"]

0 comments on commit 891654d

Please sign in to comment.