Skip to content

Commit

Permalink
Improve Arabic QA assets for Jais (#261)
Browse files Browse the repository at this point in the history
* Added Jais zero-shot asset for the XQuAD dataset

* Added Jais zero-shot asset for the TyDiQA dataset

* Added Jais zero-shot asset for the ARCD dataset

* Added Jais zero-shot asset for the MLQA dataset

* Added Jais zero-shot asset for the ANERcorp dataset

* Added Jais zero-shot asset for the AQMAR dataset

* Added Jais zero-shot asset for the MGB words dataset

* Formatted the written assets for Jais

* Fix casing in QA asset names

* Add scores for QA assets

* Remove NER assets as Jais cannot reliably do the task

---------

Co-authored-by: Fahim Imaduddin Dalvi <[email protected]>
  • Loading branch information
baselmousi and fdalvi authored Jan 31, 2024
1 parent 7fde120 commit 6ecd798
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 4 deletions.
7 changes: 6 additions & 1 deletion assets/ar/QA/ARCD_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def metadata():
"author": "Arabic Language Technologies, QCRI, HBKU",
"model": "Jais-13b-chat",
"description": "Locally hosted Jais-13b-chat model using FastChat.",
"scores": {"F1": "0.546"},
}


Expand All @@ -16,11 +17,15 @@ def config():
"dataset": ARCDDataset,
"task": QATask,
"model": FastChatModel,
"model_args": {
"max_tries": 3,
},
}


def prompt(input_sample):
base_prompt = f"مهمتك هي الإجابة على الأسئلة باللغة العربية بناءً على سياق معين.\nملاحظة: يجب أن تكون إجاباتك مستخرجة من السياق المحدد دون أي اضافات.\nلست بحاجة إلى تقديم إجابة كاملة.\nالسياق: {input_sample['context']}\n السؤال: {input_sample['question']}\n الجواب:"
base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:"

return [
{
"role": "user",
Expand Down
4 changes: 3 additions & 1 deletion assets/ar/QA/MLQA_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def metadata():
"author": "Arabic Language Technologies, QCRI, HBKU",
"model": "Jais-13b-chat",
"description": "Locally hosted Jais-13b-chat model using FastChat.",
"scores": {"F1": "0.540"},
}


Expand All @@ -17,13 +18,14 @@ def config():
"task": QATask,
"model": FastChatModel,
"model_args": {
"max_tries": 50,
"max_tries": 3,
},
}


def prompt(input_sample):
base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:"

return [
{
"role": "user",
Expand Down
4 changes: 3 additions & 1 deletion assets/ar/QA/TyDiQA_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def metadata():
"author": "Arabic Language Technologies, QCRI, HBKU",
"model": "Jais-13b-chat",
"description": "Locally hosted Jais-13b-chat model using FastChat.",
"scores": {"F1": "0.724"},
}


Expand All @@ -17,14 +18,15 @@ def config():
"task": QATask,
"model": FastChatModel,
"model_args": {
"max_tries": 50,
"max_tries": 3,
},
"general_args": {"test_split": "dev"},
}


def prompt(input_sample):
base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:"

return [
{
"role": "user",
Expand Down
4 changes: 3 additions & 1 deletion assets/ar/QA/XQuAD_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def metadata():
"author": "Arabic Language Technologies, QCRI, HBKU",
"model": "Jais-13b-chat",
"description": "Locally hosted Jais-13b-chat model using FastChat.",
"scores": {"F1": "0.636"},
}


Expand All @@ -17,13 +18,14 @@ def config():
"task": QATask,
"model": FastChatModel,
"model_args": {
"max_tries": 50,
"max_tries": 3,
},
}


def prompt(input_sample):
base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:"

return [
{
"role": "user",
Expand Down

0 comments on commit 6ecd798

Please sign in to comment.