Add Jais ZeroShot assets for Arabic QA tasks (#258)

* Add JAIS13b for Arabic QA * Format code and make minor metadata changes --------- Co-authored-by: Fahim Imaduddin Dalvi <[email protected]>
qcri · Jan 31, 2024 · 891654d · 891654d
1 parent bf938cc
commit 891654d
Show file tree

Hide file tree

Showing 4 changed files with 142 additions and 0 deletions.
diff --git a/assets/ar/QA/ARCD_JAIS13b_ZeroShot.py b/assets/ar/QA/ARCD_JAIS13b_ZeroShot.py
@@ -0,0 +1,33 @@
+from llmebench.datasets import ARCDDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import QATask
+
+
+def metadata():
+    """Return descriptive metadata for this asset.
+
+    Returns:
+        A dict with the keys ``author``, ``model`` and ``description``.
+    """
+    asset_info = dict(
+        author="Arabic Language Technologies, QCRI, HBKU",
+        model="Jais-13b-chat",
+        description="Locally hosted Jais-13b-chat model using FastChat.",
+    )
+    return asset_info
+
+
+def config():
+    """Return the dataset/task/model wiring for this asset.
+
+    Returns:
+        A dict mapping ``dataset`` to ``ARCDDataset``, ``task`` to ``QATask``
+        and ``model`` to ``FastChatModel``.
+    """
+    # NOTE(review): the sibling MLQA/TyDiQA/XQuAD Jais assets also pass
+    # model_args={"max_tries": 50}; confirm the omission here is intended.
+    settings = {}
+    settings["dataset"] = ARCDDataset
+    settings["task"] = QATask
+    settings["model"] = FastChatModel
+    return settings
+
+
+def prompt(input_sample):
+    """Build the single-message chat prompt for one QA sample.
+
+    Args:
+        input_sample: Mapping that provides the ``context`` and ``question``
+            values interpolated into the Arabic instruction text.
+
+    Returns:
+        A one-element list holding a ``user``-role message dict.
+    """
+    user_text = f"مهمتك هي الإجابة على الأسئلة باللغة العربية بناءً على سياق معين.\nملاحظة: يجب أن تكون إجاباتك مستخرجة من السياق المحدد دون أي اضافات.\nلست بحاجة إلى تقديم إجابة كاملة.\nالسياق: {input_sample['context']}\n السؤال: {input_sample['question']}\n الجواب:"
+    user_message = {"role": "user", "content": user_text}
+    return [user_message]
+
+
+def post_process(response):
+    """Return the message content of the first choice in ``response``."""
+    first_choice = response["choices"][0]
+    message = first_choice["message"]
+    return message["content"]
diff --git a/assets/ar/QA/MLQA_JAIS13b_ZeroShot.py b/assets/ar/QA/MLQA_JAIS13b_ZeroShot.py
@@ -0,0 +1,36 @@
+from llmebench.datasets import MLQADataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import QATask
+
+
+def metadata():
+    """Return descriptive metadata for this asset.
+
+    Returns:
+        A dict with the keys ``author``, ``model`` and ``description``.
+    """
+    details = {}
+    details["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    details["model"] = "Jais-13b-chat"
+    details["description"] = "Locally hosted Jais-13b-chat model using FastChat."
+    return details
+
+
+def config():
+    """Return the dataset/task/model wiring for this asset.
+
+    Returns:
+        A dict mapping ``dataset`` to ``MLQADataset``, ``task`` to ``QATask``
+        and ``model`` to ``FastChatModel``, plus ``model_args`` carrying
+        ``max_tries`` set to 50.
+    """
+    model_options = {"max_tries": 50}
+    return dict(
+        dataset=MLQADataset,
+        task=QATask,
+        model=FastChatModel,
+        model_args=model_options,
+    )
+
+
+def prompt(input_sample):
+    """Build the single-message chat prompt for one QA sample.
+
+    Args:
+        input_sample: Mapping that provides the ``context`` and ``question``
+            values interpolated into the English instruction text.
+
+    Returns:
+        A one-element list holding a ``user``-role message dict.
+    """
+    user_text = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:"
+    user_message = {"role": "user", "content": user_text}
+    return [user_message]
+
+
+def post_process(response):
+    """Return the message content of the first choice in ``response``."""
+    first_choice = response["choices"][0]
+    message = first_choice["message"]
+    return message["content"]
diff --git a/assets/ar/QA/TyDiQA_JAIS13b_ZeroShot.py b/assets/ar/QA/TyDiQA_JAIS13b_ZeroShot.py
@@ -0,0 +1,37 @@
+from llmebench.datasets import TyDiQADataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import QATask
+
+
+def metadata():
+    """Return descriptive metadata for this asset.
+
+    Returns:
+        A dict with the keys ``author``, ``model`` and ``description``.
+    """
+    fields = (
+        ("author", "Arabic Language Technologies, QCRI, HBKU"),
+        ("model", "Jais-13b-chat"),
+        ("description", "Locally hosted Jais-13b-chat model using FastChat."),
+    )
+    return dict(fields)
+
+
+def config():
+    """Return the dataset/task/model wiring for this asset.
+
+    Returns:
+        A dict mapping ``dataset`` to ``TyDiQADataset``, ``task`` to
+        ``QATask`` and ``model`` to ``FastChatModel``, plus ``model_args``
+        carrying ``max_tries`` set to 50 and ``general_args`` carrying
+        ``test_split`` set to ``"dev"``.
+    """
+    model_options = {"max_tries": 50}
+    general_options = {"test_split": "dev"}
+    return dict(
+        dataset=TyDiQADataset,
+        task=QATask,
+        model=FastChatModel,
+        model_args=model_options,
+        general_args=general_options,
+    )
+
+
+def prompt(input_sample):
+    """Build the single-message chat prompt for one QA sample.
+
+    Args:
+        input_sample: Mapping that provides the ``context`` and ``question``
+            values interpolated into the English instruction text.
+
+    Returns:
+        A one-element list holding a ``user``-role message dict.
+    """
+    user_text = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:"
+    conversation = []
+    conversation.append({"role": "user", "content": user_text})
+    return conversation
+
+
+def post_process(response):
+    """Return the message content of the first choice in ``response``."""
+    choices = response["choices"]
+    first_message = choices[0]["message"]
+    return first_message["content"]
diff --git a/assets/ar/QA/XQuAD_JAIS13b_ZeroShot.py b/assets/ar/QA/XQuAD_JAIS13b_ZeroShot.py
@@ -0,0 +1,36 @@
+from llmebench.datasets import XQuADDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import QATask
+
+
+def metadata():
+    """Return descriptive metadata for this asset.
+
+    Returns:
+        A dict with the keys ``author``, ``model`` and ``description``.
+    """
+    author = "Arabic Language Technologies, QCRI, HBKU"
+    model_name = "Jais-13b-chat"
+    summary = "Locally hosted Jais-13b-chat model using FastChat."
+    return {"author": author, "model": model_name, "description": summary}
+
+
+def config():
+    """Return the dataset/task/model wiring for this asset.
+
+    Returns:
+        A dict mapping ``dataset`` to ``XQuADDataset``, ``task`` to ``QATask``
+        and ``model`` to ``FastChatModel``, plus ``model_args`` carrying
+        ``max_tries`` set to 50.
+    """
+    settings = {}
+    settings["dataset"] = XQuADDataset
+    settings["task"] = QATask
+    settings["model"] = FastChatModel
+    settings["model_args"] = {"max_tries": 50}
+    return settings
+
+
+def prompt(input_sample):
+    """Build the single-message chat prompt for one QA sample.
+
+    Args:
+        input_sample: Mapping that provides the ``context`` and ``question``
+            values interpolated into the English instruction text.
+
+    Returns:
+        A one-element list holding a ``user``-role message dict.
+    """
+    user_text = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:"
+    return [dict(role="user", content=user_text)]
+
+
+def post_process(response):
+    """Return the message content of the first choice in ``response``."""
+    choices = response["choices"]
+    first_message = choices[0]["message"]
+    return first_message["content"]