-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
44 changed files
with
2,431 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from llmebench.datasets import ARCDDataset | ||
from llmebench.models import FastChatModel | ||
from llmebench.tasks import QATask | ||
|
||
|
||
def metadata():
    """Asset metadata: authorship, model identity, and the published F1 score."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
        scores={"F1": "0.546"},
    )
|
||
|
||
def config():
    """Benchmark configuration wiring the ARCD dataset, QA task and model together."""
    return {
        "dataset": ARCDDataset,
        "task": QATask,
        "model": FastChatModel,
        # Retry a failed model call up to 3 times.
        "model_args": {"max_tries": 3},
    }
|
||
|
||
def prompt(input_sample):
    """Build a single-turn chat prompt for extractive Arabic QA.

    input_sample is a dict with 'context' and 'question' keys.
    """
    instruction = (
        "Your task is to answer questions in Arabic based on a given context."
        "\nNote: Your answers should be spans extracted from the given context"
        " without any illustrations.\nYou don't need to provide a complete answer"
    )
    content = (
        f"{instruction}\nContext:{input_sample['context']}"
        f"\nQuestion:{input_sample['question']}\nAnswer:"
    )
    return [{"role": "user", "content": content}]
|
||
|
||
def post_process(response):
    """Extract the assistant's text from an OpenAI-style chat response dict."""
    first_choice = response["choices"][0]
    return first_choice["message"]["content"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from llmebench.datasets import MLQADataset | ||
from llmebench.models import FastChatModel | ||
from llmebench.tasks import QATask | ||
|
||
|
||
def metadata():
    """Asset metadata: authorship, model identity, and the published F1 score."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
        scores={"F1": "0.540"},
    )
|
||
|
||
def config():
    """Benchmark configuration wiring the MLQA dataset, QA task and model together."""
    return {
        "dataset": MLQADataset,
        "task": QATask,
        "model": FastChatModel,
        # Retry a failed model call up to 3 times.
        "model_args": {"max_tries": 3},
    }
|
||
|
||
def prompt(input_sample):
    """Build a single-turn chat prompt for extractive Arabic QA.

    input_sample is a dict with 'context' and 'question' keys.
    """
    instruction = (
        "Your task is to answer questions in Arabic based on a given context."
        "\nNote: Your answers should be spans extracted from the given context"
        " without any illustrations.\nYou don't need to provide a complete answer"
    )
    content = (
        f"{instruction}\nContext:{input_sample['context']}"
        f"\nQuestion:{input_sample['question']}\nAnswer:"
    )
    return [{"role": "user", "content": content}]
|
||
|
||
def post_process(response):
    """Extract the assistant's text from an OpenAI-style chat response dict."""
    first_choice = response["choices"][0]
    return first_choice["message"]["content"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from llmebench.datasets import TyDiQADataset | ||
from llmebench.models import FastChatModel | ||
from llmebench.tasks import QATask | ||
|
||
|
||
def metadata():
    """Asset metadata: authorship, model identity, and the published F1 score."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
        scores={"F1": "0.724"},
    )
|
||
|
||
def config():
    """Benchmark configuration wiring the TyDiQA dataset, QA task and model together."""
    return {
        "dataset": TyDiQADataset,
        "task": QATask,
        "model": FastChatModel,
        # Retry a failed model call up to 3 times.
        "model_args": {"max_tries": 3},
        # Evaluate on the dev split rather than the default test split.
        "general_args": {"test_split": "dev"},
    }
|
||
|
||
def prompt(input_sample):
    """Build a single-turn chat prompt for extractive Arabic QA.

    input_sample is a dict with 'context' and 'question' keys.
    """
    instruction = (
        "Your task is to answer questions in Arabic based on a given context."
        "\nNote: Your answers should be spans extracted from the given context"
        " without any illustrations.\nYou don't need to provide a complete answer"
    )
    content = (
        f"{instruction}\nContext:{input_sample['context']}"
        f"\nQuestion:{input_sample['question']}\nAnswer:"
    )
    return [{"role": "user", "content": content}]
|
||
|
||
def post_process(response):
    """Extract the assistant's text from an OpenAI-style chat response dict."""
    first_choice = response["choices"][0]
    return first_choice["message"]["content"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from llmebench.datasets import XQuADDataset | ||
from llmebench.models import FastChatModel | ||
from llmebench.tasks import QATask | ||
|
||
|
||
def metadata():
    """Asset metadata: authorship, model identity, and the published F1 score."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
        scores={"F1": "0.636"},
    )
|
||
|
||
def config():
    """Benchmark configuration wiring the XQuAD dataset, QA task and model together."""
    return {
        "dataset": XQuADDataset,
        "task": QATask,
        "model": FastChatModel,
        # Retry a failed model call up to 3 times.
        "model_args": {"max_tries": 3},
    }
|
||
|
||
def prompt(input_sample):
    """Build a single-turn chat prompt for extractive Arabic QA.

    input_sample is a dict with 'context' and 'question' keys.
    """
    instruction = (
        "Your task is to answer questions in Arabic based on a given context."
        "\nNote: Your answers should be spans extracted from the given context"
        " without any illustrations.\nYou don't need to provide a complete answer"
    )
    content = (
        f"{instruction}\nContext:{input_sample['context']}"
        f"\nQuestion:{input_sample['question']}\nAnswer:"
    )
    return [{"role": "user", "content": content}]
|
||
|
||
def post_process(response):
    """Extract the assistant's text from an OpenAI-style chat response dict."""
    first_choice = response["choices"][0]
    return first_choice["message"]["content"]
47 changes: 47 additions & 0 deletions
47
assets/ar/demographic_attributes/gender/ArabGend_JAIS13b_ZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from llmebench.datasets import ArabGendDataset | ||
from llmebench.models import FastChatModel | ||
from llmebench.tasks import ClassificationTask | ||
|
||
|
||
def metadata():
    """Asset metadata: authorship and model identity (no published score)."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
    )
|
||
|
||
def config():
    """Benchmark configuration for gender classification on ArabGend."""
    model_args = {
        # Dataset labels: 'm' (male) and 'f' (female).
        "class_labels": ["m", "f"],
        # Retry a failed model call up to 3 times.
        "max_tries": 3,
    }
    return {
        "dataset": ArabGendDataset,
        "task": ClassificationTask,
        "model": FastChatModel,
        "model_args": model_args,
    }
|
||
|
||
def prompt(input_sample):
    """Build a single-turn gender-identification prompt for a given name."""
    # NOTE(review): the name and "gender: " run together with no separator —
    # the original f-string pieces concatenate directly; confirm this is intended.
    base_prompt = (
        "Identify the gender from the following name as 'female' or 'male'.\n\n"
        f"name: {input_sample}gender: \n"
    )
    return [{"role": "user", "content": base_prompt}]
|
||
|
||
def post_process(response):
    """Map the model's free-text gender reply onto the dataset labels.

    Returns 'm' for male, 'f' for female, or None when no gender keyword
    is found in the reply.

    Fix: the original required the reply to equal exactly "male" (while
    accepting "female" as a substring), so replies such as "Male." or
    "The gender is male" were dropped as None. Both genders are now
    matched as case-insensitive substrings.
    """
    label = response["choices"][0]["message"]["content"]
    normalized = label.lower()
    # "female" must be tested first: every string containing "female" also
    # contains "male", so the reverse order would misclassify females.
    if "female" in normalized:
        return "f"
    elif "male" in normalized:
        return "m"
    else:
        return None
57 changes: 57 additions & 0 deletions
57
assets/ar/demographic_attributes/gender/ArapTweet_JAIS13b_ZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from llmebench.datasets import ArapTweetDataset | ||
from llmebench.models import FastChatModel | ||
from llmebench.tasks import ClassificationTask | ||
|
||
|
||
def metadata():
    """Asset metadata: authorship and model identity (no published score)."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
    )
|
||
|
||
def config():
    """Benchmark configuration for gender classification on ArapTweet."""
    model_args = {
        # Dataset labels use capitalized forms, unlike ArabGend's 'm'/'f'.
        "class_labels": ["Female", "Male"],
        # NOTE(review): 30 retries here vs. 3 in the sibling assets —
        # looks inconsistent; confirm whether this is intentional.
        "max_tries": 30,
    }
    return {
        "dataset": ArapTweetDataset,
        "task": ClassificationTask,
        "model": FastChatModel,
        "model_args": model_args,
    }
|
||
|
||
def prompt(input_sample):
    """Build a single-turn gender-identification prompt for a given name."""
    # NOTE(review): the name and "gender: " run together with no separator —
    # the original f-string pieces concatenate directly; confirm this is intended.
    base_prompt = (
        "Identify the gender from the following name as 'Female' or 'Male'.\n\n"
        f"name: {input_sample}gender: \n"
    )
    return [{"role": "user", "content": base_prompt}]
|
||
|
||
def post_process(response):
    """Normalize the model's reply to 'Female', 'Male', or None.

    Matches either the bare label or one of several phrasings observed in
    model output (echoed "gender:" prefix, label on its own line, hedged
    "likely"/"typically" phrasings for Male).
    """
    text = response["choices"][0]["message"]["content"].strip()

    female_signals = ("gender: Female", "\nFemale")
    male_signals = (
        "gender: Male",
        "\nMale",
        "likely to be 'Male'",
        "typically a 'Male' name",
    )

    if text == "Female" or any(s in text for s in female_signals):
        return "Female"
    if text == "Male" or any(s in text for s in male_signals):
        return "Male"
    return None
80 changes: 80 additions & 0 deletions
80
assets/ar/demographic_attributes/location/Location_JAIS13b_ZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
from llmebench.datasets import LocationDataset | ||
from llmebench.models import FastChatModel | ||
from llmebench.tasks import DemographyLocationTask | ||
|
||
|
||
def metadata():
    """Asset metadata: authorship and model identity (no published score)."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="Locally hosted Jais-13b-chat model using FastChat.",
    )
|
||
|
||
def config():
    """Benchmark configuration for user-location (country-code) classification.

    Labels are ISO 3166-1 alpha-2 codes for Arab countries, plus the
    sentinels 'OTHERS' (non-Arab country) and 'UNK' (unrecognized).
    """
    # Kept in the original order; note the mixed casing of the sentinels.
    class_labels = [
        "ae",
        "OTHERS",
        "bh",
        "dz",
        "eg",
        "iq",
        "jo",
        "kw",
        "lb",
        "ly",
        "ma",
        "om",
        "ps",
        "qa",
        "sa",
        "sd",
        "so",
        "sy",
        "tn",
        "UNK",
        "ye",
        "mr",
    ]
    return {
        "dataset": LocationDataset,
        "task": DemographyLocationTask,
        "model": FastChatModel,
        "model_args": {
            "class_labels": class_labels,
            # NOTE(review): 30 retries here vs. 3 in the sibling assets —
            # looks inconsistent; confirm whether this is intentional.
            "max_tries": 30,
        },
    }
|
||
|
||
def prompt(input_sample):
    """Build a single-turn prompt mapping a free-text user location to a country code."""
    base_prompt = (
        "Given the following 'user location', identify and map it to its "
        "corresponding country code in accordance with ISO 3166-1 alpha-2. "
        "Please write the country code only, with no additional explanations. "
        "If the country is not an Arab country, please write 'OTHERS'. "
        "If the location doesn't map to a recognized country, write 'UNK'.\n\n"
        f"user location: {input_sample}\n"
        "country code: \n"
    )
    return [{"role": "user", "content": base_prompt}]
|
||
|
||
def post_process(response):
    """Map the model's reply to a label from config()['model_args']['class_labels'].

    Returns the canonical label (e.g. 'eg', 'OTHERS', 'UNK'), 'ae' for the
    common non-ISO answer 'uae', or None when the reply is not a known label.

    Fixes over the original:
    - The reply was lowercased but class_labels contains the uppercase
      sentinels 'OTHERS' and 'UNK', so those could never match; membership
      is now checked case-insensitively and the canonical casing returned.
    - A reply with an echoed "country code: " prefix was returned verbatim
      (including trailing whitespace) without validation; it is now stripped
      and validated like any other reply.
    - The 'uae' special case now also applies after prefix stripping.
    """
    label = response["choices"][0]["message"]["content"].lower()

    label_list = config()["model_args"]["class_labels"]
    # Case-insensitive lookup that preserves each label's canonical casing.
    canonical = {entry.lower(): entry for entry in label_list}

    # Strip an echoed "country code: " prefix, then surrounding whitespace.
    if "country code: " in label:
        label = label.replace("country code: ", "")
    label = label.strip()

    if label == "uae":  # common non-ISO spelling for the United Arab Emirates
        return "ae"
    return canonical.get(label)
Oops, something went wrong.