Add Jais ZeroShot assets for semantic tasks (#256)
* Jais-13b added for Arabic semantics

* Format code, remove unused imports, and make minor metadata changes

---------

Co-authored-by: Fahim Imaduddin Dalvi <[email protected]>
AridHasan and fdalvi authored Jan 31, 2024
1 parent 49ebada commit f48adce
Showing 4 changed files with 230 additions and 0 deletions.
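
All four assets follow llmebench's standard asset layout (metadata/config/prompt/post_process). Assuming the run conventions in the llmebench README apply, an asset like these would be invoked through the benchmark driver, e.g. python -m llmebench --filter '*JAIS13b_ZeroShot*' <benchmark-dir> <results-dir>; the exact filter pattern and the environment variables needed to point FastChatModel at a local FastChat endpoint are assumptions to verify against the README.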
57 changes: 57 additions & 0 deletions assets/ar/semantics/NLI/XNLI_JAIS13b_ZeroShot.py
@@ -0,0 +1,57 @@
from llmebench.datasets import XNLIDataset
from llmebench.models import FastChatModel
from llmebench.tasks import XNLITask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": XNLIDataset,
        "task": XNLITask,
        "model": FastChatModel,
    }


def prompt(input_sample):
    # XNLI input: premise and hypothesis joined by a tab.
    sent1, sent2 = input_sample.split("\t")
    # Arabic zero-shot NLI instruction; roughly: "You are given two sentences,
    # a premise and a hypothesis. Classify the hypothesis with respect to the
    # premise as: true (the hypothesis entails the premise), false (the
    # hypothesis contradicts the premise), or unknown (neutral). Your answer
    # must be exactly one of: true, false, or unknown."
    prompt_text = "نقدم لك جملتين تمثلان فرضيتين. مهمتك هي تصنيف الفرضية اللاحقة بالنسبة للفرضية المسبقة تبعاً لواحدة من هذه التصنيفات: صحيح (الفرضية اللاحقة تدل على نفس الفرضية المسبقة)، خطأ (الفرضية اللاحقة تناقض الفرضية المسبقة)، أو غير معروف (حيادي). يجب أن يقتصر ردك على واحدة من هذه التصنيفات: صحيح، خطأ، أو غير معروف."
    base_prompt = (
        prompt_text
        + "\nالفرضية المسبقة: "
        + sent1
        + "\nالفرضية اللاحقة: "
        + sent2
        + "\n"
        + "التصنيف: "
    )

    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    input_label = response["choices"][0]["message"]["content"]
    input_label = input_label.replace(".", "").strip().lower()

    # Map the Arabic labels back to the canonical XNLI classes; the neutral
    # check runs first so "غير معروف" is not misread via a substring match.
    if "غير معروف" in input_label or "حيادي" in input_label:
        pred_label = "neutral"
    elif "صحيح" in input_label or "تدل" in input_label:
        pred_label = "entailment"
    elif "خطأ" in input_label or "تناقض" in input_label:
        pred_label = "contradiction"
    else:
        print(input_label)
        pred_label = None

    return pred_label
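
For reference (not part of the commit), a minimal sanity check of the label mapping above, assuming FastChatModel returns an OpenAI-style chat-completion payload:

# Hypothetical response payloads; the OpenAI-style shape is an assumption.
mock_response = {"choices": [{"message": {"content": "صحيح."}}]}
assert post_process(mock_response) == "entailment"

mock_response = {"choices": [{"message": {"content": "غير معروف"}}]}
assert post_process(mock_response) == "neutral"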
57 changes: 57 additions & 0 deletions assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py
@@ -0,0 +1,57 @@
from llmebench.datasets import STSQ2QDataset
from llmebench.models import FastChatModel
from llmebench.tasks import Q2QSimDetectionTask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": STSQ2QDataset,
        "task": Q2QSimDetectionTask,
        "model": FastChatModel,
        "model_args": {
            "max_tries": 3,
        },
    }


def prompt(input_sample):
    # input_sample holds the two questions separated by a tab; embed them
    # as-is (the original split-and-rejoin was a no-op).
    q1, q2 = input_sample.split("\t")
    base_prompt = (
        f"Are the following two questions semantically similar "
        f"(i.e., asking for similar information)? The output should be "
        f"exactly in form yes or no.\n\n{q1}\t{q2}"
    )

    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    input_label = response["choices"][0]["message"]["content"]
    input_label = input_label.replace(".", "").strip().lower()
    pred_label = ""

    if "yes" in input_label or "label: 1" in input_label:
        pred_label = "1"
    # A plain negative answer takes precedence over an incidental "yes".
    if (
        input_label == "no"
        or input_label.startswith("no,")
        or "label: 0" in input_label
        or "label: no" in input_label
        or "not semantically similar" in input_label
    ):
        pred_label = "0"

    if pred_label == "":
        pred_label = None

    return pred_label
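
For reference (not part of the commit), a minimal check of the yes/no mapping above, again assuming an OpenAI-style response shape:

# Hypothetical response payloads; the OpenAI-style shape is an assumption.
mock_response = {"choices": [{"message": {"content": "Yes."}}]}
assert post_process(mock_response) == "1"

mock_response = {"choices": [{"message": {"content": "No, they are different"}}]}
assert post_process(mock_response) == "0"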
58 changes: 58 additions & 0 deletions assets/ar/semantics/STS/SemEval17T1STS_JAIS13b_ZeroShot.py
@@ -0,0 +1,58 @@
from llmebench.datasets import SemEval17T1STSDataset
from llmebench.models import FastChatModel
from llmebench.tasks import STSTask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": SemEval17T1STSDataset,
        "task": STSTask,
        "model": FastChatModel,
        "model_args": {
            "max_tries": 3,
        },
    }


def prompt(input_sample):
    base_prompt = (
        f"Given two sentences, produce a continuous valued similarity score on a "
        f"scale from 0 to 5, with 0 indicating that the semantics of the sentences are "
        f"completely independent and 5 signifying semantic equivalence. The output "
        f"should be exactly in form Similarity score =. \n{input_sample}"
    )
    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    raw_response = response["choices"][0]["message"]["content"]

    if "Similarity score =" in raw_response:
        # Pull the first token after the marker, e.g. "Similarity score = 3.5".
        # Splitting on the marker itself (without a trailing space) avoids an
        # IndexError when the model omits the space after "=".
        pred_num = (
            raw_response.split("Similarity score =")[1]
            .strip()
            .split(" ")[0]
            .rstrip(".")
        )
        try:
            score = float(pred_num)
        except ValueError:
            score = None
    else:
        # Fall back to parsing the whole reply as a bare number.
        try:
            score = float(raw_response)
        except ValueError:
            score = None

    return score
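
For reference (not part of the commit), the expected happy path, where the model follows the "Similarity score =" format requested in the prompt (response shape assumed OpenAI-style):

# Hypothetical response payload; the OpenAI-style shape is an assumption.
mock_response = {"choices": [{"message": {"content": "Similarity score = 3.5"}}]}
assert post_process(mock_response) == 3.5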
58 changes: 58 additions & 0 deletions assets/ar/semantics/STS/SemEval17T2STS_JAIS13b_ZeroShot.py
@@ -0,0 +1,58 @@
from llmebench.datasets import SemEval17T2STSDataset
from llmebench.models import FastChatModel
from llmebench.tasks import STSTask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": SemEval17T2STSDataset,
        "task": STSTask,
        "model": FastChatModel,
        "model_args": {
            "max_tries": 3,
        },
    }


def prompt(input_sample):
    # Fixed typo: "bsae_prompt" -> "base_prompt".
    base_prompt = (
        f"Given two sentences, produce a continuous valued similarity score on a "
        f"scale from 0 to 5, with 0 indicating that the semantics of the sentences are "
        f"completely independent and 5 signifying semantic equivalence. The output "
        f"should be exactly in form Similarity score =. \n{input_sample}"
    )
    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    raw_response = response["choices"][0]["message"]["content"]

    if "Similarity score =" in raw_response:
        # Pull the first token after the marker; split on the marker itself
        # so a missing space after "=" does not raise an IndexError.
        pred_num = (
            raw_response.split("Similarity score =")[1]
            .strip()
            .split(" ")[0]
            .rstrip(".")
        )
        try:
            score = float(pred_num)
        except ValueError:
            score = None
    else:
        # Fall back to parsing the whole reply as a bare number.
        try:
            score = float(raw_response)
        except ValueError:
            score = None

    return score
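For reference (not part of the commit), the fallback path above, where the model replies with a bare number and no "Similarity score =" marker (response shape assumed OpenAI-style):

# Hypothetical response payload; the OpenAI-style shape is an assumption.
mock_response = {"choices": [{"message": {"content": "4.0"}}]}
assert post_process(mock_response) == 4.0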
