Add Jais ZeroShot assets for semantic tasks (#256)
* Jais13b added for Arabic semantics
* Format code, remove unused imports and minor metadata changes

Co-authored-by: Fahim Imaduddin Dalvi <[email protected]>
4 changed files with 230 additions and 0 deletions.
@@ -0,0 +1,57 @@
from llmebench.datasets import XNLIDataset
from llmebench.models import FastChatModel
from llmebench.tasks import XNLITask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": XNLIDataset,
        "task": XNLITask,
        "model": FastChatModel,
    }


def prompt(input_sample):
    sent1, sent2 = input_sample.split("\t")
    prompt_text = "نقدم لك جملتين تمثلان فرضيتين. مهمتك هي تصنيف الفرضية اللاحقة بالنسبة للفرضية المسبقة تبعاً لواحدة من هذه التصنيفات: صحيح (الفرضية اللاحقة تدل على نفس الفرضية المسبقة)، خطأ (الفرضية اللاحقة تناقض الفرضية المسبقة)، أو غير معروف (حيادي). يجب أن يقتصر ردك على واحدة من هذه التصنيفات: صحيح، خطأ، أو غير معروف."
    base_prompt = (
        prompt_text
        + "\nالفرضية المسبقة: "
        + sent1
        + "\nالفرضية اللاحقة: "
        + sent2
        + "\n"
        + "التصنيف: "
    )

    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    input_label = response["choices"][0]["message"]["content"]
    input_label = input_label.replace(".", "").strip().lower()

    if "غير معروف" in input_label or "حيادي" in input_label:
        pred_label = "neutral"
    elif "صحيح" in input_label or "تدل" in input_label:
        pred_label = "entailment"
    elif "خطأ" in input_label or "تناقض" in input_label:
        pred_label = "contradiction"
    else:
        print(input_label)
        pred_label = None

    return pred_label
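As a quick illustration of the label mapping above (not part of the commit), the snippet below builds a mock response with the same "choices"/"message"/"content" structure that post_process indexes into, and checks that the Arabic keywords map to the English XNLI labels. The main-guard wrapper is only a sketch of how one might append a local check to the asset.

# Illustrative only (not part of the commit): mock responses mirroring the
# fields post_process reads above.
if __name__ == "__main__":
    mock_response = {"choices": [{"message": {"content": "التصنيف: صحيح"}}]}
    assert post_process(mock_response) == "entailment"

    mock_response = {"choices": [{"message": {"content": "غير معروف"}}]}
    assert post_process(mock_response) == "neutral"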
@@ -0,0 +1,57 @@
from llmebench.datasets import STSQ2QDataset
from llmebench.models import FastChatModel
from llmebench.tasks import Q2QSimDetectionTask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": STSQ2QDataset,
        "task": Q2QSimDetectionTask,
        "model": FastChatModel,
        "model_args": {
            "max_tries": 3,
        },
    }


def prompt(input_sample):
    q1, q2 = input_sample.split("\t")
    input_sample = q1 + "\t" + q2
    base_prompt = f"Are the following two questions semantically similar (i.e., asking for similar information)? The output should be exactly in form yes or no.\n\n{input_sample}"

    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    input_label = response["choices"][0]["message"]["content"]
    input_label = input_label.replace(".", "").strip().lower()
    pred_label = ""

    if "yes" in input_label or "label: 1" in input_label:
        pred_label = "1"
    if (
        input_label == "no"
        or input_label.startswith("no,")
        or "label: 0" in input_label
        or "label: no" in input_label
        or "not semantically similar" in input_label
    ):
        pred_label = "0"

    if pred_label == "":
        pred_label = None

    return pred_label
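A similar illustrative check (again, not part of the commit) for the yes/no mapping above, assuming the same OpenAI-style response structure:

# Illustrative only: verify the yes/no mapping on mock responses.
if __name__ == "__main__":
    yes_response = {
        "choices": [{"message": {"content": "Yes, the two questions ask for similar information."}}]
    }
    no_response = {"choices": [{"message": {"content": "No."}}]}
    assert post_process(yes_response) == "1"
    assert post_process(no_response) == "0"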
assets/ar/semantics/STS/SemEval17T1STS_JAIS13b_ZeroShot.py (58 additions, 0 deletions)
@@ -0,0 +1,58 @@
from llmebench.datasets import SemEval17T1STSDataset
from llmebench.models import FastChatModel
from llmebench.tasks import STSTask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": SemEval17T1STSDataset,
        "task": STSTask,
        "model": FastChatModel,
        "model_args": {
            "max_tries": 3,
        },
    }


def prompt(input_sample):
    base_prompt = (
        f"Given two sentences, produce a continuous valued similarity score on a "
        f"scale from 0 to 5, with 0 indicating that the semantics of the sentences are "
        f"completely independent and 5 signifying semantic equivalence. The output "
        f"should be exactly in form Similarity score =. \n{input_sample}"
    )
    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    raw_response = response["choices"][0]["message"]["content"]

    if "Similarity score =" in raw_response:
        pred_num = (
            raw_response.split("Similarity score = ")[1]
            .strip()
            .split(" ")[0]
            .rstrip(".")
        )
        score = float(pred_num)
    else:
        try:
            pred_sum = float(raw_response)
            score = pred_sum
        except Exception as e:
            score = None

    return score
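To illustrate the parsing above (not part of the commit): when the model follows the requested "Similarity score =" format, the numeric value after the marker is extracted and returned as a float. A minimal sketch of the expected path:

# Illustrative only: the happy path, where the model follows the
# "Similarity score =" output format requested in the prompt.
if __name__ == "__main__":
    mock_response = {"choices": [{"message": {"content": "Similarity score = 3.5"}}]}
    assert post_process(mock_response) == 3.5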
assets/ar/semantics/STS/SemEval17T2STS_JAIS13b_ZeroShot.py (58 additions, 0 deletions)
@@ -0,0 +1,58 @@
from llmebench.datasets import SemEval17T2STSDataset
from llmebench.models import FastChatModel
from llmebench.tasks import STSTask


def metadata():
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "Locally hosted Jais-13b-chat model using FastChat.",
    }


def config():
    return {
        "dataset": SemEval17T2STSDataset,
        "task": STSTask,
        "model": FastChatModel,
        "model_args": {
            "max_tries": 3,
        },
    }


def prompt(input_sample):
    base_prompt = (
        f"Given two sentences, produce a continuous valued similarity score on a "
        f"scale from 0 to 5, with 0 indicating that the semantics of the sentences are "
        f"completely independent and 5 signifying semantic equivalence. The output "
        f"should be exactly in form Similarity score =. \n{input_sample}"
    )
    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    raw_response = response["choices"][0]["message"]["content"]

    if "Similarity score =" in raw_response:
        pred_num = (
            raw_response.split("Similarity score = ")[1]
            .strip()
            .split(" ")[0]
            .rstrip(".")
        )
        score = float(pred_num)
    else:
        try:
            pred_sum = float(raw_response)
            score = pred_sum
        except Exception as e:
            score = None

    return score
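And the fallback branch (illustrative only, not part of the commit): a response that contains neither the requested "Similarity score =" marker nor a bare number falls through to the except clause, so post_process returns None, signalling an unparsable prediction.

# Illustrative only: unparsable output yields None.
if __name__ == "__main__":
    mock_response = {
        "choices": [{"message": {"content": "I cannot provide a similarity score."}}]
    }
    assert post_process(mock_response) is None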