From f48adcee6d0a9a119f6991e492ecf98edfd9ba80 Mon Sep 17 00:00:00 2001
From: Arid Hasan <18038960+AridHasan@users.noreply.github.com>
Date: Wed, 31 Jan 2024 06:51:06 -0400
Subject: [PATCH] Add Jais ZeroShot assets for semantic tasks (#256)

* Jais13b added for arabic semantics

* Format code, remove unused imports and minor metadata changes

---------

Co-authored-by: Fahim Imaduddin Dalvi
---
Review revision of the original patch: every asset function gets a
docstring, the STS post_process functions return None on malformed model
output instead of raising, and the `bsae_prompt` typo is fixed. The blob
hashes on the `index` lines are those of the original commit.

 .../ar/semantics/NLI/XNLI_JAIS13b_ZeroShot.py | 69 +++++++++++++++++++++
 .../semantics/STS/Q2QSim_JAIS13b_ZeroShot.py  | 68 +++++++++++++++++++++
 .../STS/SemEval17T1STS_JAIS13b_ZeroShot.py    | 63 +++++++++++++++++++
 .../STS/SemEval17T2STS_JAIS13b_ZeroShot.py    | 63 +++++++++++++++++++
 4 files changed, 263 insertions(+)
 create mode 100644 assets/ar/semantics/NLI/XNLI_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/semantics/STS/SemEval17T1STS_JAIS13b_ZeroShot.py
 create mode 100644 assets/ar/semantics/STS/SemEval17T2STS_JAIS13b_ZeroShot.py

diff --git a/assets/ar/semantics/NLI/XNLI_JAIS13b_ZeroShot.py b/assets/ar/semantics/NLI/XNLI_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..022bff8d
--- /dev/null
+++ b/assets/ar/semantics/NLI/XNLI_JAIS13b_ZeroShot.py
@@ -0,0 +1,69 @@
+from llmebench.datasets import XNLIDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import XNLITask
+
+
+def metadata():
+    """Return descriptive metadata (author, model, description) for this asset."""
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "Jais-13b-chat",
+        "description": "Locally hosted Jais-13b-chat model using FastChat.",
+    }
+
+
+def config():
+    """Return the dataset, task and model classes used by this asset."""
+    return {
+        "dataset": XNLIDataset,
+        "task": XNLITask,
+        "model": FastChatModel,
+    }
+
+
+def prompt(input_sample):
+    """Build the single-turn Arabic NLI prompt for one sample.
+
+    `input_sample` must contain exactly two tab-separated sentences (the
+    premise followed by the hypothesis); anything else raises ValueError.
+    """
+    sent1, sent2 = input_sample.split("\t")
+    prompt_text = "نقدم لك جملتين تمثلان فرضيتين. مهمتك هي تصنيف الفرضية اللاحقة بالنسبة للفرضية المسبقة تبعاً لواحدة من هذه التصنيفات: صحيح (الفرضية اللاحقة تدل على نفس الفرضية المسبقة)، خطأ (الفرضية اللاحقة تناقض الفرضية المسبقة)، أو غير معروف (حيادي). يجب أن يقتصر ردك على واحدة من هذه التصنيفات: صحيح، خطأ، أو غير معروف."
+    base_prompt = (
+        prompt_text
+        + "\nالفرضية المسبقة: "
+        + sent1
+        + "\nالفرضية اللاحقة: "
+        + sent2
+        + "\n"
+        + "التصنيف: "
+    )
+
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    """Map the model's Arabic answer to an XNLI label.
+
+    Returns "neutral", "entailment" or "contradiction"; an answer with none
+    of the expected keywords is printed and mapped to None.
+    """
+    input_label = response["choices"][0]["message"]["content"]
+    input_label = input_label.replace(".", "").strip().lower()
+
+    if "غير معروف" in input_label or "حيادي" in input_label:
+        pred_label = "neutral"
+    elif "صحيح" in input_label or "تدل" in input_label:
+        pred_label = "entailment"
+    elif "خطأ" in input_label or "تناقض" in input_label:
+        pred_label = "contradiction"
+    else:
+        print(input_label)
+        pred_label = None
+
+    return pred_label
diff --git a/assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py b/assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..0a06b019
--- /dev/null
+++ b/assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py
@@ -0,0 +1,68 @@
+from llmebench.datasets import STSQ2QDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import Q2QSimDetectionTask
+
+
+def metadata():
+    """Return descriptive metadata (author, model, description) for this asset."""
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "Jais-13b-chat",
+        "description": "Locally hosted Jais-13b-chat model using FastChat.",
+    }
+
+
+def config():
+    """Return the dataset, task and model classes plus the model arguments."""
+    return {
+        "dataset": STSQ2QDataset,
+        "task": Q2QSimDetectionTask,
+        "model": FastChatModel,
+        "model_args": {
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    """Build the single-turn prompt asking whether two questions are similar.
+
+    `input_sample` must contain exactly two tab-separated questions;
+    anything else raises ValueError.
+    """
+    q1, q2 = input_sample.split("\t")
+    input_sample = q1 + "\t" + q2
+    base_prompt = f"Are the following two questions semantically similar (i.e., asking for similar information)? The output should be exactly in form yes or no.\n\n{input_sample}"
+
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    """Map the model's yes/no answer to the label "1" or "0".
+
+    Returns None when the answer matches neither set of patterns.
+    """
+    input_label = response["choices"][0]["message"]["content"]
+    input_label = input_label.replace(".", "").strip().lower()
+    pred_label = ""
+
+    if "yes" in input_label or "label: 1" in input_label:
+        pred_label = "1"
+    if (
+        input_label == "no"
+        or input_label.startswith("no,")
+        or "label: 0" in input_label
+        or "label: no" in input_label
+        or "not semantically similar" in input_label
+    ):
+        pred_label = "0"
+
+    if pred_label == "":
+        pred_label = None
+
+    return pred_label
diff --git a/assets/ar/semantics/STS/SemEval17T1STS_JAIS13b_ZeroShot.py b/assets/ar/semantics/STS/SemEval17T1STS_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..f3bdcd6d
--- /dev/null
+++ b/assets/ar/semantics/STS/SemEval17T1STS_JAIS13b_ZeroShot.py
@@ -0,0 +1,63 @@
+from llmebench.datasets import SemEval17T1STSDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import STSTask
+
+
+def metadata():
+    """Return descriptive metadata (author, model, description) for this asset."""
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "Jais-13b-chat",
+        "description": "Locally hosted Jais-13b-chat model using FastChat.",
+    }
+
+
+def config():
+    """Return the dataset, task and model classes plus the model arguments."""
+    return {
+        "dataset": SemEval17T1STSDataset,
+        "task": STSTask,
+        "model": FastChatModel,
+        "model_args": {
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    """Build the single-turn prompt asking for a 0-5 similarity score."""
+    base_prompt = (
+        f"Given two sentences, produce a continuous valued similarity score on a "
+        f"scale from 0 to 5, with 0 indicating that the semantics of the sentences are "
+        f"completely independent and 5 signifying semantic equivalence. The output "
+        f"should be exactly in form Similarity score =. \n{input_sample}"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    """Extract the predicted similarity score from the model response.
+
+    Returns the score as a float, or None when no number can be parsed
+    (malformed output no longer raises).
+    """
+    raw_response = response["choices"][0]["message"]["content"]
+    marker = "Similarity score ="
+
+    if marker in raw_response:
+        # First whitespace-delimited token after the marker, so that both
+        # "Similarity score = 4.5" and "Similarity score =4.5" are accepted.
+        tokens = raw_response.split(marker, 1)[1].split()
+        candidate = tokens[0].rstrip(".") if tokens else ""
+    else:
+        candidate = raw_response
+
+    try:
+        return float(candidate)
+    except ValueError:
+        return None
diff --git a/assets/ar/semantics/STS/SemEval17T2STS_JAIS13b_ZeroShot.py b/assets/ar/semantics/STS/SemEval17T2STS_JAIS13b_ZeroShot.py
new file mode 100644
index 00000000..9f377eb3
--- /dev/null
+++ b/assets/ar/semantics/STS/SemEval17T2STS_JAIS13b_ZeroShot.py
@@ -0,0 +1,63 @@
+from llmebench.datasets import SemEval17T2STSDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import STSTask
+
+
+def metadata():
+    """Return descriptive metadata (author, model, description) for this asset."""
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "Jais-13b-chat",
+        "description": "Locally hosted Jais-13b-chat model using FastChat.",
+    }
+
+
+def config():
+    """Return the dataset, task and model classes plus the model arguments."""
+    return {
+        "dataset": SemEval17T2STSDataset,
+        "task": STSTask,
+        "model": FastChatModel,
+        "model_args": {
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    """Build the single-turn prompt asking for a 0-5 similarity score."""
+    base_prompt = (
+        f"Given two sentences, produce a continuous valued similarity score on a "
+        f"scale from 0 to 5, with 0 indicating that the semantics of the sentences are "
+        f"completely independent and 5 signifying semantic equivalence. The output "
+        f"should be exactly in form Similarity score =. \n{input_sample}"
+    )
+    return [
+        {
+            "role": "user",
+            "content": base_prompt,
+        },
+    ]
+
+
+def post_process(response):
+    """Extract the predicted similarity score from the model response.
+
+    Returns the score as a float, or None when no number can be parsed
+    (malformed output no longer raises).
+    """
+    raw_response = response["choices"][0]["message"]["content"]
+    marker = "Similarity score ="
+
+    if marker in raw_response:
+        # First whitespace-delimited token after the marker, so that both
+        # "Similarity score = 4.5" and "Similarity score =4.5" are accepted.
+        tokens = raw_response.split(marker, 1)[1].split()
+        candidate = tokens[0].rstrip(".") if tokens else ""
+    else:
+        candidate = raw_response
+
+    try:
+        return float(candidate)
+    except ValueError:
+        return None