diff --git a/assets/ar/sentiment_emotion_others/sentiment/ArSAS_Llama_7b_chat_ZeroShot.py b/assets/ar/sentiment_emotion_others/sentiment/ArSAS_Llama_7b_chat_ZeroShot.py
new file mode 100644
index 00000000..7b25e54a
--- /dev/null
+++ b/assets/ar/sentiment_emotion_others/sentiment/ArSAS_Llama_7b_chat_ZeroShot.py
@@ -0,0 +1,51 @@
+from llmebench.datasets import ArSASDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SentimentTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "Llama-2-13b-chat-hf",
+        "description": "Locally hosted Llama-2-13b-chat hf model using FastChat. Poor performance is expected, since Llama 2 is not explicitly trained with Arabic data.",
+        "scores": {"Macro-F1": "0.106"},
+    }
+
+
+def config():
+    return {
+        "dataset": ArSASDataset,
+        "task": SentimentTask,
+        "model": FastChatModel,
+    }
+
+
+def prompt(input_sample):
+    return [
+        {
+            "role": "system",
+            "content": "You are an AI assistant that helps people find information.",
+        },
+        {
+            "role": "user",
+            "content": f'Classify the sentiment of the following sentence as "Positive", "Negative", "Neutral" or "Mixed". Output only the label and nothing else.\nSentence: {input_sample}\nLabel: ',
+        },
+    ]
+
+
+def post_process(response):
+    out = response["choices"][0]["message"]["content"]
+    out = out.strip().lower()
+
+    if "i apologize" in out:
+        return None
+
+    j = out.find("label:")
+    if j >= 0:
+        out = out[j + len("label:") :]
+    else:
+        j = out.find(" is:\n\n")
+        if j >= 0:
+            out = out[j + len(" is:\n\n") :]
+    out = out.strip().title()
+    return out
diff --git a/assets/en/sentiment_emotion_others/sentiment/SST2_GPT4_ZeroShot.py b/assets/en/sentiment_emotion_others/sentiment/SST2_GPT4_ZeroShot.py
new file mode 100644
index 00000000..544cb7ff
--- /dev/null
+++ b/assets/en/sentiment_emotion_others/sentiment/SST2_GPT4_ZeroShot.py
@@ -0,0 +1,66 @@
+from llmebench.datasets import HuggingFaceDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SentimentTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "gpt-4-32k (version 0314)",
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
+    }
+
+
+def config():
+    return {
+        "dataset": HuggingFaceDataset,
+        "dataset_args": {
+            "huggingface_dataset_name": "sst2",
+            "column_mapping": {
+                "input": "sentence",
+                "label": "label",
+                "input_id": "idx",
+            },
+        },
+        "task": SentimentTask,
+        "model": OpenAIModel,
+        "model_args": {
+            "class_labels": ["positive", "negative"],
+            "max_tries": 3,
+        },
+        "general_args": {"custom_test_split": "validation"},
+    }
+
+
+def prompt(input_sample):
+    prompt_string = (
+        f"You are tasked with analyzing the sentiment of the given sentence. "
+        f"Please read it carefully and determine whether the sentiment expressed is positive or negative. Provide only label.\n\n"
+        f"sentence: {input_sample}\n"
+        f"label:\n"
+    )
+    return [
+        {
+            "role": "system",
+            "content": "You are a data annotation expert specializing in sentiment analysis.",
+        },
+        {"role": "user", "content": prompt_string},
+    ]
+
+
+def post_process(response):
+    if not response:
+        return None
+    label = response["choices"][0]["message"]["content"].lower()
+
+    label_fixed = label.replace("label:", "").replace("sentiment: ", "").strip()
+
+    if label_fixed.startswith("please provide the text"):
+        label_fixed = None
+
+    if label_fixed == "positive":
+        return 1
+    elif label_fixed == "negative":
+        return 0
+
+    return None
diff --git a/assets/en/sentiment_emotion_others/sentiment/SST2_Llama_7b_chat_ZeroShot.py b/assets/en/sentiment_emotion_others/sentiment/SST2_Llama_7b_chat_ZeroShot.py
index 258c83be..ba5691a6 100644
--- a/assets/en/sentiment_emotion_others/sentiment/SST2_Llama_7b_chat_ZeroShot.py
+++ b/assets/en/sentiment_emotion_others/sentiment/SST2_Llama_7b_chat_ZeroShot.py
@@ -8,7 +8,7 @@ def metadata():
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "Llama-2-13b-chat-hf",
         "description": "Locally hosted Llama-2-13b-chat hf model using FastChat.",
-        "scores": {"Accuracy": "0.861"},
+        "scores": {"Accuracy": "0.924"},
     }
 
 
@@ -30,29 +30,43 @@ def config():
 
 
 def prompt(input_sample):
+    prompt_string = (
+        f"You are tasked with analyzing the sentiment of the given sentence. "
+        f"Please read it carefully and determine whether the sentiment expressed is positive or negative. Provide only label.\n\n"
+        f"sentence: {input_sample.strip()}\n"
+        f"label:\n"
+    )
     return [
         {
             "role": "system",
-            "content": "You are an AI assistant that helps people find information.",
-        },
-        {
-            "role": "user",
-            "content": f'Classify the sentiment of the following sentence as "Positive" or "Negative". Output only the label and nothing else.\nSentence: {input_sample}\nLabel: ',
+            "content": "You are a data annotation expert specializing in sentiment analysis.",
         },
+        {"role": "user", "content": prompt_string},
     ]
 
 
 def post_process(response):
-    out = response["choices"][0]["message"]["content"]
-    out = out.strip().lower()
-    j = out.find("label:")
-    if j > 0:
-        out = out[j + len("label:") :]
-    out = out.strip().lower()
-
-    if out == "positive":
-        return 1
-    elif out == "negative":
-        return 0
+    mapping = {"positive": 1, "negative": 0}
+
+    pred_label = response["choices"][0]["message"]["content"].lower()
+
+    if "\n\nlabel: negative" in pred_label:
+        pred_label = "negative"
+    elif "\n\nlabel: positive" in pred_label:
+        pred_label = "positive"
+    elif "\n\nlabel:" in pred_label:
+        pred_label = pred_label.split("\n\nlabel:")[1]
+        pred_label = pred_label.strip().lower()
+        if pred_label == "positive" or pred_label == "negative":
+            return mapping[pred_label]
+    elif "\n\nnegative" in pred_label:
+        pred_label = "negative"
+    elif "\n\npositive" in pred_label:
+        pred_label = "positive"
+    else:
+        pred_label = None
+
+    if pred_label is not None:
+        return mapping.get(pred_label)
     else:
         return None
diff --git a/llmebench/datasets/HuggingFace.py b/llmebench/datasets/HuggingFace.py
index 56938d7b..07d04eb8 100644
--- a/llmebench/datasets/HuggingFace.py
+++ b/llmebench/datasets/HuggingFace.py
@@ -59,7 +59,7 @@ def get_data_sample():
 
     def load_data(self, data_split, no_labels=False):
         dataset = datasets.load_dataset(
-            huggingface_dataset_name, split=data_split, cache_dir=self.data_dir
+            self.huggingface_dataset_name, split=data_split, cache_dir=self.data_dir
        )
 
         data = []