Add Llama 2 assets for SST2 and ArSAS (#245)

Add Llama 2 assets for SST2 and ArSAS, and GPT4 asset for SST2.

* Add ArSAS sentiment asset for Llama
* Add GPT4 asset for SST2
* Fix bug in HuggingFace loader
* Update Llama2 asset for SST2
* Fix GPT4 asset

---------

Co-authored-by: Firoj Alam <[email protected]>
qcri · Oct 28, 2023 · 9b9de5a · 9b9de5a
1 parent 4bd995d
commit 9b9de5a
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 18 deletions.
diff --git a/assets/ar/sentiment_emotion_others/sentiment/ArSAS_Llama_7b_chat_ZeroShot.py b/assets/ar/sentiment_emotion_others/sentiment/ArSAS_Llama_7b_chat_ZeroShot.py
@@ -0,0 +1,51 @@
+from llmebench.datasets import ArSASDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SentimentTask
+
+
+def metadata():
+    """Return the descriptive record for this asset.
+
+    The record holds the author, the model name, a free-text description
+    and the reported Macro-F1 score, all as strings.
+    """
+    details = {}
+    details["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    details["model"] = "Llama-2-13b-chat-hf"
+    details["description"] = (
+        "Locally hosted Llama-2-13b-chat hf model using FastChat. Poor performance is expected, since Llama 2 is not explicitly trained with Arabic data."
+    )
+    details["scores"] = {"Macro-F1": "0.106"}
+    return details
+
+
+def config():
+    """Return the dataset, task and model classes this asset is run with."""
+    settings = {}
+    settings["dataset"] = ArSASDataset
+    settings["task"] = SentimentTask
+    settings["model"] = FastChatModel
+    return settings
+
+
+def prompt(input_sample):
+    """Build the chat messages for one input sentence.
+
+    Returns a two-element list: a system message followed by a user
+    message that embeds ``input_sample`` and asks for one of the labels
+    "Positive", "Negative", "Neutral" or "Mixed".
+    """
+    system_message = {
+        "role": "system",
+        "content": "You are an AI assistant that helps people find information.",
+    }
+    instruction = f'Classify the sentiment of the following sentence as "Positive", "Negative", "Neutral" or "Mixed". Output only the label and nothing else.\nSentence: {input_sample}\nLabel: '
+    user_message = {"role": "user", "content": instruction}
+    return [system_message, user_message]
+
+
+def post_process(response):
+    """Extract a title-cased sentiment label from a chat completion.
+
+    Args:
+        response: Mapping shaped like a chat-completion result; the text
+            is read from ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        ``None`` when the reply contains "i apologize"; otherwise the
+        text following the first "label:" marker (or, failing that, the
+        first " is:\\n\\n" marker), stripped and title-cased. When neither
+        marker is present the whole reply is used.
+    """
+    out = response["choices"][0]["message"]["content"]
+    out = out.strip().lower()
+
+    # A reply containing an apology is treated as a refusal.
+    if "i apologize" in out:
+        return None
+
+    # str.find returns -1 when the marker is absent and 0 when the reply
+    # starts with it, so the check must be against -1: a reply such as
+    # "Label: Positive" has its marker at index 0.
+    for marker in ("label:", " is:\n\n"):
+        j = out.find(marker)
+        if j != -1:
+            out = out[j + len(marker) :]
+            break
+    return out.strip().title()
diff --git a/assets/en/sentiment_emotion_others/sentiment/SST2_GPT4_ZeroShot.py b/assets/en/sentiment_emotion_others/sentiment/SST2_GPT4_ZeroShot.py
@@ -0,0 +1,66 @@
+from llmebench.datasets import HuggingFaceDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SentimentTask
+
+
+def metadata():
+    """Return the descriptive record (author, model, description) for this asset."""
+    details = {}
+    details["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    details["model"] = "gpt-4-32k (version 0314)"
+    details["description"] = (
+        "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'."
+    )
+    return details
+
+
+def config():
+    """Return the run configuration for this asset.
+
+    The dictionary names the dataset, task and model classes together
+    with their argument dictionaries: the "sst2" HuggingFace dataset and
+    its column mapping, the model's class labels and ``max_tries``, and a
+    ``custom_test_split`` of "validation".
+    """
+    column_mapping = {
+        "input": "sentence",
+        "label": "label",
+        "input_id": "idx",
+    }
+    dataset_args = {
+        "huggingface_dataset_name": "sst2",
+        "column_mapping": column_mapping,
+    }
+    model_args = {
+        "class_labels": ["positive", "negative"],
+        "max_tries": 3,
+    }
+    general_args = {"custom_test_split": "validation"}
+    return {
+        "dataset": HuggingFaceDataset,
+        "dataset_args": dataset_args,
+        "task": SentimentTask,
+        "model": OpenAIModel,
+        "model_args": model_args,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the chat messages for one input sentence.
+
+    Returns a two-element list: a system message followed by a user
+    message that embeds ``input_sample`` and asks for a positive or
+    negative label.
+    """
+    pieces = [
+        "You are tasked with analyzing the sentiment of the given sentence. ",
+        "Please read it carefully and determine whether the sentiment expressed is positive or negative. Provide only label.\n\n",
+        f"sentence: {input_sample}\n",
+        "label:\n",
+    ]
+    system_message = {
+        "role": "system",
+        "content": "You are a data annotation expert specializing in sentiment analysis.",
+    }
+    user_message = {"role": "user", "content": "".join(pieces)}
+    return [system_message, user_message]
+
+
+def post_process(response):
+    """Map a chat completion to an integer sentiment label.
+
+    Args:
+        response: Mapping shaped like a chat-completion result; the text
+            is read from ``response["choices"][0]["message"]["content"]``.
+            A falsy value (``None``, empty mapping) yields ``None``.
+
+    Returns:
+        ``1`` for "positive", ``0`` for "negative", ``None`` for anything
+        else, including a reply that asks for the text to be provided.
+    """
+    if not response:
+        return None
+    label = response["choices"][0]["message"]["content"].lower()
+
+    # Drop a field name the model may have echoed back before the label.
+    label_fixed = label.replace("label:", "").replace("sentiment: ", "").strip()
+
+    # The reply was lowercased above, so the prefix compared here must be
+    # lowercase as well; a capitalized literal could never match.
+    if label_fixed.startswith("please provide the text"):
+        return None
+
+    # Any reply other than the two exact labels maps to None.
+    return {"positive": 1, "negative": 0}.get(label_fixed)
diff --git a/assets/en/sentiment_emotion_others/sentiment/SST2_Llama_7b_chat_ZeroShot.py b/assets/en/sentiment_emotion_others/sentiment/SST2_Llama_7b_chat_ZeroShot.py
@@ -8,7 +8,7 @@ def metadata():
         "author": "Arabic Language Technologies, QCRI, HBKU",
         "model": "Llama-2-13b-chat-hf",
         "description": "Locally hosted Llama-2-13b-chat hf model using FastChat.",
-        "scores": {"Accuracy": "0.861"},
+        "scores": {"Accuracy": "0.924"},
     }
 
 
@@ -30,29 +30,43 @@ def config():
 
 
 def prompt(input_sample):
+    # Build the system and user chat messages for one input sentence; the
+    # added version strips surrounding whitespace from the sentence and
+    # asks for a positive or negative label.
+    prompt_string = (
+        f"You are tasked with analyzing the sentiment of the given sentence. "
+        f"Please read it carefully and determine whether the sentiment expressed is positive or negative. Provide only label.\n\n"
+        f"sentence: {input_sample.strip()}\n"
+        f"label:\n"
+    )
     return [
         {
             "role": "system",
-            "content": "You are an AI assistant that helps people find information.",
-        },
-        {
-            "role": "user",
-            "content": f'Classify the sentiment of the following sentence as "Positive" or "Negative". Output only the label and nothing else.\nSentence: {input_sample}\nLabel: ',
+            "content": "You are a data annotation expert specializing in sentiment analysis.",
         },
+        {"role": "user", "content": prompt_string},
     ]
 
 
 def post_process(response):
+    # Convert the model's reply into an integer label: 1 for "positive",
+    # 0 for "negative", None when no label can be extracted.
-    out = response["choices"][0]["message"]["content"]
-    out = out.strip().lower()
-    j = out.find("label:")
-    if j > 0:
-        out = out[j + len("label:") :]
-    out = out.strip().lower()
-
-    if out == "positive":
-        return 1
-    elif out == "negative":
-        return 0
+    mapping = {"positive": 1, "negative": 0}
+
+    pred_label = response["choices"][0]["message"]["content"].lower()
+
+    # Replies that repeat a "label:" field after a blank line are reduced
+    # to the bare label text.
+    if "\n\nlabel: negative" in pred_label:
+        pred_label = "negative"
+    elif "\n\nlabel: positive" in pred_label:
+        pred_label = "positive"
+    elif "\n\nlabel:" in pred_label:
+        pred_label = pred_label.split("\n\nlabel:")[1]
+        pred_label = pred_label.strip().lower()
+    # NOTE: this starts a new if-chain; the elif/else branches below belong
+    # to it, not to the "label:" chain above.
+    if pred_label == "positive" or pred_label == "negative":
+        return mapping[pred_label]
+    elif "\n\nnegative" in pred_label:
+        pred_label = "negative"
+    elif "\n\npositive" in pred_label:
+        pred_label = "positive"
+    else:
+        pred_label = None
+
+    if pred_label is not None:
+        return mapping[pred_label]
     else:
         return None
diff --git a/llmebench/datasets/HuggingFace.py b/llmebench/datasets/HuggingFace.py
@@ -59,7 +59,7 @@ def get_data_sample():
 
     def load_data(self, data_split, no_labels=False):
         dataset = datasets.load_dataset(
-            huggingface_dataset_name, split=data_split, cache_dir=self.data_dir
+            self.huggingface_dataset_name, split=data_split, cache_dir=self.data_dir
         )
 
         data = []