Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JAIS13b ZeroShot for ArEmotion asset #255

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ data

# Tests
.coverage
.coverage.Arid.local.57674.812565


# IDE stuff
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from llmebench.datasets import EmotionDataset
from llmebench.models import FastChatModel
from llmebench.tasks import EmotionTask


def metadata():
    """Return static descriptive metadata for this benchmark asset."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="JAIS-13b",
        description="Locally hosted JAIS-13b-chat model using FastChat.",
        scores={"Macro-F1": ""},
    )


def config():
    """Benchmark wiring: dataset, task, model class, and model arguments.

    Returns:
        dict consumed by the llmebench runner.
    """
    return {
        "dataset": EmotionDataset,
        "task": EmotionTask,
        "model": FastChatModel,
        "model_args": {
            "class_labels": [
                "anger",
                # "anticipation" was missing here although it appears both in
                # the prompt's emotion list and in `emotions_positions` below.
                "anticipation",
                "disgust",
                "fear",
                "joy",
                "love",
                "optimism",
                "pessimism",
                "sadness",
                "surprise",
                "trust",
            ],
            "max_tries": 30,
        },
    }


def prompt(input_sample):
    """Build the single-turn chat message asking the model to list the
    emotions present in an Arabic sentence as a Python list."""
    content = (
        "Predict all the possible emotions in the following Arabic sentence without explanation and put them in a Python list. List of emotions is: anger, anticipation, disgust, fear, joy, love, optimism, pessimism, sadness, surprise, and trust.\n "
        f"Sentence: {input_sample}\n"
        "label: \n"
    )
    return [{"role": "user", "content": content}]


# Fixed index of each emotion in the 11-way multi-label output vector.
emotions_positions = {
    emotion: position
    for position, emotion in enumerate(
        [
            "anger",
            "anticipation",
            "disgust",
            "fear",
            "joy",
            "love",
            "optimism",
            "pessimism",
            "sadness",
            "surprise",
            "trust",
        ]
    )
}


def emotions_array(labels):
    """Convert a free-text label string into an 11-element binary vector.

    Args:
        labels: model output text mentioning zero or more emotion names.

    Returns:
        list[int] of 0/1 flags in the fixed emotion order (anger ... trust).
    """
    # Fix: the original lowercased each emotion name but not the response
    # text, so capitalized mentions like "Anger" were never matched.
    text = labels.lower()
    emotion_order = (
        "anger",
        "anticipation",
        "disgust",
        "fear",
        "joy",
        "love",
        "optimism",
        "pessimism",
        "sadness",
        "surprise",
        "trust",
    )
    return [1 if emotion in text else 0 for emotion in emotion_order]


def post_process(response):
    """Map the model's textual chat reply onto the binary emotion vector."""
    reply_text = response["choices"][0]["message"]["content"]
    return emotions_array(reply_text)
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from llmebench.datasets import BanFakeNewsDataset
from llmebench.models import PetalsModel
from llmebench.tasks import SentimentTask


def metadata():
    """Return static descriptive metadata for this benchmark asset."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="bloomz-176b (8bit quantized)",
        description="Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
    )


def config():
    """Benchmark wiring: dataset, task, model class, and model arguments."""
    labels = ["True", "Clickbaits", "Satire", "Fake"]
    # NOTE(review): SentimentTask is reused for fake-news categories here —
    # confirm this is intentional and not a copy-paste of another asset.
    return {
        "dataset": BanFakeNewsDataset,
        "task": SentimentTask,
        "model": PetalsModel,
        "model_args": {"class_labels": labels, "max_tries": 10},
    }


def prompt(input_sample):
    """Build the plain-text completion prompt for the Petals model."""
    header = "Label the following news as True, Clickbaits, Satire, or Fake. Provide only the label as your response."
    return {"prompt": f"{header}\n\nnews: {input_sample}\n\nlabel: "}


def post_process(response):
    """Extract the predicted label, stripping sentinel tokens and whitespace."""
    raw = response["outputs"].strip()
    for token in ("<s>", "</s>"):
        raw = raw.replace(token, "")
    return raw.strip()
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from llmebench.datasets import BanFakeNewsDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import SentimentTask


def metadata():
    """Return static descriptive metadata for this benchmark asset."""
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-3.5-turbo",
        # Fixed typo in the description: "where chosen" -> "were chosen".
        "description": "gpt-3.5-turbo model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
    }


def config():
    """Benchmark wiring: dataset, task, model class, and model arguments."""
    labels = ["True", "Clickbaits", "Satire", "Fake"]
    # NOTE(review): SentimentTask is reused for fake-news categories here —
    # confirm this is intentional and not a copy-paste of another asset.
    return {
        "dataset": BanFakeNewsDataset,
        "task": SentimentTask,
        "model": OpenAIModel,
        "model_args": {"class_labels": labels, "max_tries": 20},
    }


def few_shot_prompt(input_sample, base_prompt, examples):
    """Compose a few-shot prompt: instruction, numbered examples, test sample.

    Args:
        input_sample: news text to classify.
        base_prompt: task instruction placed before the examples.
        examples: iterable of dicts with "input" and "label" keys.

    Returns:
        Full prompt string ending with an empty "label:" slot.
    """
    out_prompt = base_prompt + "\n"
    out_prompt += "Here are some examples:\n\n"
    # Fix: examples are now numbered from 1; the original emitted "Example 0:".
    for index, example in enumerate(examples, start=1):
        out_prompt += (
            "Example " + str(index) + ":\n"
            "news: " + example["input"] + "\nlabel: " + example["label"] + "\n\n"
        )
    out_prompt += "news: " + input_sample + "\nlabel: \n"
    return out_prompt


def prompt(input_sample, examples):
    """Assemble the system + user chat messages for few-shot classification."""
    task_instruction = 'Annotate the "news" into "one" of the following categories: "True", "Clickbaits", "Satire", or "Fake".'
    system_message = "You are a expert annotator. Your task is to analyze the news and identify the appropriate category of the news.\n"
    return [
        {"role": "system", "content": system_message},
        {
            "role": "user",
            "content": few_shot_prompt(input_sample, task_instruction, examples),
        },
    ]


def post_process(response):
    """Extract the predicted label from an OpenAI chat response.

    Returns None for an empty/missing response or when the model asks for
    input instead of answering.
    """
    # Fix: guard against a falsy response, matching the sibling zero-shot
    # asset; the original raised TypeError on `None["choices"]`.
    if not response:
        return None
    label = response["choices"][0]["message"]["content"]
    label_fixed = label.replace("label:", "").strip()
    if label_fixed.startswith("Please provide the text"):
        label_fixed = None
    return label_fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from llmebench.datasets import BanFakeNewsDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import SentimentTask


def metadata():
    """Return static descriptive metadata for this benchmark asset."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="gpt-3.5-turbo",
        description="GPT3.5 Turbo 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
    )


def config():
    """Benchmark wiring: dataset, task, model class, and model arguments."""
    labels = ["True", "Clickbaits", "Satire", "Fake"]
    # NOTE(review): SentimentTask is reused for fake-news categories here —
    # confirm this is intentional and not a copy-paste of another asset.
    return {
        "dataset": BanFakeNewsDataset,
        "task": SentimentTask,
        "model": OpenAIModel,
        "model_args": {"class_labels": labels, "max_tries": 20},
    }


def prompt(input_sample):
    """Assemble the system + user chat messages for zero-shot classification."""
    user_text = (
        'Based on the content of the news, please classify it as either "True", "Clickbaits", "Satire", or "Fake". Provide only the label as your response.'
        f"\n\nnews: {input_sample}\n\nlabel: "
    )
    system_text = "You are a expert annotator. Your task is to analyze the news and identify the appropriate category of the news."
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_text},
    ]


def post_process(response):
    """Extract the predicted label; None for empty or unusable replies."""
    if not response:
        return None
    content = response["choices"][0]["message"]["content"]
    cleaned = content.replace("label:", "").strip()
    # The model occasionally asks for the input instead of answering.
    return None if cleaned.startswith("Please provide the text") else cleaned
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from llmebench.datasets import BanglaNewsCategorizationDataset
from llmebench.models import PetalsModel
from llmebench.tasks import SentimentTask


def metadata():
    """Return static descriptive metadata for this benchmark asset."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="bloomz-176b (8bit quantized)",
        description="Locally hosted BLOOMZ 176b model (8 bit quantized version) using the Petals.",
    )


def config():
    """Benchmark wiring: dataset, task, model class, and model arguments."""
    labels = [
        "entertainment",
        "state",
        "sports",
        "national",
        "kolkata",
        "international",
    ]
    # NOTE(review): SentimentTask is reused for news categorization here —
    # confirm this is intentional and not a copy-paste of another asset.
    return {
        "dataset": BanglaNewsCategorizationDataset,
        "task": SentimentTask,
        "model": PetalsModel,
        "model_args": {"class_labels": labels, "max_tries": 10},
    }


def prompt(input_sample):
    """Build the plain-text completion prompt for the Petals model."""
    header = "Label the following news as 'entertainment', 'state', 'sports', 'national', 'kolkata', or 'international'. Provide only the label as your response."
    return {"prompt": f"{header}\n\nnews: {input_sample}\n\nlabel: "}


def post_process(response):
    """Extract the predicted label, stripping sentinel tokens and whitespace."""
    raw = response["outputs"].strip()
    for token in ("<s>", "</s>"):
        raw = raw.replace(token, "")
    return raw.strip()
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from llmebench.datasets import BanglaNewsCategorizationDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import SentimentTask
import os


def metadata():
    """Return static descriptive metadata for this benchmark asset."""
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-3.5-turbo",
        # Fixed typo in the description: "where chosen" -> "were chosen".
        "description": "gpt-3.5-turbo model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
    }


def config():
    """Benchmark wiring: dataset, task, model class, and model arguments."""
    labels = [
        "entertainment",
        "state",
        "sports",
        "national",
        "kolkata",
        "international",
    ]
    # NOTE(review): SentimentTask is reused for news categorization here —
    # confirm this is intentional and not a copy-paste of another asset.
    return {
        "dataset": BanglaNewsCategorizationDataset,
        "task": SentimentTask,
        "model": OpenAIModel,
        "model_args": {"class_labels": labels, "max_tries": 20},
    }


def few_shot_prompt(input_sample, base_prompt, examples):
    """Compose a few-shot prompt: instruction, numbered examples, test sample.

    Args:
        input_sample: news text to classify.
        base_prompt: task instruction placed before the examples.
        examples: iterable of dicts with "input" and "label" keys.

    Returns:
        Full prompt string ending with an empty "label:" slot.
    """
    out_prompt = base_prompt + "\n"
    out_prompt += "Here are some examples:\n\n"
    # Fix: examples are now numbered from 1; the original emitted "Example 0:".
    for index, example in enumerate(examples, start=1):
        out_prompt += (
            "Example " + str(index) + ":\n"
            "news: " + example["input"] + "\nlabel: " + example["label"] + "\n\n"
        )
    out_prompt += "news: " + input_sample + "\nlabel: \n"
    return out_prompt


def prompt(input_sample, examples):
    """Assemble the system + user chat messages for few-shot classification."""
    task_instruction = 'Annotate the "news" into "one" of the following categories: "entertainment", "state", "sports", "national", "kolkata", or "international"'
    system_message = "You are a expert annotator. Your task is to analyze the news and identify the appropriate category of the news.\n"
    return [
        {"role": "system", "content": system_message},
        {
            "role": "user",
            "content": few_shot_prompt(input_sample, task_instruction, examples),
        },
    ]


def post_process(response):
    """Extract the predicted label from an OpenAI chat response.

    Returns None for an empty/missing response or when the model asks for
    input instead of answering.
    """
    # Fix: guard against a falsy response, matching the BanFakeNews zero-shot
    # asset; the original raised TypeError on `None["choices"]`.
    if not response:
        return None
    label = response["choices"][0]["message"]["content"]
    label_fixed = label.replace("label:", "").strip()
    if label_fixed.startswith("Please provide the text"):
        label_fixed = None
    return label_fixed
Loading