From 0cc39d5a66bc49a0eda491e613ebc7548b95572a Mon Sep 17 00:00:00 2001
From: Basel Mousi <59998313+baselmousi@users.noreply.github.com>
Date: Sun, 27 Aug 2023 16:47:30 +0300
Subject: [PATCH] Update AraBench assets for ChatGPT and GPT4 (#179)

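* Modify machine translation assets: use a translator-specific system prompt, a "Text:/Translation:" user prompt, and strip double quotes and surrounding whitespace in post_process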

* Remove private data files (the summa-* test sets) from the list of evaluated sets

* Format code

---------

Co-authored-by: Fahim Imaduddin Dalvi <faimaduddin@hbku.edu.qa>
---
 .../MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py      | 14 ++++++--------
 .../MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py       | 15 +++++++--------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py
index 68db7bb7..1e2e343f 100644
--- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py
+++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py
@@ -44,11 +44,6 @@ def config():
         "madar.test.nil.0.sd",
         "madar.test.nil.1.eg",
         "madar.test.nil.2.eg",
-        "summa-2M.test.mgr.0.ma",
-        "summa-AJ.test.msa.0.ms",
-        "summa-BBC.test.msa.0.ms",
-        "summa-LBC.test.lev.0.lb",
-        "summa-Oman.test.glf.0.om",
     ]
     configs = []
     for testset in sets:
@@ -84,14 +79,17 @@ def prompt(input_sample):
     return [
         {
             "role": "system",
-            "content": "You are an AI assistant that helps people find information.",
+            "content": "You are an expert translator specialized in translating texts from Arabic to English. You are concise as you only output the translation of the text without any illustrations or extra details",
         },
         {
             "role": "user",
-            "content": f"Translate the following to English, output only the translation:\n {input_sample}",
+            "content": f"Translate the following text to English.\nText: {input_sample}\nTranslation: ",
         },
     ]
 
 
 def post_process(response):
-    return response["choices"][0]["message"]["content"]
+    response = response["choices"][0]["message"]["content"]
+    response = response.replace('"', "")
+    response = response.strip()
+    return response
diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py
index 147859b7..2631e402 100644
--- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py
+++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py
@@ -44,12 +44,8 @@ def config():
         "madar.test.nil.0.sd",
         "madar.test.nil.1.eg",
         "madar.test.nil.2.eg",
-        "summa-2M.test.mgr.0.ma",
-        "summa-AJ.test.msa.0.ms",
-        "summa-BBC.test.msa.0.ms",
-        "summa-LBC.test.lev.0.lb",
-        "summa-Oman.test.glf.0.om",
     ]
+
     configs = []
     for testset in sets:
         configs.append(
@@ -82,15 +78,18 @@ def config():
 
 def prompt(input_sample):
     return {
-        "system_message": "You are an AI assistant that helps people find information.",
+        "system_message": "You are an expert translator specialized in translating texts from Arabic to English. You are concise as you only output the translation of the text without any illustrations or extra details",
         "messages": [
             {
                 "sender": "user",
-                "text": f"Translate the following to English, output only the translation:\n {input_sample}",
+                "text": f"Translate the following text to English.\nText: {input_sample}\nTranslation: ",
             }
         ],
     }
 
 
 def post_process(response):
-    return response["choices"][0]["text"]
+    response = response["choices"][0]["text"]
+    response = response.replace('"', "")
+    response = response.strip()
+    return response