From 348f0fc6444dda9560a5898eaf977259e2643582 Mon Sep 17 00:00:00 2001
From: DJump13
Date: Thu, 9 May 2024 10:14:10 -0700
Subject: [PATCH] test 2

---
Note: the blob ids on the "index" lines are the ones from the original
submission; they were not recomputed for the revised file contents.

 translator/libargos.py         |  75 +++++++++++++++++
 translator/libmarian.py        |  25 ++++++
 translator/optimizeLanguage.py |  38 +++++++++
 translator/server.py           | 141 +++++++++++++++++++++++++++++++
 4 files changed, 279 insertions(+)
 create mode 100644 translator/libargos.py
 create mode 100644 translator/libmarian.py
 create mode 100644 translator/optimizeLanguage.py
 create mode 100644 translator/server.py

diff --git a/translator/libargos.py b/translator/libargos.py
new file mode 100644
index 000000000..a41359334
--- /dev/null
+++ b/translator/libargos.py
@@ -0,0 +1,75 @@
+import argostranslate.package
+import argostranslate.settings
+import os
+from pathlib import Path
+
+PATH = os.getcwd()
+ARGOS_PACKAGES_DIR=Path(f"{PATH}/translator/argos_models")
+
+#Download all available packages
+def download_packages():
+    """Download every available Argos package missing from ARGOS_PACKAGES_DIR."""
+    #mkdir(parents=True) also works when the "translator" folder is missing
+    ARGOS_PACKAGES_DIR.mkdir(parents=True, exist_ok=True)
+    argostranslate.settings.downloads_dir = ARGOS_PACKAGES_DIR
+    argostranslate.package.update_package_index()
+    available_packages = argostranslate.package.get_available_packages()
+    #list the folder once instead of once per available package
+    present = set(os.listdir(ARGOS_PACKAGES_DIR))
+    for package in available_packages:
+        if package.code + ".argosmodel" not in present:
+            package.download()
+
+
+#returns list of installed_packages with names in format matching the filenames
+def get_installed_package_names():
+    """Return the expected .argosmodel filename of every installed package."""
+    return [
+        f"translate-{package.from_code}_{package.to_code}.argosmodel"
+        for package in argostranslate.package.get_installed_packages()
+    ]
+
+#Installs all packages from local directory
+def install_packages():
+    """Install every model file in ARGOS_PACKAGES_DIR that is not installed yet.
+
+    Returns the installed packages as reported after installation.
+    """
+    installed_packages = set(get_installed_package_names())
+    #the folder may not exist yet if download_packages() has not been run
+    if ARGOS_PACKAGES_DIR.is_dir():
+        for filename in os.listdir(ARGOS_PACKAGES_DIR):
+            #skip anything that is not an Argos model archive
+            if not filename.endswith(".argosmodel"):
+                continue
+            if filename not in installed_packages:
+                model_path = os.path.join(ARGOS_PACKAGES_DIR, filename)
+                argostranslate.package.install_from_path(model_path)
+    return argostranslate.package.get_installed_packages()
+
+#Uninstall all packages
+def uninstall_all_packages():
+    """Uninstall every currently installed Argos package."""
+    for package in argostranslate.package.get_installed_packages():
+        argostranslate.package.uninstall(package)
+
+#update all installed packages
+def update_packages():
+    """Install any local model files, then update every installed package."""
+    for package in install_packages():
+        package.update()
+
+#displays all installed packages
+def display_installed_packages():
+    """Print "code, name" for every language of the installed packages."""
+    #named "languages" so that the builtin "list" is not shadowed
+    languages = {}
+    for package in install_packages():
+        languages[package.to_code] = package.to_name
+        languages[package.from_code] = package.from_name
+    for code, name in languages.items():
+        print(f"{code}, {name}")
+
+if __name__ == "__main__":
+    download_packages()
+    display_installed_packages()
diff --git a/translator/libmarian.py b/translator/libmarian.py
new file mode 100644
index 000000000..9f17e112e
--- /dev/null
+++ b/translator/libmarian.py
@@ -0,0 +1,25 @@
+from transformers import MarianMTModel, MarianTokenizer
+from typing import Sequence
+import os
+PATH = os.getcwd()
+
+def download_package(src, dst):
+    """Download the Helsinki-NLP opus-mt model for src->dst into marian_models."""
+    print(f"Downloading {src}-{dst}...")
+    model_name = f"Helsinki-NLP/opus-mt-{src}-{dst}"
+    target_dir = f"{PATH}/translator/marian_models/opus-mt-{src}-{dst}"
+    try:
+        #fetch both parts before saving anything, so a failed download does not
+        #leave a half-filled folder that package_downloaded() reports as present
+        tokenizer = MarianTokenizer.from_pretrained(model_name)
+        model = MarianMTModel.from_pretrained(model_name)
+        tokenizer.save_pretrained(target_dir)
+        model.save_pretrained(target_dir)
+    except OSError:
+        print("Package not found")
+
+def package_downloaded(src, dst):
+    """Return True if a folder for the src->dst opus-mt model exists locally."""
+    package_name = f"opus-mt-{src}-{dst}"
+    os.makedirs(f"{PATH}/translator/marian_models", exist_ok=True)
+    return package_name in os.listdir(f"{PATH}/translator/marian_models")
diff --git a/translator/optimizeLanguage.py b/translator/optimizeLanguage.py
new file mode 100644
index 000000000..5553dd834
--- /dev/null
+++ b/translator/optimizeLanguage.py
@@ -0,0 +1,38 @@
+import sys
+import libargos as argos
+import libmarian as marian
+
+def optimize_path(src, dest):
+    """Make sure a translation model for src->dest is available locally.
+
+    Prefers an installed Argos package, then an already downloaded Marian
+    model, and only downloads a Marian model when neither exists.
+    """
+    #download all available argos packages
+    argos.download_packages()
+
+    #check if installed as argos
+    argos_packages = argos.install_packages()
+    if any(p.from_code == src and p.to_code == dest for p in argos_packages):
+        return
+
+    #check if present as marian
+    if marian.package_downloaded(src, dest):
+        return
+
+    #download marian package
+    marian.download_package(src, dest)
+
+def main():
+    """Optimize both directions between the two languages given on argv."""
+    #exit with a usage message instead of an IndexError traceback
+    if len(sys.argv) < 3:
+        sys.exit(f"usage: {sys.argv[0]} LANG_ONE LANG_TWO")
+    lang_one = sys.argv[1]
+    lang_two = sys.argv[2]
+    optimize_path(lang_one, lang_two)
+    optimize_path(lang_two, lang_one)
+    print(f"{lang_one} and {lang_two} Optimized")
+
+if __name__ == "__main__":
+    main()
diff --git a/translator/server.py b/translator/server.py
new file mode 100644
index 000000000..584efbb1a
--- /dev/null
+++ b/translator/server.py
@@ -0,0 +1,141 @@
+import os
+import sys
+import json
+import argostranslate.package
+import argostranslate.translate
+from functools import cached_property
+from http.server import BaseHTTPRequestHandler
+from urllib.parse import parse_qsl, urlparse
+from http.server import HTTPServer
+from pathlib import Path
+from transformers import MarianMTModel, MarianTokenizer
+from typing import Sequence
+from libargos import install_packages
+import socket
+import time
+
+PORTS = [8000, 5000, 8001, 8002, 8003, 8004, 8005, 8006, 8007, 8008]
+TIMEOUT = 3600
+PATH = os.getcwd()
+
+
+
+class MarianModel:
+    """Locally stored Marian (opus-mt) model for one language direction."""
+
+    def __init__(self, source_lang: str, dest_lang: str) -> None:
+        path = f"{PATH}/translator/marian_models/opus-mt-{source_lang}-{dest_lang}"
+        self.model = MarianMTModel.from_pretrained(path, local_files_only = True)
+        self.tokenizer = MarianTokenizer.from_pretrained(path, local_files_only = True)
+
+    def translate(self, texts: Sequence[str]) -> Sequence[str]:
+        """Translate each string in texts and return the results in order."""
+        tokens = self.tokenizer(list(texts), return_tensors="pt", padding=True)
+        translate_tokens = self.model.generate(**tokens)
+        return [self.tokenizer.decode(t, skip_special_tokens=True) for t in translate_tokens]
+
+class WebRequestHandler(BaseHTTPRequestHandler):
+    """Answers GET requests carrying text, from and to with a JSON translation."""
+
+    @cached_property
+    def url(self):
+        return urlparse(self.path)
+
+    @cached_property
+    def query_data(self):
+        return dict(parse_qsl(self.url.query))
+
+    @staticmethod
+    def _argos_model(from_code, to_code):
+        """Return the path of the Argos model file for from_code->to_code."""
+        return Path(f"{PATH}/translator/argos_models/translate-{from_code}_{to_code}.argosmodel")
+
+    @cached_property
+    def translate_data(self):
+        """Return the translated text, or a "Translation Unavailable" message.
+
+        Raises KeyError when text, from or to is missing from the query.
+        """
+        text = self.query_data['text']
+        from_code = self.query_data['from']
+        to_code = self.query_data['to']
+
+        # The codes come from the request and are put into file paths:
+        # refuse anything containing a path separator
+        if any(sep in code for code in (from_code, to_code) for sep in "/\\"):
+            return "Translation Unavailable:" + from_code + to_code
+
+        # Use Argos if Language Package Exists
+        if self._argos_model(from_code, to_code).exists():
+            return argostranslate.translate.translate(text, from_code, to_code)
+        # Use Marian if Language Package Exists in Marian but not Argos
+        if Path(f"{PATH}/translator/marian_models/opus-mt-{from_code}-{to_code}").exists():
+            marian = MarianModel(from_code, to_code)
+            return marian.translate([text])[0]
+        # Use Argos "English in the Middle" if not in Argos and Marian by Default.
+        # Pivoting through English needs both legs: from_code->en and
+        # en->to_code (two models into English cannot reach to_code)
+        if (self._argos_model(from_code, "en").exists()
+                and self._argos_model("en", to_code).exists()):
+            return argostranslate.translate.translate(text, from_code, to_code)
+        # If a package doesn't exist
+        return "Translation Unavailable:" + from_code + to_code
+
+    def do_GET(self):
+        """Send the JSON reply, or a 400 reply when a query parameter is missing."""
+        # Validate and build the body before any header goes out: a KeyError
+        # raised after send_response(200) would leave a half-sent reply
+        missing = [key for key in ("text", "from", "to") if key not in self.query_data]
+        if self.query_data and missing:
+            status = 400
+            body = json.dumps({"error": "Missing query parameter: " + ", ".join(missing)})
+        else:
+            status, body = 200, self.get_response()
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(body.encode("utf-8"))
+
+    def get_response(self):
+        """Return the JSON body; translate_data is "" when there is no query."""
+        return json.dumps(
+            {
+                "translate_data" : self.translate_data if self.query_data else "",
+            },
+            ensure_ascii=False
+        )
+
+
+def port_open(port):
+    """Return True if nothing is listening on localhost:port (the port is free)."""
+    #connect_ex returns 0 when the connection succeeds, i.e. something is
+    #already listening there; any other value means the port is free
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        return s.connect_ex(('localhost', port)) != 0
+
+def _raise_timeout():
+    """handle_timeout hook: turn an idle timeout into a TimeoutError."""
+    raise TimeoutError()
+
+def start_server(port):
+    """Serve on 127.0.0.1:port until no request arrives for TIMEOUT seconds, then exit."""
+    server = HTTPServer(("127.0.0.1", port), WebRequestHandler)
+    server.timeout = TIMEOUT
+    server.handle_timeout = _raise_timeout
+    print(f"Serving at port: {port}", file=sys.stderr)
+    print(f"Server started at {time.strftime('%I:%M')} with timeout: {TIMEOUT} seconds", file=sys.stderr)
+    try:
+        while True:
+            server.handle_request()
+    except TimeoutError:
+        print("Translation server timed out")
+        sys.exit()
+    finally:
+        #release the listening socket however the loop ends
+        server.server_close()
+
+if __name__ == "__main__":
+    install_packages()
+    for port in PORTS:
+        if port_open(port):
+            start_server(port)