diff --git a/translator/libargos.py b/translator/libargos.py deleted file mode 100644 index a41359334..000000000 --- a/translator/libargos.py +++ /dev/null @@ -1,64 +0,0 @@ -import argostranslate.package -import argostranslate.settings -import os -from pathlib import Path - -PATH = os.getcwd() -ARGOS_PACKAGES_DIR=Path(f"{PATH}/translator/argos_models") - -#Download all available packages -def download_packages(): - if not ARGOS_PACKAGES_DIR.exists(): os.mkdir(ARGOS_PACKAGES_DIR) - argostranslate.settings.downloads_dir = ARGOS_PACKAGES_DIR - argostranslate.package.update_package_index() - available_packages = argostranslate.package.get_available_packages() - for package in available_packages: - package_name = package.code + ".argosmodel" - if package_name not in os.listdir(ARGOS_PACKAGES_DIR): - package.download() - - -#returns list of installed_packages with names in format matching the filenames -def get_installed_package_names(): - models = [] - installed_packages = argostranslate.package.get_installed_packages() - for package in installed_packages: - model_name = f"translate-{package.from_code}_{package.to_code}.argosmodel" - models.append(model_name) - return models - -#Installs all packages from local directory -def install_packages(): - installed_packages = get_installed_package_names() - for filename in os.listdir(ARGOS_PACKAGES_DIR): - if filename not in installed_packages: - file = os.path.join(ARGOS_PACKAGES_DIR, filename) - argostranslate.package.install_from_path(file) - return argostranslate.package.get_installed_packages() - -#Uninstall all packages -def uninstall_all_packages(): - installed = argostranslate.package.get_installed_packages() - for package in installed: - argostranslate.package.uninstall(package) - -#update all installed packages -def update_packages(): - installed = install_packages() - for package in installed: - package.update() - -#displays all installed packages -def display_installed_packages(): - installed = install_packages() - list = {} - for package in installed: - list.update({package.to_code:package.to_name}) - list.update({package.from_code:package.from_name}) - for code in list: - print(f"{code}, {list[code]}") - -if __name__ == "__main__": - download_packages() - display_installed_packages() - diff --git a/translator/libmarian.py b/translator/libmarian.py deleted file mode 100644 index 58a5ee5e1..000000000 --- a/translator/libmarian.py +++ /dev/null @@ -1,22 +0,0 @@ -from transformers import MarianMTModel, MarianTokenizer -from typing import Sequence -import os -PATH = os.getcwd() - -def download_package(src, dst): - print(f"Downloading {src}-{dst}...") - model_name = f"Helsinki-NLP/opus-mt-{src}-{dst}" - try: - tokenizer = MarianTokenizer.from_pretrained(model_name) - tokenizer.save_pretrained(f"{PATH}/translator/marian_models/opus-mt-{src}-{dst}") - model = MarianMTModel.from_pretrained(model_name) - model.save_pretrained(f"{PATH}/translator/marian_models/opus-mt-{src}-{dst}") - except OSError: - print("Package not found") - -def package_downloaded(src, dst): - package_name = f"opus-mt-{src}-{dst}" - os.makedirs(f"{PATH}/translator/marian_models", exist_ok=True) - if package_name in os.listdir(f"{PATH}/translator/marian_models"): - return True - return False \ No newline at end of file diff --git a/translator/optimizeLanguage.py b/translator/optimizeLanguage.py deleted file mode 100644 index e9e5f5e35..000000000 --- a/translator/optimizeLanguage.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys -import libargos as argos -import libmarian as marian - -def optimize_path(src, dest): - #download all available argos packages - argos.download_packages() - - #check if installed as argos - argos_packages = argos.install_packages() - for package in argos_packages: - if(package.from_code == src and package.to_code == dest): - return - - #check if present as marian - if(marian.package_downloaded(src, dest)): - return - - #download marian package - marian.download_package(src, dest) - -def main(): - lang_one = sys.argv[1] - lang_two = sys.argv[2] - optimize_path(lang_one, lang_two) - optimize_path(lang_two, lang_one) - print(f"{lang_one} and {lang_two} Optimized") - -if __name__ == "__main__": - main() - - - diff --git a/translator/server.py b/translator/server.py deleted file mode 100644 index c2ba7ba49..000000000 --- a/translator/server.py +++ /dev/null @@ -1,107 +0,0 @@ -import os -import sys -import json -import argostranslate.package -import argostranslate.translate -from functools import cached_property -from http.server import BaseHTTPRequestHandler -from urllib.parse import parse_qsl, urlparse -from http.server import HTTPServer -from pathlib import Path -from transformers import MarianMTModel, MarianTokenizer -from typing import Sequence -from libargos import install_packages -import socket -import time - -PORTS = [8000, 5000, 8001, 8002, 8003, 8004, 8005, 8006, 8007, 8008] -TIMEOUT = 3600 -PATH = os.getcwd() - - - -class MarianModel: - def __init__(self, source_lang: str, dest_lang: str) -> None: - path = f"{PATH}/translator/marian_models/opus-mt-{source_lang}-{dest_lang}" - self.model = MarianMTModel.from_pretrained(path, local_files_only = True) - self.tokenizer = MarianTokenizer.from_pretrained(path, local_files_only = True) - - def translate(self, texts: Sequence[str]) -> Sequence[str]: - tokens = self.tokenizer(list(texts), return_tensors="pt", padding=True) - translate_tokens = self.model.generate(**tokens) - return [self.tokenizer.decode(t, skip_special_tokens=True) for t in translate_tokens] - -class WebRequestHandler(BaseHTTPRequestHandler): - @cached_property - def url(self): - return urlparse(self.path) - - @cached_property - def query_data(self): - return dict(parse_qsl(self.url.query)) - - @cached_property - def translate_data(self): - text = self.query_data['text'] - from_code = self.query_data['from'] - to_code = self.query_data['to'] - - # Use Argos if Language Package Exists - if Path(f"{PATH}/translator/argos_models/translate-{from_code}_{to_code}.argosmodel").exists(): - translatedText = argostranslate.translate.translate(text, from_code, to_code) - return translatedText - # Use Marian if Language Package Exists in Marian but not Argos - elif Path(f"{PATH}/translator/marian_models/opus-mt-{from_code}-{to_code}").exists(): - marian = MarianModel(from_code, to_code) - translatedText = marian.translate([text]) - return translatedText[0] - # Use Argos "English in the Middle" if not in Argos and Marian by Default - elif (Path(f"{PATH}/translator/argos_models/translate-{from_code}_en.argosmodel").exists() and \ - Path(f"{PATH}/translator/argos_models/translate-{to_code}_en.argosmodel").exists()) or \ - (Path(f"{PATH}/translator/argos_models/translate-en_{from_code}.argosmodel").exists() and \ - Path(f"{PATH}/translator/argos_models/translate-en_{to_code}.argosmodel").exists()): - translatedText = argostranslate.translate.translate(text, from_code, to_code) - return translatedText - # If a package doesn't exist - else: - return "Translation Unavailable:" + from_code + to_code - - def do_GET(self): - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.end_headers() - self.wfile.write(self.get_response().encode("utf-8")) - - def get_response(self): - return json.dumps( - { - "translate_data" : self.translate_data if self.query_data else "", - }, - ensure_ascii=False - ) - - -def port_open(port): - #connect_ex returns 0 if it connects to a socket meaning port is closed - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - return s.connect_ex(('localhost', port)) != 0 - -def start_server(port): - try: - server = HTTPServer(("127.0.0.1", port), WebRequestHandler) - server.timeout = TIMEOUT - server.handle_timeout = lambda: (_ for _ in ()).throw(TimeoutError()) - print(f"Serving at port: {port}", file=sys.stderr) - print(f"Server started at {time.strftime('%I:%M')} with timeout: {TIMEOUT} seconds", file=sys.stderr) - while(True): server.handle_request() - except TimeoutError: - print("Translation server timed out") - sys.exit() - -if __name__ == "__main__": - install_packages() - for port in PORTS: - if(port_open(port)): - start_server(port) - -