-
Notifications
You must be signed in to change notification settings - Fork 207
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
226 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import argostranslate.package | ||
import argostranslate.settings | ||
import os | ||
from pathlib import Path | ||
|
||
PATH = os.getcwd() | ||
ARGOS_PACKAGES_DIR=Path(f"{PATH}/translator/argos_models") | ||
|
||
#Download all available packages | ||
def download_packages():
    """Download every package from the Argos index that is not yet on disk.

    Points ``argostranslate.settings.downloads_dir`` at ``ARGOS_PACKAGES_DIR``
    (creating the directory if needed), refreshes the package index, and
    downloads each available package whose ``<package.code>.argosmodel`` file
    is not already listed in that directory.
    """
    # parents=True so a missing "translator" parent directory does not abort.
    ARGOS_PACKAGES_DIR.mkdir(parents=True, exist_ok=True)
    argostranslate.settings.downloads_dir = ARGOS_PACKAGES_DIR
    argostranslate.package.update_package_index()

    # List the directory once instead of once per available package.
    present = set(os.listdir(ARGOS_PACKAGES_DIR))
    for package in argostranslate.package.get_available_packages():
        package_name = package.code + ".argosmodel"
        if package_name not in present:
            package.download()
            # Remember it so a duplicate index entry is not fetched twice.
            present.add(package_name)
|
||
|
||
#returns list of installed_packages with names in format matching the filenames | ||
def get_installed_package_names():
    """Return one ``translate-<from>_<to>.argosmodel`` name per installed package."""
    return [
        f"translate-{pkg.from_code}_{pkg.to_code}.argosmodel"
        for pkg in argostranslate.package.get_installed_packages()
    ]
|
||
#Installs all packages from local directory | ||
def install_packages():
    """Install every not-yet-installed model file found in ``ARGOS_PACKAGES_DIR``.

    Returns:
        The value of ``argostranslate.package.get_installed_packages()`` after
        any new installations.
    """
    already_installed = set(get_installed_package_names())
    # A missing models directory just means there is nothing to install.
    if os.path.isdir(ARGOS_PACKAGES_DIR):
        for filename in os.listdir(ARGOS_PACKAGES_DIR):
            # Ignore stray files (OS/editor metadata etc.) that are not models.
            if not filename.endswith(".argosmodel"):
                continue
            if filename not in already_installed:
                model_path = os.path.join(ARGOS_PACKAGES_DIR, filename)
                argostranslate.package.install_from_path(model_path)
    return argostranslate.package.get_installed_packages()
|
||
#Uninstall all packages | ||
def uninstall_all_packages():
    """Uninstall every package currently reported as installed."""
    for pkg in argostranslate.package.get_installed_packages():
        argostranslate.package.uninstall(pkg)
|
||
#update all installed packages | ||
def update_packages():
    """Install any pending local models, then call ``update()`` on each installed package."""
    for pkg in install_packages():
        pkg.update()
|
||
#displays all installed packages | ||
def display_installed_packages():
    """Print a ``code, name`` line for every language used by an installed package.

    Calls ``install_packages()`` first, so pending local models are installed
    before the listing. Each language code is printed once, even when it
    appears in several packages.
    """
    installed = install_packages()
    # Keyed by language code so each language is reported only once.
    # (Named ``languages`` to avoid shadowing the ``list`` builtin.)
    languages = {}
    for package in installed:
        languages[package.to_code] = package.to_name
        languages[package.from_code] = package.from_name
    for code, name in languages.items():
        print(f"{code}, {name}")
|
||
if __name__ == "__main__":
    # Script entry point: fetch missing models, then list what is installed.
    download_packages()
    display_installed_packages()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from transformers import MarianMTModel, MarianTokenizer | ||
from typing import Sequence | ||
import os | ||
PATH = os.getcwd() | ||
|
||
def download_package(src, dst):
    """Download the ``Helsinki-NLP/opus-mt-{src}-{dst}`` model and tokenizer.

    Both are saved under ``{PATH}/translator/marian_models/opus-mt-{src}-{dst}``.
    If loading or saving raises ``OSError``, "Package not found" is printed
    and the function returns without raising.

    Args:
        src: Source language code.
        dst: Destination language code.
    """
    print(f"Downloading {src}-{dst}...")
    model_name = f"Helsinki-NLP/opus-mt-{src}-{dst}"
    target_dir = f"{PATH}/translator/marian_models/opus-mt-{src}-{dst}"
    try:
        # Fetch BOTH artifacts before writing anything: saving the tokenizer
        # first would create the target directory even when the model fetch
        # then fails, and package_downloaded() only checks that the directory
        # exists.
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer.save_pretrained(target_dir)
        model.save_pretrained(target_dir)
    except OSError:
        print("Package not found")
|
||
def package_downloaded(src, dst):
    """Return True if ``opus-mt-{src}-{dst}`` is listed in the Marian models directory.

    The models directory is created first when it does not exist yet.
    """
    models_dir = f"{PATH}/translator/marian_models"
    os.makedirs(models_dir, exist_ok=True)
    return f"opus-mt-{src}-{dst}" in os.listdir(models_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import sys | ||
import libargos as argos | ||
import libmarian as marian | ||
|
||
def optimize_path(src, dest):
    """Make sure a direct ``src`` -> ``dest`` model is available locally.

    Downloads and installs the Argos packages first. If none of them covers
    the pair directly and no Marian package is present on disk, the Marian
    package for the pair is downloaded.
    """
    # Download all available Argos packages.
    argos.download_packages()

    # Installed as an Argos package?
    covered_by_argos = any(
        pkg.from_code == src and pkg.to_code == dest
        for pkg in argos.install_packages()
    )

    # Short-circuit keeps the Marian check from running when Argos covers it.
    if covered_by_argos or marian.package_downloaded(src, dest):
        return

    # Neither backend has it yet: fetch the Marian package.
    marian.download_package(src, dest)
|
||
def main():
    """Optimize translation in both directions for two language codes.

    Reads the two language codes from ``sys.argv[1]`` and ``sys.argv[2]``.
    Exits with a usage message (non-zero status) when either is missing.
    """
    if len(sys.argv) < 3:
        prog = sys.argv[0] if sys.argv else "<script>"
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"Usage: {prog} <lang_one> <lang_two>")

    lang_one, lang_two = sys.argv[1], sys.argv[2]
    optimize_path(lang_one, lang_two)
    optimize_path(lang_two, lang_one)
    print(f"{lang_one} and {lang_two} Optimized")
|
||
if __name__ == "__main__":
    # Run the optimizer only when executed as a script, not on import.
    main()
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import os | ||
import sys | ||
import json | ||
import argostranslate.package | ||
import argostranslate.translate | ||
from functools import cached_property | ||
from http.server import BaseHTTPRequestHandler | ||
from urllib.parse import parse_qsl, urlparse | ||
from http.server import HTTPServer | ||
from pathlib import Path | ||
from transformers import MarianMTModel, MarianTokenizer | ||
from typing import Sequence | ||
from libargos import install_packages | ||
import socket | ||
import time | ||
|
||
PORTS = [8000, 5000, 8001, 8002, 8003, 8004, 8005, 8006, 8007, 8008] | ||
TIMEOUT = 3600 | ||
PATH = os.getcwd() | ||
|
||
|
||
|
||
class MarianModel:
    """Translator backed by a Marian model stored under ``translator/marian_models``."""

    def __init__(self, source_lang: str, dest_lang: str) -> None:
        """Load the model and tokenizer for ``source_lang`` -> ``dest_lang``.

        Both are read from ``{PATH}/translator/marian_models/opus-mt-<src>-<dst>``
        with ``local_files_only=True``.
        """
        path = f"{PATH}/translator/marian_models/opus-mt-{source_lang}-{dest_lang}"
        self.model = MarianMTModel.from_pretrained(path, local_files_only=True)
        self.tokenizer = MarianTokenizer.from_pretrained(path, local_files_only=True)

    def translate(self, texts: Sequence[str]) -> Sequence[str]:
        """Translate ``texts`` and return the decoded outputs in the same order.

        An empty input yields an empty list without touching the model.
        """
        batch = list(texts)
        if not batch:
            # Nothing to tokenize; avoid handing an empty batch to generate().
            return []
        tokens = self.tokenizer(batch, return_tensors="pt", padding=True)
        translate_tokens = self.model.generate(**tokens)
        return [
            self.tokenizer.decode(t, skip_special_tokens=True)
            for t in translate_tokens
        ]
|
||
class WebRequestHandler(BaseHTTPRequestHandler):
    """HTTP GET handler that translates the ``text`` query parameter.

    Expected query string: ``?text=...&from=<code>&to=<code>``. The response
    body is a JSON object with a single ``translate_data`` key, or an
    ``error`` key (HTTP 400) when a required parameter is missing.
    """

    # Parameters translate_data reads from the query string.
    _REQUIRED_PARAMS = ("text", "from", "to")

    @cached_property
    def url(self):
        """Parsed form of the request path."""
        return urlparse(self.path)

    @cached_property
    def query_data(self):
        """Query-string parameters as a dict."""
        return dict(parse_qsl(self.url.query))

    @staticmethod
    def _is_safe_code(code):
        """Return True if ``code`` holds only alphanumerics, ``_`` or ``-``.

        Language codes come from the request and are interpolated into
        filesystem paths, so path separators and dots must be rejected.
        """
        return bool(code) and all(ch.isalnum() or ch in "_-" for ch in code)

    @staticmethod
    def _argos_model_exists(from_code, to_code):
        """Return True if the Argos model file for the pair exists on disk."""
        return Path(
            f"{PATH}/translator/argos_models/translate-{from_code}_{to_code}.argosmodel"
        ).exists()

    @cached_property
    def translate_data(self):
        """Translate the requested text with the first backend that supports it.

        Order: direct Argos model, direct Marian model, Argos pivot through
        English. Returns a "Translation Unavailable" string when none apply.

        Raises:
            KeyError: if ``text``, ``from`` or ``to`` is absent from the query.
        """
        text = self.query_data['text']
        from_code = self.query_data['from']
        to_code = self.query_data['to']
        unavailable = "Translation Unavailable:" + from_code + to_code

        # Never build filesystem paths from unvalidated request input.
        if not (self._is_safe_code(from_code) and self._is_safe_code(to_code)):
            return unavailable

        # Use Argos if a direct language package exists.
        if self._argos_model_exists(from_code, to_code):
            return argostranslate.translate.translate(text, from_code, to_code)

        # Use Marian if the pair exists in Marian but not Argos.
        # NOTE(review): the model is reloaded from disk on every request.
        if Path(f"{PATH}/translator/marian_models/opus-mt-{from_code}-{to_code}").exists():
            marian = MarianModel(from_code, to_code)
            return marian.translate([text])[0]

        # Argos "English in the middle": a pivot needs from -> en AND en -> to.
        if self._argos_model_exists(from_code, "en") and self._argos_model_exists("en", to_code):
            return argostranslate.translate.translate(text, from_code, to_code)

        # No package covers this pair.
        return unavailable

    def do_GET(self):
        """Answer a GET request with the JSON translation result.

        The body is computed before any header is written, so the status
        line reflects the outcome: 400 when the query is non-empty but lacks
        a required parameter, 200 otherwise.
        """
        missing = [name for name in self._REQUIRED_PARAMS if name not in self.query_data]
        if self.query_data and missing:
            status = 400
            payload = json.dumps(
                {"error": "Missing query parameter(s): " + ", ".join(missing)},
                ensure_ascii=False,
            )
        else:
            status = 200
            payload = self.get_response()

        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(payload.encode("utf-8"))

    def get_response(self):
        """Return the JSON body; ``translate_data`` is "" when there is no query."""
        return json.dumps(
            {
                "translate_data": self.translate_data if self.query_data else "",
            },
            ensure_ascii=False
        )
|
||
|
||
def port_open(port):
    """Return True when nothing accepts a TCP connection on ``localhost:port``.

    ``connect_ex`` returns 0 when the connection succeeds, i.e. something is
    already listening there and the port is NOT available; any other value
    is treated as "free to use".
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        connect_status = probe.connect_ex(('localhost', port))
    return connect_status != 0
|
||
def start_server(port):
    """Serve translation requests on ``127.0.0.1:port`` until idle for ``TIMEOUT`` seconds.

    Requests are handled one at a time. When no request arrives within
    ``TIMEOUT`` seconds the process exits via ``sys.exit()``. If the port
    cannot be bound, the error is reported on stderr and the function
    returns so the caller can try another port.
    """
    def _raise_timeout():
        # HTTPServer calls handle_timeout() when handle_request() times out;
        # raising here breaks out of the serve loop below.
        raise TimeoutError()

    try:
        server = HTTPServer(("127.0.0.1", port), WebRequestHandler)
    except OSError as err:
        # The port may have been taken between the availability check and bind.
        print(f"Could not bind to port {port}: {err}", file=sys.stderr)
        return

    server.timeout = TIMEOUT
    server.handle_timeout = _raise_timeout
    try:
        print(f"Serving at port: {port}", file=sys.stderr)
        print(f"Server started at {time.strftime('%I:%M')} with timeout: {TIMEOUT} seconds", file=sys.stderr)
        while True:
            server.handle_request()
    except TimeoutError:
        print("Translation server timed out")
        sys.exit()
    finally:
        # Always release the listening socket, including on exit.
        server.server_close()
|
||
if __name__ == "__main__":
    # Install local Argos models, then serve on each candidate port that
    # currently has no listener.
    install_packages()
    for port in PORTS:
        if not port_open(port):
            continue
        start_server(port)
|
||
|