diff --git a/.gitignore b/.gitignore index 934f65f..ac03c3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .env -model_cache/ +model_cache* +venv/ diff --git a/.travis.yml b/.travis.yml index 91ed902..1392535 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ before_install: branches: only: - main - - CD-demo-md + - Ka-hapsing jobs: include: - name: Check YAML format @@ -36,7 +36,7 @@ jobs: - tox -e shellcheck - stage: deploy name: deploy - if: (type = push or type = cron) and (branch = main or branch = CD-demo-md) + if: (type = push or type = cron) and (branch = main or branch = Ka-hapsing) install: - pip install ansible before_script: @@ -54,6 +54,7 @@ jobs: --extra-vars "DEPLOY_HOST=${DEPLOY_HOST}" --extra-vars "DEPLOY_HOST_USER=${DEPLOY_HOST_USER}" --extra-vars "TRAVIS_TSUKI_SSH_KEY_PATH=${TRAVIS_TSUKI_SSH_KEY_PATH}" - --extra-vars "VIRTUAL_HOST=${VIRTUAL_HOST}" + --extra-vars "ASR_VIRTUAL_HOST=${ASR_VIRTUAL_HOST}" + --extra-vars "TTS_VIRTUAL_HOST=${TTS_VIRTUAL_HOST}" --extra-vars "SENTRY_DSN=${SENTRY_DSN}" ' diff --git a/DEMO.md b/DEMO.md deleted file mode 100644 index ec16599..0000000 --- a/DEMO.md +++ /dev/null @@ -1,12 +0,0 @@ -# 族語AI語音辨識系統 - -ILRDF Automatic-Speech-Recognition System - -## 研發團隊 - -- [李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com) -- [陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com) -- [意傳科技](https://ithuan.tw/) -- [原住民族語言研究發展基金會](https://www.ilrdf.org.tw/) - -感謝[聯和科創](https://www.104.com.tw/company/1a2x6bmu75)、[Pipalofasaran to Sowal no Pangcah/'Amis 台灣阿美族語言永續發展學會/原民會阿美族語言推動組織](https://www.facebook.com/groups/ypspt/about)、[台灣太魯閣族語言發展學會](https://qkktt.com/)、[台灣原住民族賽德克族語言文化學會](https://www.facebook.com/3S3TBL/)及族語老師們大力協助! diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 02f34d0..0000000 --- a/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM registry.hf.space/formospeech-formosan-asr:latest - -COPY app.py DEMO.md ./ \ No newline at end of file diff --git a/app.py b/app.py deleted file mode 100644 index a9d38de..0000000 --- a/app.py +++ /dev/null @@ -1,112 +0,0 @@ -import gradio as gr -import torch -from omegaconf import OmegaConf -from transformers import pipeline - -device = "cuda" if torch.cuda.is_available() else "cpu" -torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 - - -def load_pipe(model_id: str): - return pipeline( - "automatic-speech-recognition", - model=model_id, - max_new_tokens=128, - chunk_length_s=30, - batch_size=8, - torch_dtype=torch_dtype, - device=device, - ) - - -OmegaConf.register_new_resolver("load_pipe", load_pipe) - -models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml")) - - -def automatic_speech_recognition(model_id: str, dialect_id: str, audio_file: str): - model = models_config[model_id]["model"] - generate_kwargs = { - "task": "transcribe", - "language": "id", - "num_beams": 1, - "prompt_ids": torch.from_numpy(model.tokenizer.get_prompt_ids(dialect_id)).to( - device - ), - } - return model(audio_file, generate_kwargs=generate_kwargs)["text"].replace(f" {dialect_id}", "") - - -def when_model_selected(model_id: str): - model_config = models_config[model_id] - - dialect_drop_down_choices = [ - (k, v) for k, v in model_config["dialect_mapping"].items() - ] - - return gr.update( - choices=dialect_drop_down_choices, - value=dialect_drop_down_choices[0][1], - ) - - -demo = gr.Blocks( - title="臺灣南島語語音辨識系統", - css="@import url(https://tauhu.tw/tauhu-oo.css);", - theme=gr.themes.Default( - font=( - "tauhu-oo", - gr.themes.GoogleFont("Source Sans Pro"), - "ui-sans-serif", - "system-ui", - "sans-serif", - ) - ), -) - -with demo: - default_model_id = list(models_config.keys())[0] - model_drop_down = gr.Dropdown( - models_config.keys(), - value=default_model_id, - label="模型", - ) - - dialect_drop_down = gr.Radio( - choices=[ - (k, v) - for k, v in models_config[default_model_id]["dialect_mapping"].items() - ], - value=list(models_config[default_model_id]["dialect_mapping"].values())[0], - label="族別", - ) - - model_drop_down.input( - when_model_selected, - inputs=[model_drop_down], - outputs=[dialect_drop_down], - ) - - with open("DEMO.md") as tong: - gr.Markdown(tong.read()) - - gr.Interface( - automatic_speech_recognition, - inputs=[ - model_drop_down, - dialect_drop_down, - gr.Audio( - label="上傳或錄音", - type="filepath", - waveform_options=gr.WaveformOptions( - sample_rate=16000, - ), - ), - ], - outputs=[ - gr.Text(interactive=False, label="辨識結果"), - ], - allow_flagging="auto", - ) - -demo.launch() diff --git a/asr/Dockerfile b/asr/Dockerfile new file mode 100644 index 0000000..b6730db --- /dev/null +++ b/asr/Dockerfile @@ -0,0 +1,4 @@ +FROM registry.hf.space/ithuan-formosan-asr:latest + +EXPOSE 7860 +CMD ["python", "app.py"] diff --git a/deploy/.env.template b/deploy/.env.template index fd0fd25..123ddc3 100644 --- a/deploy/.env.template +++ b/deploy/.env.template @@ -1,5 +1,6 @@ # 線頂機專案網域名。若按算tī開發機試,就設定做`localhost`。 -VIRTUAL_HOST={{ VIRTUAL_HOST }} +ASR_VIRTUAL_HOST={{ ASR_VIRTUAL_HOST }} +TTS_VIRTUAL_HOST={{ TTS_VIRTUAL_HOST }} # Sentry監控事件 ## 登入Sentry,到本專案 > Settings > Client Keys(DSN) diff --git a/deploy/deploy.yaml b/deploy/deploy.yaml index 9b4ae3a..fadf5c4 100644 --- a/deploy/deploy.yaml +++ b/deploy/deploy.yaml @@ -21,9 +21,14 @@ ansible.builtin.template: src: .env.template dest: "{{ tsuanan_path }}/.env" - - name: 設定model_cache檔案簿仔 + - name: 設定辨識model_cache檔案簿仔 ansible.builtin.file: - path: "{{ tsuanan_path }}/model_cache" + path: "{{ tsuanan_path }}/model_cache_asr" + state: directory + mode: '1777' + - name: 設定合成model_cache檔案簿仔 + ansible.builtin.file: + path: "{{ tsuanan_path }}/model_cache_tts" state: directory mode: '1777' - name: 開docker compose up -d diff --git a/docker-compose.yml b/docker-compose.yml index d1bbf33..988399e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,12 +1,9 @@ --- services: asr: - build: ./ - command: python app.py - expose: - - 7860 + build: ./asr/ volumes: - - "./model_cache:/home/user/.cache:rw" + - "./model_cache_asr:/home/user/.cache:rw" deploy: resources: reservations: @@ -15,11 +12,28 @@ services: count: all capabilities: [gpu] networks: - - default - nginx-bridge environment: - - VIRTUAL_HOST=${VIRTUAL_HOST:-sapolita.ithuan.tw} - - LETSENCRYPT_HOST=${VIRTUAL_HOST:-sapolita.ithuan.tw} + - VIRTUAL_HOST=${ASR_VIRTUAL_HOST:-sapolita.ithuan.tw} + - LETSENCRYPT_HOST=${ASR_VIRTUAL_HOST:-sapolita.ithuan.tw} + - LETSENCRYPT_EMAIL=ithuan@ithuan.tw + restart: always + tts: + build: ./tts/ + volumes: + - "./model_cache_tts:/home/user/.cache:rw" + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + networks: + - nginx-bridge + environment: + - VIRTUAL_HOST=${TTS_VIRTUAL_HOST:-hnang-kari-ai-asi-sluhay.ithuan.tw} + - LETSENCRYPT_HOST=${TTS_VIRTUAL_HOST:-hnang-kari-ai-asi-sluhay.ithuan.tw} - LETSENCRYPT_EMAIL=ithuan@ithuan.tw restart: always diff --git a/tts/Dockerfile b/tts/Dockerfile new file mode 100644 index 0000000..074954e --- /dev/null +++ b/tts/Dockerfile @@ -0,0 +1,9 @@ +FROM registry.hf.space/ithuan-formosan-tts:latest + +COPY requirements.txt ./ +RUN pip install -r requirements.txt + +COPY models.yaml configs/models.yaml + +EXPOSE 7860 +CMD ["python", "app.py"] diff --git a/tts/models.yaml b/tts/models.yaml new file mode 100644 index 0000000..420a241 --- /dev/null +++ b/tts/models.yaml @@ -0,0 +1,17 @@ +--- +yourtts-three-denoised-finetune: + model: ${load_model:united-link/yourtts-formosan-three-denoised-finetune} + language_mapping: + 阿美: 阿美 + 賽德克: 賽德克 + 太魯閣: 太魯閣 + speaker_mapping: # display_name: id + 男/阿美/秀姑巒/族語E樂園: klokah_eval_ami_concat#wav_concat/klokah_eval_ami/005060_0.24-4.62.wav + 女/阿美/南勢/族語E樂園: klokah_eval_ami_concat#wav_concat/klokah_eval_ami/003879_0.0-5.01.wav + 男/賽德克/都達/族語E樂園: klokah_eval_sdq_concat#wav_concat/klokah_eval_sdq/002414_3.1-8.38.wav + 男/賽德克/德固達雅/族語E樂園: klokah_eval_sdq_concat#wav_concat/klokah_eval_sdq/003047_13.95-17.19.wav + 女/賽德克/德鹿谷/族語E樂園: klokah_eval_sdq_concat#wav_concat/klokah_eval_sdq/002443_0.0-3.66.wav + 男/太魯閣/族語E樂園: klokah_eval_trv_concat#wav_concat/klokah_eval_trv/000899_19.44-23.49.wav + 女/太魯閣/族語E樂園: klokah_eval_trv_concat#wav_concat/klokah_eval_trv/000771_3.3-8.22.wav + 女/阿美/秀姑巒/意傳: ithuan_ami + 女/太魯閣/意傳: ithuan_trv diff --git a/tts/requirements.in b/tts/requirements.in new file mode 100644 index 0000000..25acedd --- /dev/null +++ b/tts/requirements.in @@ -0,0 +1 @@ +gradio diff --git a/tts/requirements.txt b/tts/requirements.txt new file mode 100644 index 0000000..159167e --- /dev/null +++ b/tts/requirements.txt @@ -0,0 +1,171 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile +# +aiofiles==23.2.1 + # via gradio +annotated-types==0.7.0 + # via pydantic +anyio==4.6.2.post1 + # via + # gradio + # httpx + # starlette +certifi==2024.8.30 + # via + # httpcore + # httpx + # requests +charset-normalizer==3.4.0 + # via requests +click==8.1.7 + # via + # typer + # uvicorn +contourpy==1.2.1 + # via matplotlib +cycler==0.12.1 + # via matplotlib +exceptiongroup==1.2.2 + # via anyio +fastapi==0.115.5 + # via gradio +ffmpy==0.4.0 + # via gradio +filelock==3.16.1 + # via huggingface-hub +fonttools==4.54.1 + # via matplotlib +fsspec==2024.10.0 + # via + # gradio-client + # huggingface-hub +gradio==4.44.1 + # via -r requirements.in +gradio-client==1.3.0 + # via gradio +h11==0.14.0 + # via + # httpcore + # uvicorn +httpcore==1.0.6 + # via httpx +httpx==0.27.2 + # via + # gradio + # gradio-client +huggingface-hub==0.26.1 + # via + # gradio + # gradio-client +idna==3.10 + # via + # anyio + # httpx + # requests +importlib-resources==6.4.5 + # via gradio +jinja2==3.1.4 + # via gradio +kiwisolver==1.4.7 + # via matplotlib +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.5 + # via + # gradio + # jinja2 +matplotlib==3.8.4 + # via gradio +mdurl==0.1.2 + # via markdown-it-py +numpy==1.22.0 + # via + # contourpy + # gradio + # matplotlib + # pandas +orjson==3.10.11 + # via gradio +packaging==24.1 + # via + # gradio + # gradio-client + # huggingface-hub + # matplotlib +pandas==1.5.3 + # via gradio +pillow==10.4.0 + # via + # gradio + # matplotlib +pydantic==2.9.2 + # via + # fastapi + # gradio +pydantic-core==2.23.4 + # via pydantic +pydub==0.25.1 + # via gradio +pygments==2.18.0 + # via rich +pyparsing==3.2.0 + # via matplotlib +python-dateutil==2.9.0.post0 + # via + # matplotlib + # pandas +python-multipart==0.0.17 + # via gradio +pytz==2024.2 + # via pandas +pyyaml==6.0.2 + # via + # gradio + # huggingface-hub +requests==2.32.3 + # via huggingface-hub +rich==13.9.3 + # via typer +ruff==0.7.3 + # via gradio +semantic-version==2.10.0 + # via gradio +shellingham==1.5.4 + # via typer +six==1.16.0 + # via python-dateutil +sniffio==1.3.1 + # via + # anyio + # httpx +starlette==0.41.2 + # via fastapi +tomlkit==0.12.0 + # via gradio +tqdm==4.66.5 + # via huggingface-hub +typer==0.12.5 + # via gradio +typing-extensions==4.12.2 + # via + # anyio + # fastapi + # gradio + # gradio-client + # huggingface-hub + # pydantic + # pydantic-core + # rich + # typer + # uvicorn +urllib3==2.2.3 + # via + # gradio + # requests +uvicorn==0.32.0 + # via gradio +websockets==12.0 + # via gradio-client