diff --git a/.github/workflows/gcp_models.yml b/.github/workflows/gcp_models.yml
index 05b02b084..df368f1f6 100644
--- a/.github/workflows/gcp_models.yml
+++ b/.github/workflows/gcp_models.yml
@@ -11,6 +11,7 @@ on:
 env:
   SERVICE: models
   REGION: us-central1
+  CONTAINER_NAME: speech-models
 
 jobs:
   deploy:
@@ -21,27 +22,61 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
+      # To workaround "no space left on device" issue of GitHub-hosted runner
+      - name: Delete huge unnecessary tools folder
+        run: rm -rf /opt/hostedtoolcache
+
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
       - name: Google Auth
         id: auth
-        uses: 'google-github-actions/auth@v0'
+        uses: 'google-github-actions/auth@v2'
        with:
           credentials_json: ${{ secrets.GCP_CREDENTIALS }}
 
+      - run: gcloud auth configure-docker
+
       - name: Build and Push Docker image
         run: |
-          docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} -f backend/modal/Dockerfile .
-          docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
-      - name: Deploy to Cloud Run
-        id: deploy
-        uses: google-github-actions/deploy-cloudrun@v0
-        with:
-          service: ${{ env.SERVICE }}
-          region: ${{ env.REGION }}
-          image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
+          docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7} -f backend/modal/Dockerfile .
+          docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}
+
+      - name: Create SSH Key
+        run: |
+          mkdir -p ~/.ssh
+          echo "${{ secrets.SPEECH_MODELS_SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
+          chmod 600 ~/.ssh/id_ed25519
+
+      - name: Deploy Docker image
+        run: |
+          ssh -o StrictHostKeyChecking=no \
+            ${{ secrets.SPEECH_MODELS_SSH_USERNAME }}@${{ secrets.SPEECH_MODELS_SSH_HOST }} \
+            "set -x; \
+            echo '[+] Pull latest Speech Models image...'; \
+            docker pull gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}; \
+            echo '[+] Remove current Speech Models container...'; \
+            docker rm -f ${{ env.CONTAINER_NAME }}; \
+            echo '[+]' \
+            'Start new Speech Models container...'; \
+            docker run -d --name ${{ env.CONTAINER_NAME }} -p 8080:8080 \
+            --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
+            --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
+            --device /dev/nvidia0:/dev/nvidia0 \
+            --device /dev/nvidia-uvm:/dev/nvidia-uvm \
+            --device /dev/nvidiactl:/dev/nvidiactl \
+            -e OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} \
+            -e HUGGINGFACE_TOKEN=${{ secrets.HUGGINGFACE_TOKEN }} \
+            gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}"
+
+      # - name: Deploy to Cloud Run
+      #   id: deploy
+      #   uses: google-github-actions/deploy-cloudrun@v2
+      #   with:
+      #     service: ${{ env.SERVICE }}
+      #     region: ${{ env.REGION }}
+      #     image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
 
       # If required, use the Cloud Run url output in later steps
-      - name: Show Output
-        run: echo ${{ steps.deploy.outputs.url }}
\ No newline at end of file
+      # - name: Show Output
+      #   run: echo ${{ steps.deploy.outputs.url }}
\ No newline at end of file
diff --git a/backend/modal/Dockerfile b/backend/modal/Dockerfile
index d45213648..c22f79020 100644
--- a/backend/modal/Dockerfile
+++ b/backend/modal/Dockerfile
@@ -1,16 +1,30 @@
-FROM tiangolo/uvicorn-gunicorn:python3.11
+FROM python:3.11 AS builder
 
-RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl
-RUN apt-get install unzip
-RUN apt-get -y install python3
-RUN apt-get -y install python3-pip
-RUN apt-get -y install git
-RUN apt-get -y install ffmpeg
+ENV PATH="/opt/venv/bin:$PATH"
+RUN python -m venv /opt/venv
 
 COPY backend/requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt
 
-COPY backend/modal/ /app
+FROM python:3.11-slim
+
+WORKDIR /app
+ENV PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/opt/venv/bin:$PATH"
+ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64"
+
+RUN apt-get update && apt-get -y install build-essential ffmpeg curl unzip wget software-properties-common && \
+wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \
+dpkg -i cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \
+cp /var/cuda-repo-debian11-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
+add-apt-repository contrib && \
+apt-get update && \
+apt-get -y install cuda-toolkit-12-6 && \
+rm -rf /var/lib/apt/lists/* cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb
+
+COPY --from=builder /opt/venv /opt/venv
+COPY backend/database /app/database
+COPY backend/utils /app/utils
+COPY backend/modal/ .
EXPOSE 8080 -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/backend/modal/main.py b/backend/modal/main.py index 47ff560b7..5a1ee2ea6 100644 --- a/backend/modal/main.py +++ b/backend/modal/main.py @@ -3,11 +3,10 @@ from fastapi import FastAPI, UploadFile, File, Form from speech_profile_modal import ResponseItem, endpoint as speaker_identification_endpoint -from vad_modal import endpoint as vad_endpoint +from vad_modal import vad_endpoint app = FastAPI() - @app.post('/v1/speaker-identification') def speaker_identification( uid: str, audio_file: UploadFile = File, segments: str = Form(...) @@ -15,8 +14,8 @@ def speaker_identification( print('speaker_identification') return speaker_identification_endpoint(uid, audio_file, segments) - @app.post('/v1/vad') -def vad(audio_file: UploadFile = File): +def vad(file: UploadFile = File): print('vad') - return vad_endpoint(audio_file) + print(vad_endpoint) + return vad_endpoint(file) diff --git a/backend/modal/vad_modal.py b/backend/modal/vad_modal.py index 82353a51e..4d5b82150 100644 --- a/backend/modal/vad_modal.py +++ b/backend/modal/vad_modal.py @@ -3,7 +3,6 @@ import torch from fastapi import UploadFile -from modal import App, web_endpoint, Secret, Image from pyannote.audio import Pipeline # Instantiate pretrained voice activity detection pipeline @@ -13,26 +12,18 @@ use_auth_token=os.getenv('HUGGINGFACE_TOKEN') ).to(device) -app = App(name='vad') -image = ( - Image.debian_slim() - .pip_install("pyannote.audio") - .pip_install("torch") - .pip_install("torchaudio") -) +# app = App(name='vad') +# image = ( +# Image.debian_slim() +# .pip_install("pyannote.audio") +# .pip_install("torch") +# .pip_install("torchaudio") +# ) os.makedirs('_temp', exist_ok=True) -@app.function( - image=image, - keep_warm=1, - memory=(1024, 2048), - cpu=4, - secrets=[Secret.from_name('huggingface-token')], -) 
-@web_endpoint(method='POST')
-def endpoint(file: UploadFile):
+def vad_endpoint(file: UploadFile):
     upload_id = str(uuid.uuid4())
     file_path = f"_temp/{upload_id}_{file.filename}"
     with open(file_path, 'wb') as f:
@@ -47,4 +38,4 @@ def endpoint(file: UploadFile):
         'end': segment.end,
         'duration': segment.duration,
     })
-    return data
+    return data
\ No newline at end of file