diff --git a/.github/workflows/gcp_models.yml b/.github/workflows/gcp_models.yml
index 05b02b084..df368f1f6 100644
--- a/.github/workflows/gcp_models.yml
+++ b/.github/workflows/gcp_models.yml
@@ -11,6 +11,7 @@ on:
 env:
   SERVICE: models
   REGION: us-central1
+  CONTAINER_NAME: speech-models
 
 jobs:
   deploy:
@@ -21,27 +22,61 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
+      # To workaround "no space left on device" issue of GitHub-hosted runner
+      - name: Delete huge unnecessary tools folder
+        run: rm -rf /opt/hostedtoolcache
+
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
       - name: Google Auth
         id: auth
-        uses: 'google-github-actions/auth@v0'
+        uses: 'google-github-actions/auth@v2'
        with:
           credentials_json: ${{ secrets.GCP_CREDENTIALS }}
 
+      - run: gcloud auth configure-docker
+
       - name: Build and Push Docker image
         run: |
-          docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} -f backend/modal/Dockerfile .
-          docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
-      - name: Deploy to Cloud Run
-        id: deploy
-        uses: google-github-actions/deploy-cloudrun@v0
-        with:
-          service: ${{ env.SERVICE }}
-          region: ${{ env.REGION }}
-          image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
+          docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7} -f backend/modal/Dockerfile .
+          docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}
+
+      - name: Create SSH Key
+        run: |
+          mkdir -p ~/.ssh
+          echo "${{ secrets.SPEECH_MODELS_SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
+          chmod 600 ~/.ssh/id_ed25519
+
+      - name: Deploy Docker image
+        run: |
+          ssh -o StrictHostKeyChecking=no \
+            ${{ secrets.SPEECH_MODELS_SSH_USERNAME }}@${{ secrets.SPEECH_MODELS_SSH_HOST }} \
+            "set -x; \
+            echo '[+] Pull latest Speech Models image...'; \
+            docker pull gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}; \
+            echo '[+] Remove current Speech Models container...'; \
+            docker rm -f ${{ env.CONTAINER_NAME }}; \
+            echo '[+]' \
+            'Start new Speech Models container...'; \
+            docker run -d --name ${{ env.CONTAINER_NAME }} -p 8080:8080 \
+            --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
+            --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
+            --device /dev/nvidia0:/dev/nvidia0 \
+            --device /dev/nvidia-uvm:/dev/nvidia-uvm \
+            --device /dev/nvidiactl:/dev/nvidiactl \
+            -e OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} \
+            -e HUGGINGFACE_TOKEN=${{ secrets.HUGGINGFACE_TOKEN }} \
+            gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}"
+
+      # - name: Deploy to Cloud Run
+      #   id: deploy
+      #   uses: google-github-actions/deploy-cloudrun@v2
+      #   with:
+      #     service: ${{ env.SERVICE }}
+      #     region: ${{ env.REGION }}
+      #     image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
 
       # If required, use the Cloud Run url output in later steps
-      - name: Show Output
-        run: echo ${{ steps.deploy.outputs.url }}
\ No newline at end of file
+      # - name: Show Output
+      #   run: echo ${{ steps.deploy.outputs.url }}
\ No newline at end of file
diff --git a/backend/modal/Dockerfile b/backend/modal/Dockerfile
index d45213648..c22f79020 100644
--- a/backend/modal/Dockerfile
+++ b/backend/modal/Dockerfile
@@ -1,16 +1,30 @@
-FROM tiangolo/uvicorn-gunicorn:python3.11
+FROM python:3.11 AS builder
 
-RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl
-RUN apt-get install unzip
-RUN apt-get -y install python3
-RUN apt-get -y install python3-pip
-RUN apt-get -y install git
-RUN apt-get -y install ffmpeg
+ENV PATH="/opt/venv/bin:$PATH"
+RUN python -m venv /opt/venv
 
 COPY backend/requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt
 
-COPY backend/modal/ /app
+FROM python:3.11-slim
+
+WORKDIR /app
+ENV PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/opt/venv/bin:$PATH"
+ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64"
+
+RUN apt-get update && apt-get -y install build-essential ffmpeg curl unzip wget software-properties-common && \
+wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \
+dpkg -i cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \
+cp /var/cuda-repo-debian11-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
+add-apt-repository contrib && \
+apt-get update && \
+apt-get -y install cuda-toolkit-12-6 && \
+rm -rf /var/lib/apt/lists/* cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb
+
+COPY --from=builder /opt/venv /opt/venv
+COPY backend/database /app/database
+COPY backend/utils /app/utils
+COPY backend/modal/ .
EXPOSE 8080 -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/backend/modal/main.py b/backend/modal/main.py index 47ff560b7..5a1ee2ea6 100644 --- a/backend/modal/main.py +++ b/backend/modal/main.py @@ -3,11 +3,10 @@ from fastapi import FastAPI, UploadFile, File, Form from speech_profile_modal import ResponseItem, endpoint as speaker_identification_endpoint -from vad_modal import endpoint as vad_endpoint +from vad_modal import vad_endpoint app = FastAPI() - @app.post('/v1/speaker-identification') def speaker_identification( uid: str, audio_file: UploadFile = File, segments: str = Form(...) @@ -15,8 +14,8 @@ def speaker_identification( print('speaker_identification') return speaker_identification_endpoint(uid, audio_file, segments) - @app.post('/v1/vad') -def vad(audio_file: UploadFile = File): +def vad(file: UploadFile = File): print('vad') - return vad_endpoint(audio_file) + print(vad_endpoint) + return vad_endpoint(file) diff --git a/backend/modal/vad_modal.py b/backend/modal/vad_modal.py index 82353a51e..4d5b82150 100644 --- a/backend/modal/vad_modal.py +++ b/backend/modal/vad_modal.py @@ -3,7 +3,6 @@ import torch from fastapi import UploadFile -from modal import App, web_endpoint, Secret, Image from pyannote.audio import Pipeline # Instantiate pretrained voice activity detection pipeline @@ -13,26 +12,18 @@ use_auth_token=os.getenv('HUGGINGFACE_TOKEN') ).to(device) -app = App(name='vad') -image = ( - Image.debian_slim() - .pip_install("pyannote.audio") - .pip_install("torch") - .pip_install("torchaudio") -) +# app = App(name='vad') +# image = ( +# Image.debian_slim() +# .pip_install("pyannote.audio") +# .pip_install("torch") +# .pip_install("torchaudio") +# ) os.makedirs('_temp', exist_ok=True) -@app.function( - image=image, - keep_warm=1, - memory=(1024, 2048), - cpu=4, - secrets=[Secret.from_name('huggingface-token')], -) 
-@web_endpoint(method='POST')
-def endpoint(file: UploadFile):
+def vad_endpoint(file: UploadFile):
     upload_id = str(uuid.uuid4())
     file_path = f"_temp/{upload_id}_{file.filename}"
     with open(file_path, 'wb') as f:
@@ -47,4 +38,4 @@ def endpoint(file: UploadFile):
         'end': segment.end,
         'duration': segment.duration,
     })
-    return data
+    return data
\ No newline at end of file