Skip to content

Commit

Permalink
add Dockerfile and CI image build
Browse files Browse the repository at this point in the history
  • Loading branch information
Benjoyo committed Apr 3, 2024
1 parent de7e27d commit 1e0d983
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 7 deletions.
78 changes: 78 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ on:
tags:
- "v*.*.*"

# Workflow-wide environment: the image repository name that the build-push and
# create-push-manifest jobs use as the prefix of every tag they push.
env:
REGISTRY_IMAGE: holisticon/bpm-ai-inference

jobs:
publish:
runs-on: ubuntu-latest
Expand All @@ -16,3 +19,78 @@ jobs:
pypi_token: ${{ secrets.PYPI_TOKEN }}
# Install project without developer requirements.
ignore_dev_requirements: "yes"

build-push:
  # Builds the CPU image once per architecture and pushes each result under an
  # arch-suffixed tag (<version>-cpu-<arch> and latest-cpu-<arch>). The
  # create-push-manifest job combines those tags into multi-arch manifests.
  strategy:
    matrix:
      include:
        - arch: amd64
        - arch: arm64
  runs-on: ubuntu-latest
  outputs:
    image: ${{ steps.image.outputs.image }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
    # Dedicated Docker context that the Buildx setup below uses as its endpoint.
    - run: docker context create builders
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        version: latest
        endpoint: builders
    - name: Login to Docker Hub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
    - name: Get the version
      id: vars
      # The workflow only runs for tag pushes, so GITHUB_REF is "refs/tags/<tag>".
      # Strip the prefix explicitly instead of slicing at a magic offset, and
      # write to $GITHUB_OUTPUT because the ::set-output command is deprecated.
      run: echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
    - name: Build and push
      uses: docker/build-push-action@v5
      with:
        platforms: linux/${{ matrix.arch }}
        provenance: false  # otherwise a manifest list is created and create-push-manifest job fails
        context: .
        file: ./Dockerfile
        build-args: |
          PYTHON_VERSION=3.11
        push: true
        tags: |
          ${{ env.REGISTRY_IMAGE }}:${{ steps.vars.outputs.tag }}-cpu-${{ matrix.arch }}
          ${{ env.REGISTRY_IMAGE }}:latest-cpu-${{ matrix.arch }}
    # Expose the arch-independent image name as the job output.
    - id: image
      run: echo "image=${{ env.REGISTRY_IMAGE }}:latest-cpu" >> "$GITHUB_OUTPUT"

create-push-manifest:
  # Runs after all build-push matrix legs and stitches the per-architecture
  # tags into two multi-arch manifests: <version>-cpu and latest-cpu.
  runs-on: ubuntu-latest
  needs: [build-push]
  steps:
    - name: Login to Docker Hub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
    - name: Get version
      id: vars
      # GITHUB_REF is "refs/tags/<tag>" for the tag pushes that trigger this
      # workflow. Strip the prefix explicitly and write to $GITHUB_OUTPUT,
      # since the ::set-output workflow command is deprecated.
      run: echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
    - name: Create version-manifest and push
      run: |
        docker manifest create \
          ${{ env.REGISTRY_IMAGE }}:${{ steps.vars.outputs.tag }}-cpu \
          --amend ${{ env.REGISTRY_IMAGE }}:${{ steps.vars.outputs.tag }}-cpu-amd64 \
          --amend ${{ env.REGISTRY_IMAGE }}:${{ steps.vars.outputs.tag }}-cpu-arm64
        docker manifest push ${{ env.REGISTRY_IMAGE }}:${{ steps.vars.outputs.tag }}-cpu
    - name: Create latest-manifest and push
      run: |
        docker manifest create \
          ${{ env.REGISTRY_IMAGE }}:latest-cpu \
          --amend ${{ env.REGISTRY_IMAGE }}:latest-cpu-amd64 \
          --amend ${{ env.REGISTRY_IMAGE }}:latest-cpu-arm64
        docker manifest push ${{ env.REGISTRY_IMAGE }}:latest-cpu
42 changes: 42 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Two-stage build: stage 1 installs the Python dependencies with Poetry/pip,
# stage 2 copies the installed packages into a slim runtime image.
# PYTHON_VERSION is declared before the first FROM so both FROM lines can use it.
ARG PYTHON_VERSION="3.11"

###############################################################################
# 1. Install dependencies
###############################################################################
# poetry setup code based on https://github.com/thehale/docker-python-poetry (does not provide multiarch images)
FROM python:${PYTHON_VERSION} AS build-python
ARG POETRY_VERSION="1.6.1"

# Poetry settings; POETRY_VERSION and POETRY_HOME are read by the installer below.
ENV POETRY_VERSION=${POETRY_VERSION}
ENV POETRY_HOME="/opt/poetry"
# Keep the virtualenv inside the project directory so the final stage can copy
# it from the fixed path /app/.venv.
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
ENV POETRY_NO_INTERACTION=1
ENV PATH="$POETRY_HOME/bin:$PATH"

RUN apt-get update && apt-get install -y --no-install-recommends curl
# Install Poetry via the official installer: https://python-poetry.org/docs/master/#installing-with-the-official-installer
# This script respects $POETRY_VERSION & $POETRY_HOME
RUN curl -sSL https://install.python-poetry.org | python3 -
# only install dependencies into project virtualenv
WORKDIR /app
COPY requirements.txt pyproject.toml poetry.lock ./
# First install the pinned packages from requirements.txt with pip inside
# Poetry's environment, then the main dependency group (without the project
# itself) via Poetry.
RUN poetry run python -m pip install -r requirements.txt
RUN poetry install --only main --no-root --no-cache

###############################################################################
# 2. Final, minimal image that starts the inference server daemon
###############################################################################
FROM python:${PYTHON_VERSION}-slim
# Re-declared without a value so the build arg is available inside this stage;
# it is needed for the site-packages paths in the COPY below.
ARG PYTHON_VERSION

ENV PYTHONUNBUFFERED=1

WORKDIR /app
COPY ./bpm_ai_inference/ ./bpm_ai_inference/
# Copy the packages installed in stage 1 straight into the system
# site-packages of this image, so no virtualenv is needed at runtime.
COPY --from=build-python /app/.venv/lib/python${PYTHON_VERSION}/site-packages /usr/local/lib/python${PYTHON_VERSION}/site-packages
# System packages for the runtime image; the apt lists are removed in the same
# layer to keep the image small.
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl tesseract-ocr poppler-utils \
&& apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*

# init.py takes each process to run as ONE command-line string argument and
# splits it into argv itself, hence the daemon command is a single string.
COPY init.py .
CMD ["python3", "init.py", "python -m bpm_ai_inference.daemon"]
2 changes: 1 addition & 1 deletion bpm_ai_inference/deamon.py → bpm_ai_inference/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

daemon = create_remote_object_daemon(
host=os.environ.get('DAEMON_HOST', 'localhost'),
host=os.environ.get('DAEMON_HOST', '0.0.0.0'),
port=int(os.environ.get('DAEMON_PORT', 6666))
)

Expand Down
29 changes: 29 additions & 0 deletions init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import subprocess
import os
import time
import sys


def main():
    """Start every command given on the command line and supervise the group.

    Each argument after the script name is one complete command line, e.g.
    ``"python -m bpm_ai_inference.daemon"``. All commands are started at once.
    As soon as any one of them exits, the remaining processes are terminated
    and reaped, and the function returns.

    Returns:
        int: Exit status for the whole group. ``0`` when no commands were
        given; otherwise the status of the first process observed to exit,
        with a death-by-signal reported as ``128 + signal number``.
    """
    # Local import so this block does not depend on the file's import header.
    import shlex

    # shlex keeps quoted arguments together; for plain whitespace-separated
    # commands the result is the same as str.split().
    processes_params = [shlex.split(arg) for arg in sys.argv[1:]]
    if not processes_params:
        # Without this guard the monitor loop below would sleep forever.
        print("[init] No processes given, nothing to do.")
        return 0

    # Start all processes
    processes = [subprocess.Popen(params) for params in processes_params]
    print(f"[init] Started {len(processes)} processes.")

    # Continuously monitor the processes
    while True:
        for i, process in enumerate(processes):
            exit_status = process.poll()
            if exit_status is None:
                continue
            print(f"[init] Process '{' '.join(processes_params[i])}' exited with status {exit_status}, terminating other processes...")
            others = [p for j, p in enumerate(processes) if j != i]
            # Ask all other processes to stop first, then reap them so none
            # is left running (or as a zombie) when this script exits.
            for other_process in others:
                other_process.terminate()
            for other_process in others:
                try:
                    other_process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    # Did not react to SIGTERM in time: force it down.
                    other_process.kill()
                    other_process.wait()
            # Popen reports "killed by signal N" as -N; map it to the
            # conventional 128 + N so it is a valid process exit code.
            return exit_status if exit_status >= 0 else 128 - exit_status
        # Sleep for a bit to avoid busy-waiting
        time.sleep(1)


if __name__ == "__main__":
    # Propagate the child's status so the container's exit code shows failures.
    sys.exit(main())
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,23 @@ bpm-ai-core = "2.2.0"
faster-whisper = "^0.10.0"
lingua-language-detector = "^2.0.2"
pytesseract = "^0.3.10"
torch = [
{ version = "=2.2.2", source="pypi", markers = "sys_platform == 'darwin'" },
{ version = "=2.2.2+cpu", source = "torch-cpu", markers = "sys_platform != 'darwin'" },
]
transformers = "^4.37.2"
sacremoses = "^0.1.1"
sentencepiece = "^0.2.0"
nltk = "^3.8.0"
pyro5 = "^5.15"


[tool.poetry.group.dev.dependencies]
torch = [
{ version = "=2.2.2", source="pypi", markers = "sys_platform == 'darwin'" },
{ version = "=2.2.2+cpu", source = "torch-cpu", markers = "sys_platform != 'darwin'" },
]
spacy = [
{ version = "=3.7.4", markers = "sys_platform != 'darwin'" },
{ version = "=3.7.4", extras = ["apple"], markers = "sys_platform == 'darwin' and platform_machine == 'arm64'" },
]
pyro5 = "^5.15"


[[tool.poetry.source]]
name = "torch-cpu"
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Pinned packages that the Dockerfile build stage installs with pip before it
# runs `poetry install`.
spacy==3.7.4
torch==2.2.2
# Also search the PyTorch CPU wheel index when resolving the packages above.
--extra-index-url https://download.pytorch.org/whl/cpu
1 change: 1 addition & 0 deletions tests/test_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ async def test_faster_whisper():


async def test_faster_whisper_url():
    """Transcribe an audio file given by URL and check for the expected phrase."""
    # todo flaky: av.error.EOFError: [Errno 541478725] End of file: 'audio.ogg'
    audio_url = "https://upload.wikimedia.org/wikipedia/commons/d/dd/Armstrong_Small_Step.ogg"
    transcription = await FasterWhisperASR().transcribe(audio_url)
    assert "giant leap for mankind" in transcription.text.lower()

0 comments on commit 1e0d983

Please sign in to comment.