docker-build #47
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the FlexFlow Docker images for every supported GPU backend / CUDA
# version combination.
name: "docker-build"

# Triggers:
#   * pull requests that touch the Docker sources (except the README) or this
#     workflow file itself,
#   * pushes to master,
#   * a weekly scheduled run,
#   * manual dispatch from the Actions tab.
on:
  pull_request:
    paths:
      - "docker/**"
      - "!docker/README.md"
      - ".github/workflows/docker-build.yml"
  push:
    branches:
      - "master"
  schedule:
    # Cron schedules are evaluated in UTC: every Sunday at 08:00 UTC, i.e.
    # midnight PT / 3am ET (one hour later locally while daylight saving time
    # is in effect). Keeps the Docker images updated.
    - cron: "0 8 * * 0"
  workflow_dispatch:
# Cancel outdated workflows if they are still running.
# `github.head_ref` is only populated for pull-request events, so all runs for
# the same PR source branch share one group and a newer run cancels the older
# one. For every other event the expression falls back to `github.run_id`,
# which is unique per run, so those runs never cancel each other.
concurrency:
  group: docker-build-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Builds the FlexFlow image for each matrix entry, smoke-tests the Python
  # module for CUDA builds, and publishes the images when the publish
  # condition in the last step is met.
  docker-build:
    name: Build and Install FlexFlow in a Docker Container
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        gpu_backend: ["cuda", "hip_rocm"]
        cuda_version: ["11.1", "11.2", "11.3", "11.5", "11.6", "11.7", "11.8"]
        # Every hip_rocm combination except cuda_version 11.1 is excluded, so
        # the hip_rocm backend is built exactly once while the cuda backend is
        # built once per CUDA version.
        exclude:
          - gpu_backend: "hip_rocm"
            cuda_version: "11.2"
          - gpu_backend: "hip_rocm"
            cuda_version: "11.3"
          - gpu_backend: "hip_rocm"
            cuda_version: "11.5"
          - gpu_backend: "hip_rocm"
            cuda_version: "11.6"
          - gpu_backend: "hip_rocm"
            cuda_version: "11.7"
          - gpu_backend: "hip_rocm"
            cuda_version: "11.8"
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Build Docker container
        env:
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
          cuda_version: ${{ matrix.cuda_version }}
        run: |
          # On push/scheduled runs for the inference branch, build for all
          # compatible architectures, so that we can publish a pre-built
          # general-purpose image. In all other cases, only build for one
          # architecture to save time.
          # NOTE(review): this workflow's `push` trigger only lists `master`,
          # so the branch test below can only match on scheduled runs (and only
          # if the schedule runs against `inference`) - confirm which branch is
          # intended.
          # The event name is read from the default GITHUB_EVENT_NAME variable
          # instead of splicing an expression into the script text.
          if [[ ( "$GITHUB_EVENT_NAME" == 'push' || "$GITHUB_EVENT_NAME" == 'schedule' ) && "${GITHUB_REF#refs/heads/}" == "inference" ]]; then
            export FF_CUDA_ARCH=all
          else
            export FF_CUDA_ARCH=70
          fi
          ./docker/build.sh flexflow

      - name: Check availability of Python flexflow.core module
        if: ${{ matrix.gpu_backend == 'cuda' }}
        env:
          cuda_version: ${{ matrix.cuda_version }}
        run: |
          # Inside the container: put the CUDA stubs directory first on the
          # library path, expose the stub libcuda.so under the libcuda.so.1
          # name, then import the module (presumably needed because the runner
          # has no real CUDA driver - confirm).
          # `\$LD_LIBRARY_PATH` is escaped so that it is expanded by the shell
          # inside the container rather than by the runner's shell.
          docker run --env CPU_ONLY_TEST=1 --entrypoint /bin/bash \
            "flexflow-cuda-${cuda_version}:latest" \
            -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:\$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; exit()'"

      - name: Publish Docker environment image (on push to inference)
        # Only the upstream organisation publishes; forks skip this step.
        if: github.repository_owner == 'flexflow'
        env:
          FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }}
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
          cuda_version: ${{ matrix.cuda_version }}
        run: |
          # Same condition as the build step: publish only for push/scheduled
          # runs whose ref is the inference branch.
          if [[ ( "$GITHUB_EVENT_NAME" == 'push' || "$GITHUB_EVENT_NAME" == 'schedule' ) && "${GITHUB_REF#refs/heads/}" == "inference" ]]; then
            ./docker/publish.sh flexflow-environment
            ./docker/publish.sh flexflow
          else
            echo "No need to update Docker containers in ghcr.io registry at this time."
          fi

  # Posts a message to Slack when the weekly scheduled build fails in the
  # upstream repository.
  notify-slack:
    name: Notify Slack in case of failure
    runs-on: ubuntu-20.04
    needs: docker-build
    if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }}
    steps:
      - name: Send Slack message
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        run: |
          # The webhook URL is quoted so the shell passes it to curl as a
          # single argument.
          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly FlexFlow Docker images build failed! <https://github.com/flexflow/FlexFlow/actions/runs/$GITHUB_RUN_ID|(See here).> :x: \"}" "$SLACK_WEBHOOK"