diff --git a/.github/workflows/docker-build-skip.yml b/.github/workflows/docker-build-skip.yml
index 59b584c6c4..02b703467c 100644
--- a/.github/workflows/docker-build-skip.yml
+++ b/.github/workflows/docker-build-skip.yml
@@ -13,27 +13,22 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  docker-build:
-    name: Build and Install FlexFlow in a Docker Container
-    runs-on: ubuntu-20.04
+  docker-build-rocm:
+    name: Build and Install FlexFlow in a Docker Container (ROCm backend)
+    runs-on: ubuntu-latest
     strategy:
       matrix:
-        gpu_backend: ["cuda", "hip_rocm"]
-        cuda_version: ["11.1", "11.2", "11.3", "11.5", "11.6", "11.7", "11.8"]
-        # The CUDA version doesn't matter when building for hip_rocm, so we just pick one arbitrarily (11.8) to avoid building for hip_rocm once per number of CUDA version supported
-        exclude:
-          - gpu_backend: "hip_rocm"
-            cuda_version: "11.1"
-          - gpu_backend: "hip_rocm"
-            cuda_version: "11.2"
-          - gpu_backend: "hip_rocm"
-            cuda_version: "11.3"
-          - gpu_backend: "hip_rocm"
-            cuda_version: "11.5"
-          - gpu_backend: "hip_rocm"
-            cuda_version: "11.6"
-          - gpu_backend: "hip_rocm"
-            cuda_version: "11.7"
+        hip_version: ["5.3", "5.4", "5.5", "5.6"]
+      fail-fast: false
+    steps:
+      - run: 'echo "No docker-build required"'
+
+  docker-build-cuda:
+    name: Build and Install FlexFlow in a Docker Container (CUDA backend)
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        cuda_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0"]
       fail-fast: false
     steps:
       - run: 'echo "No docker-build required"'
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index 899de4664e..655310e141 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -9,9 +9,9 @@ on:
     branches:
       - "inference"
       - "master"
-  schedule:
-    # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated
-    - cron: "0 8 * * 0"
+  # schedule:
+  #   # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated
+  #   - cron: "0 8 * * 0"
   workflow_dispatch:
 
 # Cancel outdated workflows if they are still running
@@ -20,120 +20,174 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  docker-build:
-    name: Build and Install FlexFlow in a Docker Container
-    runs-on: ubuntu-20.04
+  oracle-runner-start:
+    name: Start a self-hosted Oracle machine to build the ROCm Docker images
+    runs-on: ubuntu-latest
+    if: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
+    env:
+      OCI_CLI_USER: ${{ secrets.OCI_CLI_USER }}
+      OCI_CLI_TENANCY: ${{ secrets.OCI_CLI_TENANCY }}
+      OCI_CLI_FINGERPRINT: ${{ secrets.OCI_CLI_FINGERPRINT }}
+      OCI_CLI_KEY_CONTENT: ${{ secrets.OCI_CLI_KEY_CONTENT }}
+      OCI_CLI_REGION: ${{ secrets.OCI_CLI_REGION }}
+      OCI_INSTANCE_ID: ${{ secrets.INSTANCE_ID_FFCI }}
+    steps:
+      - name: Checkout Git Repository
+        uses: actions/checkout@v3
+
+      - name: Install Oracle Cloud Infrastructure library
+        run: pip install oci
+
+      - name: Start Oracle Machine
+        run: python3 .github/workflows/helpers/oracle_con.py --start --instance_id $OCI_INSTANCE_ID
+
+  # 1. On a push, schedule, or workflow_dispatch event targeting the inference branch, the
+  # docker-build jobs below run on the Oracle machine to build the ROCm images and the CUDA 11.8 image.
+  # 2. 
A separate job (oracle-runner-stop, below) turns off the Oracle machine once it is no longer needed.
+  rocm_runner_choice:
+    name: Determine what runner to use to build the ROCm Docker image(s)
+    runs-on: ubuntu-latest
+    outputs:
+      rocm_runner: ${{ steps.step1.outputs.runner }}
+    steps:
+      - name: Determine the runner
+        id: step1
+        env:
+          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
+        run: |
+          if [ "$deploy_needed" == "true" ]; then
+            echo "runner=[self-hosted, cpu_only]" >> $GITHUB_OUTPUT
+          else
+            echo "runner=ubuntu-20.04" >> $GITHUB_OUTPUT
+          fi
+
+  docker-build-rocm:
+    needs: rocm_runner_choice
+    name: Build and Install FlexFlow in a Docker Container (ROCm backend)
+    runs-on: ${{ needs.rocm_runner_choice.outputs.rocm_runner }}
     strategy:
       matrix:
-        gpu_backend: ["cuda", "hip_rocm"]
-        gpu_backend_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0", "5.3", "5.4", "5.5", "5.6"]
-        # The CUDA version doesn't matter when building for hip_rocm, so we just pick one arbitrarily (11.8) to avoid building for hip_rocm once per number of CUDA version supported
-        exclude:
-          - gpu_backend: "cuda"
-            gpu_backend_version: "5.3"
-          - gpu_backend: "cuda"
-            gpu_backend_version: "5.4"
-          - gpu_backend: "cuda"
-            gpu_backend_version: "5.5"
-          - gpu_backend: "cuda"
-            gpu_backend_version: "5.6"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.1"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.2"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.3"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.4"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.5"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.6"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.7"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "11.8"
-          - gpu_backend: "hip_rocm"
-            gpu_backend_version: "12.0"
+        hip_version: ["5.3", "5.4", "5.5", "5.6"]
       fail-fast: false
     env:
-      FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
-      gpu_backend_version: ${{ matrix.gpu_backend_version }}
-      # one of the two variables below will be unused
-      cuda_version: ${{ matrix.gpu_backend_version }}
-      hip_version: ${{ matrix.gpu_backend_version }}
-      branch_name: ${{ github.head_ref || github.ref_name }}
-    timeout-minutes: 480
+      FF_GPU_BACKEND: "hip_rocm"
+      hip_version: ${{ matrix.hip_version }}
     steps:
       - name: Checkout Git Repository
+        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.hip_version == '5.6' }}
         uses: actions/checkout@v3
         with:
           submodules: recursive
 
       - name: Free additional space on runner
-        env:
-          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
-          build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }}
-        run: |
-          if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then
-            .github/workflows/helpers/free_space_on_runner.sh
-          else
-            echo "Skipping this step to save time"
-          fi
+        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.hip_version == '5.6' }}
+        run: .github/workflows/helpers/free_space_on_runner.sh
 
       - name: Build Docker container
+        if: ${{ ( ( github.event_name 
== 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.hip_version == '5.6' }} env: - deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }} - build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }} + deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} + build_needed: ${{ matrix.hip_version == '5.6' }} run: | # On push to inference, build for all compatible architectures, so that we can publish # a pre-built general-purpose image. On all other cases, only build for one architecture # to save time. if [[ $deploy_needed == "true" ]] ; then - export FF_CUDA_ARCH=all export FF_HIP_ARCH=all ./docker/build.sh flexflow elif [[ $build_needed == "true" ]]; then - export FF_CUDA_ARCH=70 export FF_HIP_ARCH=gfx1100,gfx1036 ./docker/build.sh flexflow - else - echo "Skipping build to save time" fi - name: Check availability of flexflow modules in Python - if: ${{ matrix.gpu_backend == 'cuda' }} + if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.hip_version == '5.6' }} + run: docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${hip_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'" + + - name: Publish Docker environment image (on push to inference) + if: ${{ github.repository_owner == 'flexflow' && ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} env: - deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }} - build_needed: ${{ ( matrix.gpu_backend == 'hip_rocm' && matrix.gpu_backend_version == '5.6' ) || ( matrix.gpu_backend == 'cuda' && matrix.gpu_backend_version == '11.8' ) }} + FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }} run: | - if [[ $deploy_needed == "true" || $build_needed == "true" ]]; then - if [[ $FF_GPU_BACKEND == "cuda" ]]; then - docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'" - else - docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${gpu_backend_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'" - fi - else - echo "Skipping test to save time" + ./docker/publish.sh flexflow-environment + ./docker/publish.sh flexflow + + docker-build-cuda: + name: Build and Install FlexFlow in a Docker Container (CUDA backend) + runs-on: ubuntu-20.04 + strategy: + matrix: + cuda_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0"] + fail-fast: false + env: + FF_GPU_BACKEND: "cuda" + cuda_version: ${{ matrix.cuda_version }} + steps: + - name: Checkout Git Repository + if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || 
github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Free additional space on runner
+        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
+        run: .github/workflows/helpers/free_space_on_runner.sh
+
+      - name: Build Docker container
+        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
+        env:
+          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
+          build_needed: ${{ matrix.cuda_version == '11.8' }}
+        run: |
+          # On push to inference, build for all compatible architectures, so that we can publish
+          # a pre-built general-purpose image. In all other cases, only build for one architecture
+          # to save time.
+          if [[ $deploy_needed == "true" ]] ; then
+            export FF_CUDA_ARCH=all
+            ./docker/build.sh flexflow
+          elif [[ $build_needed == "true" ]]; then
+            export FF_CUDA_ARCH=86
+            ./docker/build.sh flexflow
           fi
 
+      - name: Check availability of flexflow modules in Python
+        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
+        run: docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${cuda_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"
+
       - name: Publish Docker environment image (on push to inference)
-        if: github.repository_owner == 'flexflow'
+        if: ${{ github.repository_owner == 'flexflow' && ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
         env:
           FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }}
-          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && env.branch_name == 'inference' }}
         run: |
-          if [[ $deploy_needed == "true" ]]; then
-            ./docker/publish.sh flexflow-environment
-            ./docker/publish.sh flexflow
-          else
-            echo "No need to update Docker containers in ghrc.io registry at this time." 
-          fi
+          ./docker/publish.sh flexflow-environment
+          ./docker/publish.sh flexflow
+
+  oracle-runner-stop:
+    needs: docker-build-rocm
+    if: ${{ always() && ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
+    runs-on: ubuntu-latest
+    name: Turn off the self-hosted Oracle machine where we built the ROCm Docker images
+    env:
+      OCI_CLI_USER: ${{ secrets.OCI_CLI_USER }}
+      OCI_CLI_TENANCY: ${{ secrets.OCI_CLI_TENANCY }}
+      OCI_CLI_FINGERPRINT: ${{ secrets.OCI_CLI_FINGERPRINT }}
+      OCI_CLI_KEY_CONTENT: ${{ secrets.OCI_CLI_KEY_CONTENT }}
+      OCI_CLI_REGION: ${{ secrets.OCI_CLI_REGION }}
+      OCI_INSTANCE_ID: ${{ secrets.INSTANCE_ID_FFCI }}
+    steps:
+      - name: Checkout Git Repository
+        uses: actions/checkout@v3
+
+      - name: Install Oracle Cloud Infrastructure library
+        run: pip install oci
+
+      - name: Stop Oracle Machine
+        run: python3 .github/workflows/helpers/oracle_con.py --stop --instance_id $OCI_INSTANCE_ID
 
   notify-slack:
     name: Notify Slack in case of failure
     runs-on: ubuntu-20.04
-    needs: docker-build
+    needs: [docker-build-cuda, docker-build-rocm]
     if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }}
     steps:
       - name: Send Slack message
diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml
index d604a7cea9..aee16832f3 100644
--- a/.github/workflows/gpu-ci.yml
+++ b/.github/workflows/gpu-ci.yml
@@ -61,7 +61,7 @@ jobs:
 
   python-interface-check:
     name: Check Python Interface
-    runs-on: self-hosted
+    runs-on: [self-hosted, gpu]
     defaults:
       run:
         shell: bash -l {0} # required to use an activated conda environment
@@ -135,7 +135,7 @@ jobs:
 
   inference-tests:
     name: Inference Tests
-    runs-on: self-hosted
+    runs-on: [self-hosted, gpu]
     defaults:
       run:
         shell: bash -l {0} # required to use an activated conda environment
@@ -210,7 +210,7 @@ jobs:
 
   gpu-ci-flexflow:
     name: Single Machine, Multiple GPUs Tests
-    runs-on: self-hosted
+    runs-on: [self-hosted, gpu]
     # skip this time-consuming test for PRs to the inference branch
     # if: ${{ github.event_name != 'pull_request' || github.base_ref != 'inference' }}
     defaults:
diff --git a/.github/workflows/helpers/oracle_con.py b/.github/workflows/helpers/oracle_con.py
new file mode 100644
index 0000000000..4fd41930b7
--- /dev/null
+++ b/.github/workflows/helpers/oracle_con.py
@@ -0,0 +1,35 @@
+import oci
+import argparse
+import os
+
+parser = argparse.ArgumentParser(description="Start or stop an OCI compute instance")
+group = parser.add_mutually_exclusive_group(required=True)
+group.add_argument("--start", action="store_true", help="Start the instance")
+group.add_argument("--stop", action="store_true", help="Stop the instance")
+parser.add_argument("--instance_id", type=str, required=True, help="OCID of the instance to act on")
+args = parser.parse_args()
+
+config = {
+    "user": os.getenv("OCI_CLI_USER"),
+    "key_content": os.getenv("OCI_CLI_KEY_CONTENT"),
+    "fingerprint": os.getenv("OCI_CLI_FINGERPRINT"),
+    "tenancy": os.getenv("OCI_CLI_TENANCY"),
+    "region": os.getenv("OCI_CLI_REGION")
+}
+
+# Validate the OCI configuration assembled from the environment
+oci.config.validate_config(config)
+
+# Initialize the ComputeClient to interact with VM instances
+compute = oci.core.ComputeClient(config)
+
+# The instance OCID is passed on the command line
+instance_id = args.instance_id
+
+# Perform the requested action
+if args.start:
+    # Start the VM
+    compute.instance_action(instance_id, "START")
+elif args.stop:
+    # Stop the VM
+    
compute.instance_action(instance_id, "STOP") diff --git a/python/flexflow/serve/__init__.py b/python/flexflow/serve/__init__.py index 7531c006a8..2c6395aca1 100644 --- a/python/flexflow/serve/__init__.py +++ b/python/flexflow/serve/__init__.py @@ -141,6 +141,7 @@ def init( configs_dict = { "num_gpus": num_gpus, "memory_per_gpu": memory_per_gpu, + "num_cpus": num_cpus, "zero_copy_memory_per_node": zero_copy_memory_per_node, "legion_utility_processors": legion_utility_processors, "data_parallelism_degree": data_parallelism_degree, @@ -174,6 +175,8 @@ def init( __check_positive_int(configs_dict, param) # Set default values + if configs_dict.get("num_cpus", None) is None: + configs_dict["num_cpus"] = 4 if configs_dict.get("legion_utility_processors", None) is None: configs_dict["legion_utility_processors"] = 8 if configs_dict.get("data_parallelism_degree", None) is None:
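
For reference, here is how the new `num_cpus` option added to `flexflow.serve.init` above surfaces to user code. This is a minimal sketch: the keyword names mirror the `configs_dict` keys in the diff, while the GPU count and memory sizes are illustrative placeholders rather than recommended values.

```python
import flexflow.serve as ff

# Minimal sketch: configure FlexFlow Serve with the new num_cpus knob.
# The keys mirror configs_dict in python/flexflow/serve/__init__.py;
# the numeric values below are illustrative placeholders.
ff.init(
    num_gpus=4,
    memory_per_gpu=14000,             # MB per GPU (placeholder)
    zero_copy_memory_per_node=30000,  # MB per node (placeholder)
    num_cpus=8,                       # new parameter in this diff
)
```

Omitting `num_cpus` exercises the new default-filling branch, which sets it to 4 before the positive-integer validation runs.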
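One more note on the Oracle helper: the start step fires the START action and returns immediately, relying on the self-hosted runner to register itself before jobs are scheduled. If a step ever needs to block until the VM is actually up, a polling variant along the following lines should work. This is a sketch against the `oci` SDK only, assuming the same `OCI_CLI_*` variables the workflow exports; `OCI_INSTANCE_ID` is a hypothetical stand-in for the instance OCID.

```python
import os
import time

import oci

# Same environment-driven configuration as oracle_con.py above.
config = {
    "user": os.getenv("OCI_CLI_USER"),
    "key_content": os.getenv("OCI_CLI_KEY_CONTENT"),
    "fingerprint": os.getenv("OCI_CLI_FINGERPRINT"),
    "tenancy": os.getenv("OCI_CLI_TENANCY"),
    "region": os.getenv("OCI_CLI_REGION"),
}
oci.config.validate_config(config)
compute = oci.core.ComputeClient(config)

instance_id = os.environ["OCI_INSTANCE_ID"]  # hypothetical env var for this sketch
compute.instance_action(instance_id, "START")

# Poll until the instance reports RUNNING, giving up after ~5 minutes.
for _ in range(60):
    if compute.get_instance(instance_id).data.lifecycle_state == "RUNNING":
        break
    time.sleep(5)
else:
    raise TimeoutError(f"Instance {instance_id} did not reach RUNNING in time")
```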