diff --git a/.github/workflows/test_api_rocm.yaml b/.github/workflows/test_api_rocm.yaml
index 6577564b..18bdbb1d 100644
--- a/.github/workflows/test_api_rocm.yaml
+++ b/.github/workflows/test_api_rocm.yaml
@@ -26,12 +26,42 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
 
 jobs:
+  set_rocm_docker_devices:
+    # Sources ./scripts/rocm_docker_devices.sh, which builds ROCM_DOCKER_DEVICES, a string of the
+    # form --device /dev/kfd --device /dev/dri/renderD128 --device /dev/dri/renderD129 ...
+    # The string is published as a job output and appended to the container options of
+    # run_api_rocm_tests to mount the selected devices in the docker container.
+    #
+    # The script reads ROCR_VISIBLE_DEVICES and /dev/dri on the machine it runs on, so this
+    # job runs on the GPU runner instead of a GitHub-hosted one.
+    # NOTE(review): assumes both jobs land on the same host, or on hosts with the same
+    # device layout - confirm against the runner pool.
+    runs-on: [self-hosted, amd-gpu, single-gpu, mi250]
+
+    outputs:
+      rocm_docker_devices: ${{ steps.rocm_docker_devices.outputs.rocm_docker_devices }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up ROCM_DOCKER_DEVICES output
+        id: rocm_docker_devices
+        shell: bash
+        # The script is sourced, not executed: a child process cannot export variables into
+        # this shell, and shell variables do not carry over to later steps either.
+        run: |
+          source ./scripts/rocm_docker_devices.sh
+          echo "rocm_docker_devices=$ROCM_DOCKER_DEVICES" >> "$GITHUB_OUTPUT"
+
   run_api_rocm_tests:
+    needs: set_rocm_docker_devices
+
     runs-on: [self-hosted, amd-gpu, single-gpu, mi250]
 
     container:
       image: ghcr.io/huggingface/optimum-benchmark:latest-rocm
-      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ ${{ needs.set_rocm_docker_devices.outputs.rocm_docker_devices }}
 
     steps:
       - name: Checkout
diff --git a/scripts/rocm_docker_devices.sh b/scripts/rocm_docker_devices.sh
new file mode 100644
index 00000000..85040220
--- /dev/null
+++ b/scripts/rocm_docker_devices.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Builds the Docker --device options for the render devices selected by ROCR_VISIBLE_DEVICES.
+#
+# Input:  ROCR_VISIBLE_DEVICES - comma-separated indices into the list of
+#         /dev/dri/renderD* devices, in glob (sorted) order, e.g. "0,2".
+# Output: ROCM_DOCKER_DEVICES - "--device /dev/kfd" followed by one
+#         "--device <render device>" per index, exported into the current shell.
+#
+# Source this script so that the export is visible to the caller:
+#   source ./scripts/rocm_docker_devices.sh
+# On error it prints a message to stderr and exits with status 1 (ending the sourcing shell).
+
+# Check if the variable is set
+if [ -z "$ROCR_VISIBLE_DEVICES" ]; then
+  echo "Environment variable ROCR_VISIBLE_DEVICES is not set" >&2
+  exit 1
+fi
+
+# Get the list of renderDxxx devices in /dev/dri.
+# A glob replaces parsing `ls`; an unmatched glob stays literal, so it is reset to an empty list.
+ROCM_RENDER_DEVICES=(/dev/dri/renderD*)
+if [ ! -e "${ROCM_RENDER_DEVICES[0]}" ]; then
+  ROCM_RENDER_DEVICES=()
+fi
+
+# Split the ROCR_VISIBLE_DEVICES variable by commas to get individual device indices
+IFS=',' read -r -a DEVICE_INDICES <<<"$ROCR_VISIBLE_DEVICES"
+
+# Construct the --device options for Docker
+ROCM_DOCKER_DEVICES="--device /dev/kfd"
+for INDEX in "${DEVICE_INDICES[@]}"; do
+  # Reject anything that is not a plain non-negative integer before the numeric comparison
+  if ! [[ "$INDEX" =~ ^[0-9]+$ ]]; then
+    echo "Invalid device index '$INDEX' in ROCR_VISIBLE_DEVICES" >&2
+    exit 1
+  fi
+  if [ "$INDEX" -lt "${#ROCM_RENDER_DEVICES[@]}" ]; then
+    ROCM_DOCKER_DEVICES+=" --device ${ROCM_RENDER_DEVICES[$INDEX]}"
+  else
+    echo "Index $INDEX is out of range for available render devices" >&2
+    exit 1
+  fi
+done
+
+# export the ROCM_DOCKER_DEVICES variable
+export ROCM_DOCKER_DEVICES