From a90f79fe980cb7d7602338a6ceb18570c3a07cb9 Mon Sep 17 00:00:00 2001
From: mengfeil
Date: Tue, 17 Dec 2024 14:12:51 +0800
Subject: [PATCH] test

---
 .github/workflows/nightly_ondemand.yml | 277 +------------------------
 1 file changed, 1 insertion(+), 276 deletions(-)

diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml
index 2edc06102..bd48d308f 100644
--- a/.github/workflows/nightly_ondemand.yml
+++ b/.github/workflows/nightly_ondemand.yml
@@ -66,29 +66,6 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  Linux-Nightly-Ondemand-UT-Tests:
-    if: github.event_name == 'schedule' || ${{ inputs.ut_suite }}
-    uses: ./.github/workflows/_linux_ut.yml
-    with:
-      keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
-      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
-      pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
-      python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
-      triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
-      runner: linux.idc.xpu
-
-  Linux-Weekly-UT-Tests-ABI-0:
-    if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
-    uses: ./.github/workflows/_linux_ut.yml
-    with:
-      abi: 0
-      keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
-      ut: op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu
-      pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
-      python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
-      triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
-      runner: linux.idc.xpu
-
   Linux-Nightly-Ondemand-E2E-Tests:
     runs-on: pvc_e2e
     # Don't run on forked repos
@@ -119,256 +96,4 @@ jobs:
         uses: actions/checkout@v4
       - name: Prepare Conda ENV
         run: |
-          which conda && conda clean -ay
-          conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci
-          conda create -n e2e_ci python=${{ env.python }} cmake ninja -y
-          source activate e2e_ci
-          pip install mkl-static==2025.0.1 mkl-include==2025.0.1
-          pip install pandas scipy tqdm
-      - name: Prepare Stock Pytorch
-        run: |
-          pwd
-          cd ../ && rm -rf pytorch
-          source activate e2e_ci
-          git clone https://github.com/pytorch/pytorch pytorch
-          cd pytorch && git checkout $(echo ${{ env.pytorch }} |awk '{print $1}')
-          # apply PRs for stock pytorch
-          pip install requests
-          python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
-          git status && git show -s
-          git submodule sync && git submodule update --init --recursive
-          if [[ ${{ env.keep_torch_xpu_ops }} == 'true' ]]; then
-            echo "Don't replace torch-xpu-ops!"
-          else
-            rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
-            # Workaround for torch-xpu-ops ci test
-            sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
-          fi
-      - name: Identify pinned versions
-        id: pinned
-        run: |
-          source .github/scripts/env.sh
-          cd ../pytorch
-          if [ -z ${{ inputs.triton }} ]; then
-            echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          else
-            echo "TRITON_COMMIT_ID=${{ inputs.triton }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          fi
-          echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "TORCHBENCH_COMMIT_ID=$(> "${GITHUB_ENV}"
-          echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          . /etc/os-release
-          echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
-          echo ${GITHUB_ENV}
-      - name: Triton Installation
-        run: |
-          source activate e2e_ci
-          cd ../pytorch
-          TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
-          echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
-          pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
-      - name: Build Pytorch XPU
-        run: |
-          source activate e2e_ci
-          source .github/scripts/env.sh
-          cd ../pytorch
-          pip install -r requirements.txt
-          export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-          python setup.py bdist_wheel
-          pip install --force-reinstall dist/*.whl
-      - name: Show GITHUB_ENV
-        run: |
-          echo "$GITHUB_ENV"
-          rm -rf ../pytorch/inductor_log
-          rm -rf /tmp/torchinductor_*
-          rm -rf ~/.triton/cache
-
-      # Nihglty launch
-      - name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test
-        if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
-        uses: ./.github/actions/inductor-xpu-e2e-test
-        with:
-          suite: huggingface
-          env_prepare: true
-          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
-          mode: inference,training
-          scenario: accuracy
-          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      - name: Nightly Torchbench BF16 Training Accuracy Test
-        if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
-        uses: ./.github/actions/inductor-xpu-e2e-test
-        with:
-          suite: torchbench
-          dt: bfloat16
-          mode: training
-          scenario: accuracy
-          env_prepare: true
-          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      - name: Nightly Timm_models FP16 Training Accuracy Test
-        if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
-        uses: ./.github/actions/inductor-xpu-e2e-test
-        with:
-          suite: timm_models
-          dt: float16
-          mode: training
-          scenario: accuracy
-          env_prepare: true
-          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      # Weekly launch
-      - name: Weekly Huggingface Full Test
-        if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
-        uses: ./.github/actions/inductor-xpu-e2e-test
-        with:
-          suite: huggingface
-          env_prepare: true
-          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
-          mode: inference,training
-          scenario: accuracy,performance
-          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      - name: Weekly Torchbench Full Test
-        if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
-        uses: ./.github/actions/inductor-xpu-e2e-test
-        with:
-          suite: torchbench
-          env_prepare: true
-          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
-          mode: inference,training
-          scenario: accuracy,performance
-          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      - name: Weekly Timm_models Full Test
-        if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
-        uses: ./.github/actions/inductor-xpu-e2e-test
-        with:
-          suite: timm_models
-          env_prepare: true
-          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
-          mode: inference,training
-          scenario: accuracy,performance
-          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      # On-demand launch
-      - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
-        if: github.event_name != 'schedule'
-        uses: ./.github/actions/inductor-xpu-e2e-test
-        with:
-          suite: ${{ inputs.suite }}
-          env_prepare: true
-          dt: ${{ inputs.dt }}
-          mode: ${{ inputs.mode }}
-          scenario: ${{ inputs.scenario }}
-          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-
-      - name: Summarize archieve files
-        id: summary
-        if: ${{ ! cancelled() }}
-        run: |
-          rm -rf ${{ github.workspace }}/upload_files
-          cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
-          mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
-          find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
-          tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
-          failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
-          timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
-          if [ ${timeout_models} -ne 0 ];then
-            TIMEOUT_MODELS="$(
-              grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
-            )"
-            echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
-          fi
-          if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
-            exit 1
-          fi
-      - name: Upload Inductor XPU E2E Data
-        if: ${{ ! cancelled() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
-          path: ${{ github.workspace }}/upload_files
-
-  Tests-Failure-And-Report:
-    if: ${{ ! cancelled() }}
-    runs-on: [ self-hosted, Linux ]
-    permissions:
-      issues: write
-    env:
-      GH_TOKEN: ${{ github.token }}
-      python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
-    needs: Linux-Nightly-Ondemand-E2E-Tests
-    steps:
-      - name: Report github issue for XPU OPS nightly
-        if: github.repository_owner == 'intel'
-        run: |
-          set -xe
-          # Test env
-          build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-          repo="${{ github.repository }}"
-          TORCH_BRANCH_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCH_BRANCH_ID }}"
-          TORCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCH_COMMIT_ID }}"
-          DRIVER_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.DRIVER_VERSION }}"
-          KERNEL_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.KERNEL_VERSION }}"
-          BUNDLE_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.BUNDLE_VERSION }}"
-          OS_PRETTY_NAME="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.OS_PRETTY_NAME }}"
-          GCC_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.GCC_VERSION }}"
-          TORCHBENCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCHBENCH_COMMIT_ID }}"
-          TORCHVISION_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCHVISION_COMMIT_ID }}"
-          TORCHAUDIO_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TORCHAUDIO_COMMIT_ID }}"
-          TRANSFORMERS_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TRANSFORMERS_VERSION }}"
-          TIMM_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TIMM_COMMIT_ID }}"
-          TRITON_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TRITON_COMMIT_ID }}"
-          TIMEOUT_MODELS="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TIMEOUT_MODELS }}"
-          # Test status
-          if [ "${{ needs.Linux-Nightly-Ondemand-E2E-Tests.result }}" == "success" ];then
-            test_status=Success
-          elif [ "${{ needs.Linux-Nightly-Ondemand-E2E-Tests.result }}" == "failure" ];then
-            test_status=Failure
-            cc_comment="CC ${{ secrets.NIGHTLY_EMAIL_LIST }}"
-          else
-            test_status=None
-            exit 0
-          fi
-          # Test Type
-          if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
-            test_type="On-demand"
-            test_issue_id=426
-            cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}"
-          elif [ "${{ github.event.schedule }}" == "0 16 * * 5" ];then
-            test_type="Weekly"
-            test_issue_id=432
-          else
-            test_type="Nightly"
-            test_issue_id=432
-          fi
-          # Test report
-          echo -e "**${test_status}** $test_type Test on $(date +'%F'), See: $build_url\n" > ${{ github.workspace }}/report.txt
-          printf "Torch-xpu-ops | PyTorch | Triton\n--- | --- | ---\n${GITHUB_WORKFLOW_SHA:0:7} on ${GITHUB_REF_NAME} | " >> ${{ github.workspace }}/report.txt
-          printf "[${TORCH_COMMIT_ID:0:7}](https://github.com/pytorch/pytorch/commit/${TORCH_COMMIT_ID:0:7}) on $TORCH_BRANCH_ID | " >> ${{ github.workspace }}/report.txt
-          echo -e "[${TRITON_COMMIT_ID:0:7}](https://github.com/intel/intel-xpu-backend-for-triton/commit/${TRITON_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt
-          printf "Transformers | Timm | Torchbench | Torchvision | Torchaudio\n--- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt
-          printf "[${TRANSFORMERS_VERSION:0:7}](https://github.com/huggingface/transformers/commit/${TRANSFORMERS_VERSION:0:7}) | " >> ${{ github.workspace }}/report.txt
-          printf "[${TIMM_COMMIT_ID:0:7}](https://github.com/huggingface/pytorch-image-models/commit/${TIMM_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt
-          printf "[${TORCHBENCH_COMMIT_ID:0:7}](https://github.com/pytorch/benchmark/commit/${TORCHBENCH_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt
-          printf "[${TORCHVISION_COMMIT_ID:0:7}](https://github.com/pytorch/vision/commit/${TORCHVISION_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt
-          echo -e "[${TORCHAUDIO_COMMIT_ID:0:7}](https://github.com/pytorch/audio/commit/${TORCHAUDIO_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt
-          printf "Device | OS | GCC | Python | Driver(DKMS) | Kernel | Bundle(DPCPP)\n--- | --- | --- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt
-          echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION | $KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt
-          if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
-            test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}"
-            if [ "${{ inputs.model }}" != "" ];then
-              test_scope+="; model=${{ inputs.model }}"
-            fi
-            echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt
-          fi
-          echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt
-          echo "$cc_comment" >> ${{ github.workspace }}/report.txt
-          # Report
-          report_txt=$(cat ${{ github.workspace }}/report.txt)
-          gh --repo $repo issue comment $test_issue_id --body "$report_txt"
+          printenv