From f3968373c1292dc0eed14212276d4a4f5c8ae96a Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 3 Jan 2025 17:45:41 +0000 Subject: [PATCH] Migrate the rest of CUDA 12.1 jobs to 12.4 (#144118) CUDA 12.4 is the default now and we don't build nightly 12.1 anymore, so it's time to move the rest of CI jobs to 12.4. I also clean up some redundant CI jobs on periodic and inductor-periodic. Pull Request resolved: https://github.com/pytorch/pytorch/pull/144118 Approved by: https://github.com/atalman --- .ci/docker/build.sh | 28 ------- .../workflows/inductor-micro-benchmark.yml | 20 ++--- .github/workflows/inductor-perf-compare.yml | 20 ++--- .../workflows/inductor-perf-test-nightly.yml | 44 +++++----- .github/workflows/inductor-periodic.yml | 82 +++++-------------- .github/workflows/periodic.yml | 76 +++++------------ 6 files changed, 84 insertions(+), 186 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 93b645f04b9292..6e6523a636a600 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -208,20 +208,6 @@ case "$image" in CONDA_CMAKE=yes TRITON=yes ;; - pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9) - CUDA_VERSION=12.4.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - ;; pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9) CUDA_VERSION=12.1.1 CUDNN_VERSION=9 @@ -236,20 +222,6 @@ case "$image" in CONDA_CMAKE=yes TRITON=yes ;; - pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9) - CUDA_VERSION=12.4.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - ;; pytorch-linux-focal-py3-clang10-onnx) ANACONDA_PYTHON_VERSION=3.9 CLANG_VERSION=10 diff --git a/.github/workflows/inductor-micro-benchmark.yml b/.github/workflows/inductor-micro-benchmark.yml index b1d2511a7cd6e8..2079aea810b185 100644 --- a/.github/workflows/inductor-micro-benchmark.yml +++ b/.github/workflows/inductor-micro-benchmark.yml @@ -37,16 +37,16 @@ jobs: curr_ref_type: ${{ github.ref_type }} check_experiments: "awsa100" - linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml needs: - get-default-label-prefix - get-a100-test-label-type with: runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' test-matrix: | { include: [ @@ -54,14 +54,14 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-test: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-test: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build + needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build.outputs.test-matrix }} use-gha: anything-non-empty-to-use-gha timeout-minutes: 720 secrets: inherit diff --git a/.github/workflows/inductor-perf-compare.yml b/.github/workflows/inductor-perf-compare.yml index 8b341d2c44dbdc..37249bf10f2f55 100644 --- a/.github/workflows/inductor-perf-compare.yml +++ b/.github/workflows/inductor-perf-compare.yml @@ -34,16 +34,16 @@ jobs: curr_ref_type: ${{ github.ref_type }} check_experiments: "awsa100" - linux-focal-cuda12_1-py3_10-gcc9-inductor-build: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-build: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml needs: - get-default-label-prefix - get-test-label-type with: runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' test-matrix: | { include: [ @@ -54,14 +54,14 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-test: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-test: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build + needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }} use-gha: anything-non-empty-to-use-gha # disable monitor in perf tests for more investigation disable-monitor: true diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml index 94f642ae2f5324..68632f217de569 100644 --- a/.github/workflows/inductor-perf-test-nightly.yml +++ b/.github/workflows/inductor-perf-test-nightly.yml @@ -77,14 +77,14 @@ jobs: curr_ref_type: ${{ github.ref_type }} # NB: Keep this in sync with trunk.yml - linux-focal-cuda12_1-py3_10-gcc9-inductor-build: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-build: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' test-matrix: | { include: [ @@ -104,48 +104,48 @@ jobs: selected-test-configs: ${{ inputs.benchmark_configs }} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-test-nightly: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-test-nightly: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build + needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build if: github.event.schedule == '0 7 * * 1-6' with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }} use-gha: anything-non-empty-to-use-gha timeout-minutes: 720 # disable monitor in perf tests for more investigation disable-monitor: true secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-test-weekly: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-test-weekly: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build + needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build if: github.event.schedule == '0 7 * * 0' with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }} use-gha: anything-non-empty-to-use-gha timeout-minutes: 1440 # disable monitor in perf tests for more investigation disable-monitor: true secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-test: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-test: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build + needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build if: github.event_name == 'workflow_dispatch' with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-false-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }} - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }} use-gha: anything-non-empty-to-use-gha timeout-minutes: 720 # disable monitor in perf tests for more investigation diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index 402cff71df9f2d..3d6b5c43f8a835 100644 --- a/.github/workflows/inductor-periodic.yml +++ b/.github/workflows/inductor-periodic.yml @@ -39,14 +39,14 @@ jobs: curr_ref_type: ${{ github.ref_type }} check_experiments: "awsa100" - linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build: - name: cuda12.1-py3.10-gcc9-sm86-periodic-dynamo-benchmarks + linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build: + name: cuda12.4-py3.10-gcc9-sm86-periodic-dynamo-benchmarks uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.6' test-matrix: | { include: [ @@ -68,26 +68,26 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-test: - name: cuda12.1-py3.10-gcc9-sm86-periodic-dynamo-benchmarks + linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-test: + name: cuda12.4-py3.10-gcc9-sm86-periodic-dynamo-benchmarks uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build + needs: linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml needs: - get-default-label-prefix - get-a100-test-label-type with: runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' test-matrix: | { include: [ @@ -95,14 +95,14 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-test-gcp: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-test-gcp: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp + needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }} use-gha: anything-non-empty-to-use-gha # disable monitor in smoke perf tests for more investigation disable-monitor: true @@ -143,52 +143,16 @@ jobs: secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-inductor-build: - name: cuda12.1-py3.10-gcc9-sm86 - uses: ./.github/workflows/_linux-build.yml - needs: get-default-label-prefix - with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks - cuda-arch-list: '8.6' - runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - sync-tag: linux-focal-cuda12_1-py3_10-gcc9-inductor-build - test-matrix: | - { include: [ - { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - ]} - secrets: inherit - - linux-focal-cuda12_1-py3_10-gcc9-inductor-test: - name: cuda12.1-py3.10-gcc9-sm86 - uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build - with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }} - secrets: inherit - linux-focal-cuda12_4-py3_10-gcc9-inductor-build: - # Should be synced with the benchmark tests in inductor.yml, but this doesn't run inductor_timm name: cuda12.4-py3.10-gcc9-sm86 uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86 docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.6' + runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" + sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build test-matrix: | { include: [ { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, @@ -209,13 +173,11 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build with: - sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-test build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86 docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }} test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }} secrets: inherit - linux-jammy-cpu-py3_9-gcc11-inductor-build: name: linux-jammy-cpu-py3.9-gcc11-inductor uses: ./.github/workflows/_linux-build.yml diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index 4310037d7d3f77..f3d7cf95745d28 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -49,36 +49,6 @@ jobs: curr_branch: ${{ github.head_ref || github.ref_name }} curr_ref_type: ${{ github.ref_type }} - linux-focal-cuda12_1-py3_10-gcc9-build: - name: linux-focal-cuda12.1-py3.10-gcc9 - uses: ./.github/workflows/_linux-build.yml - needs: get-label-type - with: - runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9 - test-matrix: | - { include: [ - { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, - { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, - { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, - { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, - { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - ]} - secrets: inherit - - linux-focal-cuda12_1-py3_10-gcc9-test: - name: linux-focal-cuda12.1-py3.10-gcc9 - uses: ./.github/workflows/_linux-test.yml - needs: - - linux-focal-cuda12_1-py3_10-gcc9-build - - target-determination - with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }} - secrets: inherit - linux-focal-cuda12_4-py3_10-gcc9-build: name: linux-focal-cuda12.4-py3.10-gcc9 uses: ./.github/workflows/_linux-build.yml @@ -89,11 +59,6 @@ jobs: docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9 test-matrix: | { include: [ - { config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, @@ -109,7 +74,6 @@ jobs: - linux-focal-cuda12_4-py3_10-gcc9-build - target-determination with: - timeout-minutes: 360 build-environment: linux-focal-cuda12.4-py3.10-gcc9 docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }} test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }} @@ -206,16 +170,16 @@ jobs: test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build: - name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build + linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build: + name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build uses: ./.github/workflows/_linux-build.yml needs: get-label-type if: false # See https://github.com/pytorch/pytorch/issues/138750 with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" use_split_build: true - build-environment: linux-focal-cuda12.1-py3.10-gcc9 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9 + build-environment: linux-focal-cuda12.4-py3.10-gcc9 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9 test-matrix: | { include: [ { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, @@ -226,16 +190,16 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build-test: - name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build + linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build-test: + name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build uses: ./.github/workflows/_linux-test.yml needs: - - linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build + - linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build - target-determination with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.test-matrix }} secrets: inherit @@ -301,14 +265,14 @@ jobs: test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }} secrets: inherit - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build: - name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck + linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build: + name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9 + build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9 cuda-arch-list: 8.6 test-matrix: | { include: [ @@ -323,16 +287,16 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test: - name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck + linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-test: + name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck uses: ./.github/workflows/_linux-test.yml needs: - - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build + - linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build - target-determination with: - build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck - docker-image: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck + docker-image: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }} timeout-minutes: 300 secrets: inherit