From f3968373c1292dc0eed14212276d4a4f5c8ae96a Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 3 Jan 2025 17:45:41 +0000
Subject: [PATCH] Migrate the rest of CUDA 12.1 jobs to 12.4 (#144118)

CUDA 12.4 is now the default and we no longer build nightly binaries for 12.1, so it's time to move the remaining CI jobs to 12.4. This change also cleans up some redundant CI jobs in the periodic and inductor-periodic workflows.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/144118
Approved by: https://github.com/atalman
---
 .ci/docker/build.sh                           | 28 -------
 .../workflows/inductor-micro-benchmark.yml    | 20 ++---
 .github/workflows/inductor-perf-compare.yml   | 20 ++---
 .../workflows/inductor-perf-test-nightly.yml  | 44 +++++-----
 .github/workflows/inductor-periodic.yml       | 82 +++++--------------
 .github/workflows/periodic.yml                | 76 +++++------------
 6 files changed, 84 insertions(+), 186 deletions(-)

diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index 93b645f04b9292..6e6523a636a600 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -208,20 +208,6 @@ case "$image" in
     CONDA_CMAKE=yes
     TRITON=yes
     ;;
-  pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
-    CUDA_VERSION=12.4.1
-    CUDNN_VERSION=9
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    CONDA_CMAKE=yes
-    TRITON=yes
-    ;;
   pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
     CUDA_VERSION=12.1.1
     CUDNN_VERSION=9
@@ -236,20 +222,6 @@ case "$image" in
     CONDA_CMAKE=yes
     TRITON=yes
     ;;
-  pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
-    CUDA_VERSION=12.4.1
-    CUDNN_VERSION=9
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    CONDA_CMAKE=yes
-    TRITON=yes
-    ;;
   pytorch-linux-focal-py3-clang10-onnx)
     ANACONDA_PYTHON_VERSION=3.9
     CLANG_VERSION=10
diff --git a/.github/workflows/inductor-micro-benchmark.yml b/.github/workflows/inductor-micro-benchmark.yml
index b1d2511a7cd6e8..2079aea810b185 100644
--- a/.github/workflows/inductor-micro-benchmark.yml
+++ b/.github/workflows/inductor-micro-benchmark.yml
@@ -37,16 +37,16 @@ jobs:
       curr_ref_type: ${{ github.ref_type }}
       check_experiments: "awsa100"
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-build.yml
     needs:
       - get-default-label-prefix
       - get-a100-test-label-type
     with:
       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '8.0'
       test-matrix: |
         { include: [
@@ -54,14 +54,14 @@ jobs:
         ]}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-test:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-test:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build
+    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build.outputs.test-matrix }}
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-micro-benchmark-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       timeout-minutes: 720
     secrets: inherit
diff --git a/.github/workflows/inductor-perf-compare.yml b/.github/workflows/inductor-perf-compare.yml
index 8b341d2c44dbdc..37249bf10f2f55 100644
--- a/.github/workflows/inductor-perf-compare.yml
+++ b/.github/workflows/inductor-perf-compare.yml
@@ -34,16 +34,16 @@ jobs:
       curr_ref_type: ${{ github.ref_type }}
       check_experiments: "awsa100"
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-build.yml
     needs:
       - get-default-label-prefix
       - get-test-label-type
     with:
       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '8.0'
       test-matrix: |
         { include: [
@@ -54,14 +54,14 @@ jobs:
         ]}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-test:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-test:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build
+    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       # disable monitor in perf tests for more investigation
       disable-monitor: true
diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml
index 94f642ae2f5324..68632f217de569 100644
--- a/.github/workflows/inductor-perf-test-nightly.yml
+++ b/.github/workflows/inductor-perf-test-nightly.yml
@@ -77,14 +77,14 @@ jobs:
       curr_ref_type: ${{ github.ref_type }}
 
   # NB: Keep this in sync with trunk.yml
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-build.yml
     needs: get-label-type
     with:
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '8.0'
       test-matrix: |
         { include: [
@@ -104,48 +104,48 @@ jobs:
       selected-test-configs: ${{ inputs.benchmark_configs }}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-test-nightly:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-test-nightly:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build
+    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
     if: github.event.schedule == '0 7 * * 1-6'
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
       dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       timeout-minutes: 720
       # disable monitor in perf tests for more investigation
       disable-monitor: true
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-test-weekly:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-test-weekly:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build
+    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
     if: github.event.schedule == '0 7 * * 0'
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
       dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       timeout-minutes: 1440
       # disable monitor in perf tests for more investigation
       disable-monitor: true
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-test:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-test:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build
+    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
     if: github.event_name == 'workflow_dispatch'
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
       dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-false-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       timeout-minutes: 720
       # disable monitor in perf tests for more investigation
diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml
index 402cff71df9f2d..3d6b5c43f8a835 100644
--- a/.github/workflows/inductor-periodic.yml
+++ b/.github/workflows/inductor-periodic.yml
@@ -39,14 +39,14 @@ jobs:
       curr_ref_type: ${{ github.ref_type }}
       check_experiments: "awsa100"
 
-  linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build:
-    name: cuda12.1-py3.10-gcc9-sm86-periodic-dynamo-benchmarks
+  linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build:
+    name: cuda12.4-py3.10-gcc9-sm86-periodic-dynamo-benchmarks
     uses: ./.github/workflows/_linux-build.yml
     needs: get-default-label-prefix
     with:
       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
+      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '8.6'
       test-matrix: |
         { include: [
@@ -68,26 +68,26 @@ jobs:
         ]}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-test:
-    name: cuda12.1-py3.10-gcc9-sm86-periodic-dynamo-benchmarks
+  linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-test:
+    name: cuda12.4-py3.10-gcc9-sm86-periodic-dynamo-benchmarks
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build
+    needs: linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-build.yml
     needs:
       - get-default-label-prefix
       - get-a100-test-label-type
     with:
       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '8.0'
       test-matrix: |
         { include: [
@@ -95,14 +95,14 @@ jobs:
         ]}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-test-gcp:
-    name: cuda12.1-py3.10-gcc9-sm80
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-test-gcp:
+    name: cuda12.4-py3.10-gcc9-sm80
     uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp
+    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }}
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha
       # disable monitor in smoke perf tests for more investigation
       disable-monitor: true
@@ -143,52 +143,16 @@ jobs:
     secrets: inherit
 
 
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
-    name: cuda12.1-py3.10-gcc9-sm86
-    uses: ./.github/workflows/_linux-build.yml
-    needs: get-default-label-prefix
-    with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
-      cuda-arch-list: '8.6'
-      runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
-      sync-tag: linux-focal-cuda12_1-py3_10-gcc9-inductor-build
-      test-matrix: |
-        { include: [
-          { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-        ]}
-    secrets: inherit
-
-  linux-focal-cuda12_1-py3_10-gcc9-inductor-test:
-    name: cuda12.1-py3.10-gcc9-sm86
-    uses: ./.github/workflows/_linux-test.yml
-    needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build
-    with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
-    secrets: inherit
-
   linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
-    # Should be synced with the benchmark tests in inductor.yml, but this doesn't run inductor_timm
     name: cuda12.4-py3.10-gcc9-sm86
     uses: ./.github/workflows/_linux-build.yml
     needs: get-default-label-prefix
     with:
-      runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
-      sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
       build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
       docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '8.6'
+      runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
+      sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
       test-matrix: |
         { include: [
           { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
@@ -209,13 +173,11 @@ jobs:
     uses: ./.github/workflows/_linux-test.yml
     needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
     with:
-      sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-test
       build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
       docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
     secrets: inherit
 
-
   linux-jammy-cpu-py3_9-gcc11-inductor-build:
     name: linux-jammy-cpu-py3.9-gcc11-inductor
     uses: ./.github/workflows/_linux-build.yml
diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml
index 4310037d7d3f77..f3d7cf95745d28 100644
--- a/.github/workflows/periodic.yml
+++ b/.github/workflows/periodic.yml
@@ -49,36 +49,6 @@ jobs:
       curr_branch: ${{ github.head_ref || github.ref_name }}
       curr_ref_type: ${{ github.ref_type }}
 
-  linux-focal-cuda12_1-py3_10-gcc9-build:
-    name: linux-focal-cuda12.1-py3.10-gcc9
-    uses: ./.github/workflows/_linux-build.yml
-    needs: get-label-type
-    with:
-      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
-      test-matrix: |
-        { include: [
-          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
-        ]}
-    secrets: inherit
-
-  linux-focal-cuda12_1-py3_10-gcc9-test:
-    name: linux-focal-cuda12.1-py3.10-gcc9
-    uses: ./.github/workflows/_linux-test.yml
-    needs:
-      - linux-focal-cuda12_1-py3_10-gcc9-build
-      - target-determination
-    with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }}
-    secrets: inherit
-
   linux-focal-cuda12_4-py3_10-gcc9-build:
     name: linux-focal-cuda12.4-py3.10-gcc9
     uses: ./.github/workflows/_linux-build.yml
@@ -89,11 +59,6 @@ jobs:
       docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
       test-matrix: |
         { include: [
-          { config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
-          { config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
-          { config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
-          { config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
-          { config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
           { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
@@ -109,7 +74,6 @@ jobs:
       - linux-focal-cuda12_4-py3_10-gcc9-build
       - target-determination
     with:
-      timeout-minutes: 360
       build-environment: linux-focal-cuda12.4-py3.10-gcc9
       docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
@@ -206,16 +170,16 @@ jobs:
       test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build:
-    name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
+  linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build:
+    name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
     uses: ./.github/workflows/_linux-build.yml
     needs: get-label-type
     if: false # See https://github.com/pytorch/pytorch/issues/138750
     with:
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
       use_split_build: true
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9
+      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
       test-matrix: |
         { include: [
           { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
@@ -226,16 +190,16 @@ jobs:
         ]}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build-test:
-    name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
+  linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build-test:
+    name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
     uses: ./.github/workflows/_linux-test.yml
     needs:
-      - linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build
+      - linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build
       - target-determination
     with:
-      build-environment: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
     secrets: inherit
 
 
@@ -301,14 +265,14 @@ jobs:
       test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build:
-    name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
+  linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build:
+    name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
     uses: ./.github/workflows/_linux-build.yml
     needs: get-label-type
     with:
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
-      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
+      build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
+      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
       cuda-arch-list: 8.6
       test-matrix: |
         { include: [
@@ -323,16 +287,16 @@ jobs:
         ]}
     secrets: inherit
 
-  linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test:
-    name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
+  linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-test:
+    name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
     uses: ./.github/workflows/_linux-test.yml
     needs:
-      - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build
+      - linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build
       - target-determination
     with:
-      build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
-      docker-image: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
-      test-matrix: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
+      build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
       timeout-minutes: 300
     secrets: inherit