chore(ci): refacto erc20 gpu bench workflows to reduce duplicates

Now there is only one entry point to trigger ERC20 benchmarks manually. This entry point uses a sub-workflow responsible for provisioning and running the benchmarks. A weekly workflow is also created with all the targets needed.
zama-ai · Nov 22, 2024 · 8c2358a · 8c2358a
1 parent c3def17
commit 8c2358a
Show file tree

Hide file tree

Showing 5 changed files with 101 additions and 397 deletions.
diff --git a/.github/workflows/benchmark_gpu_erc20.yml b/.github/workflows/benchmark_gpu_erc20.yml
@@ -1,195 +1,41 @@
-# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
-name: ERC20 GPU H100 benchmarks
+# Run CUDA ERC20 benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
+name: Cuda ERC20 benchmarks
 
 on:
   workflow_dispatch:
-  schedule:
-    # Weekly benchmarks will be triggered each Saturday at 5a.m.
-    - cron: '0 5 * * 6'
-
-env:
-  CARGO_TERM_COLOR: always
-  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
-  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+    inputs:
+      profile:
+        description: "Instance type"
+        required: true
+        type: choice
+        options:
+          - "l40 (n3-L40x1)"
+          - "single-h100 (n3-H100x1)"
+          - "2-h100 (n3-H100x2)"
+          - "multi-h100 (n3-H100x8)"
 
 jobs:
-  setup-instance:
-    name: Setup instance (cuda-erc20-benchmarks)
+  parse-inputs:
     runs-on: ubuntu-latest
-    if:  github.event_name == 'workflow_dispatch' ||
-      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
     outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: hyperstack
-          profile: single-h100
-
-  cuda-erc20-benchmarks:
-    name: Execute GPU integer benchmarks
-    needs: setup-instance
-    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
+      profile: ${{ steps.parse_profile.outputs.profile }}
+      hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
     steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Get benchmark details
-        run: |
-          {
-            echo "BENCH_DATE=$(date --iso-8601=seconds)";
-            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
-            echo "COMMIT_HASH=$(git describe --tags --dirty)";
-          } >> "${GITHUB_ENV}"
-
-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install rust
-        uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
-        with:
-          toolchain: nightly
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
-      - name: Run benchmarks
-        run: |
-          make bench_hlapi_erc20_gpu
-
-      - name: Parse results
+      - name: Parse profile
+        id: parse_profile
         run: |
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --database tfhe_rs \
-          --hardware "n3-H100x1" \
-          --backend gpu \
-          --project-version "${{ env.COMMIT_HASH }}" \
-          --branch ${{ github.ref_name }} \
-          --commit-date "${{ env.COMMIT_DATE }}" \
-          --bench-date "${{ env.BENCH_DATE }}" \
-          --walk-subdirs \
-          --name-suffix avx512
-
-      - name: Parse PBS counts
-        run: |
-          python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \
-          --object-sizes \
-          --append-results
-
-      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
-        with:
-          name: ${{ github.sha }}_erc20
-          path: ${{ env.RESULTS_FILENAME }}
+          echo "profile=$(echo '${{ inputs.profile }}' | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}"
 
-      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Send data to Slab
-        shell: bash
+      - name: Parse hardware name
+        id: parse_hardware_name
         run: |
-          python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
-          --slab-url "${{ secrets.SLAB_URL }}"
-
-  slack-notify:
-    name: Slack Notification
-    needs: [ setup-instance, cuda-erc20-benchmarks ]
-    runs-on: ubuntu-latest
-    if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }}
-    continue-on-error: true
-    steps:
-      - name: Send message
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
-        env:
-          SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }}
-          SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-instance:
-    name: Teardown instance (cuda-erc20-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
-    needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-instance.outputs.runner-name }}
+          echo "name=$(echo '${{ inputs.profile }}' | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}"
 
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+  run-benchmarks:
+    name: Run benchmarks
+    needs: parse-inputs
+    uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
+    with:
+      profile: ${{ needs.parse-inputs.outputs.profile }}
+      hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }}
+    secrets: inherit