diff --git a/.github/workflows/benchmark_gpu_erc20.yml b/.github/workflows/benchmark_gpu_erc20.yml index 89e0c0f00e..3cda9ceb6e 100644 --- a/.github/workflows/benchmark_gpu_erc20.yml +++ b/.github/workflows/benchmark_gpu_erc20.yml @@ -1,195 +1,41 @@ -# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot. -name: ERC20 GPU H100 benchmarks +# Run CUDA ERC20 benchmarks on a Hyperstack VM and return parsed results to Slab CI bot. +name: Cuda ERC20 benchmarks on: workflow_dispatch: - schedule: - # Weekly benchmarks will be triggered each Saturday at 5a.m. - - cron: '0 5 * * 6' - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + inputs: + profile: + description: "Instance type" + required: true + type: choice + options: + - "l40 (n3-L40x1)" + - "single-h100 (n3-H100x1)" + - "2-h100 (n3-H100x2)" + - "multi-h100 (n3-H100x8)" jobs: - setup-instance: - name: Setup instance (cuda-erc20-benchmarks) + parse-inputs: runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') outputs: - runner-name: ${{ steps.start-instance.outputs.label }} - steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: single-h100 - - cuda-erc20-benchmarks: - name: Execute GPU integer benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - strategy: - fail-fast: false - # explicit include-based build matrix, of known valid options - matrix: - include: - - os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 + profile: ${{ steps.parse_profile.outputs.profile }} + hardware_name: ${{ steps.parse_hardware_name.outputs.name }} steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. - run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Check device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Run benchmarks - run: | - make bench_hlapi_erc20_gpu - - - name: Parse results + - name: Parse profile + id: parse_profile run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-H100x1" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - --walk-subdirs \ - --name-suffix avx512 - - - name: Parse PBS counts - run: | - python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \ - --object-sizes \ - --append-results - - - name: Upload parsed results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_erc20 - path: ${{ env.RESULTS_FILENAME }} + echo "profile=$(echo '${{ inputs.profile }}' | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}" - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Send data to Slab - shell: bash + - name: Parse hardware name + id: parse_hardware_name run: | - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-erc20-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }} - SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-erc20-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} + echo "name=$(echo '${{ inputs.profile }}' | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}" - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + run-benchmarks: + name: Run benchmarks + needs: parse-inputs + uses: ./.github/workflows/benchmark_gpu_erc20_common.yml + with: + profile: ${{ needs.parse-inputs.outputs.profile }} + hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }} + secrets: inherit diff --git a/.github/workflows/benchmark_gpu_erc20_8h100.yml b/.github/workflows/benchmark_gpu_erc20_8h100.yml deleted file mode 100644 index 2218218ec5..0000000000 --- a/.github/workflows/benchmark_gpu_erc20_8h100.yml +++ /dev/null @@ -1,195 +0,0 @@ -# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot. -name: ERC20 GPU 8xH100 benchmarks - -on: - workflow_dispatch: - schedule: - # Weekly benchmarks will be triggered each Saturday at 5a.m. - - cron: '0 5 * * 6' - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - -jobs: - setup-instance: - name: Setup instance (cuda-erc20-benchmarks) - runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') - outputs: - runner-name: ${{ steps.start-instance.outputs.label }} - steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: multi-h100 - - cuda-erc20-benchmarks: - name: Execute GPU integer benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - strategy: - fail-fast: false - # explicit include-based build matrix, of known valid options - matrix: - include: - - os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 - steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. - run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Check device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Run benchmarks - run: | - make bench_hlapi_erc20_gpu - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-H100x8" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - --walk-subdirs \ - --name-suffix avx512 - - - name: Parse PBS counts - run: | - python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \ - --object-sizes \ - --append-results - - - name: Upload parsed results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_erc20 - path: ${{ env.RESULTS_FILENAME }} - - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Send data to Slab - shell: bash - run: | - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-erc20-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }} - SLACK_MESSAGE: "ERC20 8xH100 benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-erc20-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_erc20_2h100.yml b/.github/workflows/benchmark_gpu_erc20_common.yml similarity index 84% rename from .github/workflows/benchmark_gpu_erc20_2h100.yml rename to .github/workflows/benchmark_gpu_erc20_common.yml index 4bb01fc8ae..079751dd8e 100644 --- a/.github/workflows/benchmark_gpu_erc20_2h100.yml +++ b/.github/workflows/benchmark_gpu_erc20_common.yml @@ -1,11 +1,35 @@ # Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot. -name: ERC20 GPU 2xH100 benchmarks +name: Cuda ERC20 benchmarks - common on: - workflow_dispatch: - schedule: - # Weekly benchmarks will be triggered each Saturday at 5a.m. - - cron: '0 5 * * 6' + workflow_call: + inputs: + backend: + type: string + default: hyperstack + profile: + type: string + required: true + hardware_name: + type: string + required: true + secrets: + FHE_ACTIONS_TOKEN: + required: true + SLAB_ACTION_TOKEN: + required: true + SLAB_BASE_URL: + required: true + SLAB_URL: + required: true + JOB_SECRET: + required: true + SLACK_CHANNEL: + required: true + BOT_USERNAME: + required: true + SLACK_WEBHOOK: + required: true env: CARGO_TERM_COLOR: always @@ -36,11 +60,11 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: 2-h100 + backend: ${{ inputs.backend }} + profile: ${{ inputs.profile }} cuda-erc20-benchmarks: - name: Execute GPU integer benchmarks + name: Cuda ERC20 benchmarks (${{ inputs.profile }}) needs: setup-instance runs-on: ${{ needs.setup-instance.outputs.runner-name }} strategy: @@ -123,7 +147,7 @@ jobs: run: | python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ --database tfhe_rs \ - --hardware "n3-H100x2" \ + --hardware "${{ inputs.hardware_name }}" \ --backend gpu \ --project-version "${{ env.COMMIT_HASH }}" \ --branch ${{ github.ref_name }} \ @@ -132,12 +156,6 @@ jobs: --walk-subdirs \ --name-suffix avx512 - - name: Parse PBS counts - run: | - python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \ - --object-sizes \ - --append-results - - name: Upload parsed results artifact uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 with: @@ -168,10 +186,10 @@ jobs: uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 env: SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }} - SLACK_MESSAGE: "ERC20 2xH100 benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Cuda ERC20 benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" teardown-instance: - name: Teardown instance (cuda-erc20-benchmarks) + name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks) if: ${{ always() && needs.setup-instance.result != 'skipped' }} needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ] runs-on: ubuntu-latest @@ -192,4 +210,4 @@ jobs: uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (cuda-erc20-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_erc20_weekly.yml b/.github/workflows/benchmark_gpu_erc20_weekly.yml new file mode 100644 index 0000000000..a3409afb7f --- /dev/null +++ b/.github/workflows/benchmark_gpu_erc20_weekly.yml @@ -0,0 +1,35 @@ +# Run CUDA ERC20 benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot. +name: Cuda ERC20 weekly benchmarks + +on: + schedule: + # Weekly benchmarks will be triggered each Saturday at 5a.m. + - cron: '0 5 * * 6' + +jobs: + run-benchmarks-1-h100: + name: Run benchmarks (1xH100) + if: github.repository == 'zama-ai/tfhe-rs' + uses: ./.github/workflows/benchmark_gpu_erc20_common.yml + with: + profile: single-h100 + hardware_name: n3-H100x1 + secrets: inherit + + run-benchmarks-2-h100: + name: Run benchmarks (2xH100) + if: github.repository == 'zama-ai/tfhe-rs' + uses: ./.github/workflows/benchmark_gpu_erc20_common.yml + with: + profile: 2-h100 + hardware_name: n3-H100x2 + secrets: inherit + + run-benchmarks-8-h100: + name: Run benchmarks (8xH100) + if: github.repository == 'zama-ai/tfhe-rs' + uses: ./.github/workflows/benchmark_gpu_erc20_common.yml + with: + profile: multi-h100 + hardware_name: n3-H100x8 + secrets: inherit diff --git a/.github/workflows/benchmark_gpu_integer_common.yml b/.github/workflows/benchmark_gpu_integer_common.yml index 89fcc84f57..cd79d3981e 100644 --- a/.github/workflows/benchmark_gpu_integer_common.yml +++ b/.github/workflows/benchmark_gpu_integer_common.yml @@ -1,5 +1,5 @@ # Run integer benchmarks on CUDA instance and return parsed results to Slab CI bot. -name: Cuda benchmarks common +name: Cuda benchmarks - common on: workflow_call: