From 8dd417dc28829dee4822010fe7fc03b1970e1f19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Test=C3=A9?= Date: Thu, 21 Nov 2024 13:31:26 +0100 Subject: [PATCH] chore(ci): refacto gpu bench workflows to reduce duplicates Now there is only one entry point to trigger benchmarks manually. This entry point uses a sub-workflow responsible for provisioning and running the benchmarks. A weekly workflow is also created with all the targets needed. This also adds the possibility to run throughput benchmarks on-demand. --- .github/workflows/benchmark_gpu_integer.yml | 271 +++++------------- .../benchmark_gpu_integer_2H100_full.yml | 206 ------------- ...l.yml => benchmark_gpu_integer_common.yml} | 157 +++++++--- .../benchmark_gpu_integer_multi_bit.yml | 234 --------------- ...chmark_gpu_integer_multi_bit_multi_gpu.yml | 224 --------------- .../benchmark_gpu_integer_multi_gpu_full.yml | 206 ------------- .../benchmark_gpu_integer_weekly.yml | 92 ++++++ .github/workflows/benchmark_gpu_l40.yml | 205 ------------- 8 files changed, 281 insertions(+), 1314 deletions(-) delete mode 100644 .github/workflows/benchmark_gpu_integer_2H100_full.yml rename .github/workflows/{benchmark_gpu_integer_full.yml => benchmark_gpu_integer_common.yml} (57%) delete mode 100644 .github/workflows/benchmark_gpu_integer_multi_bit.yml delete mode 100644 .github/workflows/benchmark_gpu_integer_multi_bit_multi_gpu.yml delete mode 100644 .github/workflows/benchmark_gpu_integer_multi_gpu_full.yml create mode 100644 .github/workflows/benchmark_gpu_integer_weekly.yml delete mode 100644 .github/workflows/benchmark_gpu_l40.yml diff --git a/.github/workflows/benchmark_gpu_integer.yml b/.github/workflows/benchmark_gpu_integer.yml index 477036bd5a..3a436353a7 100644 --- a/.github/workflows/benchmark_gpu_integer.yml +++ b/.github/workflows/benchmark_gpu_integer.yml @@ -1,213 +1,86 @@ -# Run integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot. 
-name: Integer GPU benchmarks +# Run CUDA benchmarks on a Hyperstack VM and return parsed results to Slab CI bot. +name: Cuda benchmarks on: workflow_dispatch: inputs: - run_throughput: - description: "Run throughput benchmarks" + profile: + description: "Instance type" + required: true + type: choice + options: + - "l40 (n3-L40x1)" + - "single-h100 (n3-H100x1)" + - "2-h100 (n3-H100x2)" + - "4-h100 (n3-H100x4)" + - "multi-h100 (n3-H100x8)" + - "multi-h100-nvlink (n3-H100x8-NVLink)" + - "multi-a100-nvlink (n3-A100x8-NVLink)" + command: + description: "Benchmark command to run" + type: choice + default: integer_multi_bit + options: + - integer + - integer_multi_bit + - integer_compression + - pbs + - ks + op_flavor: + description: "Operations set to run" + type: choice + default: default + options: + - default + - fast_default + - unchecked + all_precisions: + description: "Run all precisions" type: boolean default: false - - push: - branches: - - main - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - BENCH_TYPE: latency + bench_type: + description: "Benchmarks type" + type: choice + default: latency + options: + - latency + - throughput + - both jobs: - setup-instance: - name: Setup instance (cuda-integer-benchmarks) + parse-inputs: runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' || - (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') outputs: - runner-name: ${{ steps.start-instance.outputs.label }} + profile: ${{ 
steps.parse_profile.outputs.profile }} + hardware_name: ${{ steps.parse_hardware_name.outputs.name }} steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: single-h100 - - cuda-integer-benchmarks: - name: Execute GPU integer benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - strategy: - fail-fast: false - # explicit include-based build matrix, of known valid options - matrix: - include: - - os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 - steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. 
- run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Check device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Should run throughput benchmarks - if: inputs.run_throughput + - name: Parse profile + id: parse_profile run: | - echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}" + echo "profile=$(echo \"${{ inputs.profile }}\" | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}" - - name: Run benchmarks with AVX512 + - name: Parse hardware name + id: parse_hardware_name run: | - make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu - - - name: Parse benchmarks to csv - run: | - make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \ - parse_integer_benches - - - name: Upload csv results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_csv_integer - path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-H100x1" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - 
--walk-subdirs \ - --name-suffix avx512 \ - --bench-type ${{ env.BENCH_TYPE }} - - - name: Upload parsed results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_integer - path: ${{ env.RESULTS_FILENAME }} - - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Send data to Slab - shell: bash - run: | - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-integer-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-integer-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-integer-benchmarks.result }} - SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-integer-benchmarks.result }}. 
(${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-integer-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-integer-benchmarks, slack-notify ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + echo "name=$(echo \"${{ inputs.profile }}\" | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}" + + run-benchmarks: + name: Run benchmarks + needs: parse-inputs + uses: ./.github/workflows/benchmark_gpu_integer_common.yml + with: + profile: ${{ needs.parse-inputs.outputs.profile }} + hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }} + command: ${{ inputs.command }} + op_flavor: ${{ inputs.op_flavor }} + bench_type: ${{ inputs.bench_type }} + all_precisions: ${{ inputs.all_precisions }} + secrets: + FHE_ACTIONS_TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }} + SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTIONS_TOKEN }} + SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} + SLAB_URL: ${{ secrets.SLAB_URL }} + JOB_SECRET: ${{ secrets.JOB_SECRET }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + BOT_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} diff --git a/.github/workflows/benchmark_gpu_integer_2H100_full.yml b/.github/workflows/benchmark_gpu_integer_2H100_full.yml deleted file mode 100644 index 6b2ab44c2d..0000000000 
--- a/.github/workflows/benchmark_gpu_integer_2H100_full.yml +++ /dev/null @@ -1,206 +0,0 @@ -# Run integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot. -name: Integer 2xH100 benchmarks - -on: - workflow_dispatch: - inputs: - run_throughput: - description: "Run throughput benchmarks" - type: boolean - default: false - - schedule: - # Weekly benchmarks will be triggered each Saturday at 1a.m. - - cron: '0 1 * * 6' - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - BENCH_TYPE: latency - -jobs: - setup-instance: - name: Setup instance (cuda-integer-full-2-gpu-benchmarks) - runs-on: ubuntu-latest - if: github.event_name != 'schedule' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') - outputs: - runner-name: ${{ steps.start-instance.outputs.label }} - steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: 2-h100 - - cuda-integer-full-2-gpu-benchmarks: - name: Execute 2xH100 integer benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - timeout-minutes: 1440 # 24 hours - continue-on-error: true - strategy: - fail-fast: false - max-parallel: 1 - matrix: - command: [integer_multi_bit] - op_flavor: [default] - # explicit include-based build matrix, of known valid options - include: - - 
os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 - steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. 
- run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Check device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Should run throughput benchmarks - if: inputs.run_throughput - run: | - echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}" - - - name: Run benchmarks with AVX512 - run: | - make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-H100x2" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - --walk-subdirs \ - --name-suffix avx512 \ - --bench-type ${{ env.BENCH_TYPE }} - - - name: Upload parsed results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }} - path: ${{ env.RESULTS_FILENAME }} - - - name: Send data to Slab - shell: bash - run: 
| - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-integer-full-2-gpu-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-integer-full-2-gpu-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-integer-full-2-gpu-benchmarks.result }} - SLACK_MESSAGE: "Integer GPU 2xH100 benchmarks finished with status: ${{ needs.cuda-integer-full-2-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-integer-full-2-gpu-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-integer-full-2-gpu-benchmarks ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-integer-full-2-gpu-benchmarks) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_integer_full.yml b/.github/workflows/benchmark_gpu_integer_common.yml similarity index 57% rename from .github/workflows/benchmark_gpu_integer_full.yml rename to .github/workflows/benchmark_gpu_integer_common.yml index 437de7adc2..22deb61419 100644 --- a/.github/workflows/benchmark_gpu_integer_full.yml +++ b/.github/workflows/benchmark_gpu_integer_common.yml @@ -1,17 +1,47 @@ -# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot. -name: Integer GPU full benchmarks +# Run integer benchmarks on CUDA instance and return parsed results to Slab CI bot. +name: Cuda benchmarks common on: - workflow_dispatch: + workflow_call: inputs: - run_throughput: - description: "Run throughput benchmarks" + backend: + type: string + default: hyperstack + profile: + type: string + required: true + hardware_name: + type: string + required: true + command: # Use a comma separated values to generate an array + type: string + required: true + op_flavor: # Use a comma separated values to generate an array + type: string + required: true + bench_type: + type: string + default: latency + all_precisions: type: boolean default: false - - schedule: - # Weekly benchmarks will be triggered each Saturday at 1a.m. 
- - cron: '0 1 * * 6' + secrets: + FHE_ACTIONS_TOKEN: + required: true + SLAB_ACTION_TOKEN: + required: true + SLAB_BASE_URL: + required: true + SLAB_URL: + required: true + JOB_SECRET: + required: true + SLACK_CHANNEL: + required: true + BOT_USERNAME: + required: true + SLACK_WEBHOOK: + required: true env: CARGO_TERM_COLOR: always @@ -23,14 +53,67 @@ env: SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - BENCH_TYPE: latency + FAST_BENCH: TRUE jobs: + prepare-matrix: + name: Prepare operations matrix + runs-on: ubuntu-latest + outputs: + command: ${{ steps.set_command.outputs.command }} + op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }} + bench_type: ${{ steps.set_bench_type.outputs.bench_type }} + steps: + - name: Set single command + if: ${{ !contains(inputs.command, ',')}} + run: | + echo "COMMAND=[\"${{ inputs.command }}\"]" >> "${GITHUB_ENV}" + + - name: Set multiple commands + if: ${{ contains(inputs.command, ',')}} + run: | + PARSED_COMMAND=$(echo "${{ inputs.command }}" | sed 's/[[:space:]]*,[[:space:]]*/\\", \\"/g') + echo "COMMAND=[\"${PARSED_COMMAND}\"]" >> "${GITHUB_ENV}" + + - name: Set single operations flavor + if: ${{ !contains(inputs.op_flavor, ',')}} + run: | + echo "OP_FLAVOR=[\"${{ inputs.op_flavor }}\"]" >> "${GITHUB_ENV}" + + - name: Set multiple operations flavors + if: ${{ contains(inputs.op_flavor, ',')}} + run: | + PARSED_OP_FLAVOR=$(echo "${{ inputs.op_flavor }}" | sed 's/[[:space:]]*,[[:space:]]*/\\", \\"/g') + echo "OP_FLAVOR=[\"${PARSED_OP_FLAVOR}\"]" >> "${GITHUB_ENV}" + + - name: Set benchmark types + run: | + echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}" + if [[ "${{ inputs.bench_type }}" == "both" ]]; then + echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}" + else + echo "BENCH_TYPE=[\"${{ inputs.bench_type }}\"]" >> "${GITHUB_ENV}" + fi + + - name: Set command output + id: 
set_command + run: | + echo "command=${{ toJSON(env.COMMAND) }}" >> "${GITHUB_OUTPUT}" + + - name: Set operation flavor output + id: set_op_flavor + run: | + echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}" + + - name: Set benchmark types output + id: set_bench_type + run: | + echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}" + setup-instance: - name: Setup instance (cuda-integer-full-benchmarks) + name: Setup instance (cuda-${{ inputs.profile }}-benchmarks) + needs: prepare-matrix runs-on: ubuntu-latest - if: github.event_name != 'schedule' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') outputs: runner-name: ${{ steps.start-instance.outputs.label }} steps: @@ -42,12 +125,12 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: single-h100 + backend: ${{ inputs.backend }} + profile: ${{ inputs.profile }} - cuda-integer-full-benchmarks: - name: Execute GPU integer benchmarks for all operations flavor - needs: setup-instance + cuda-benchmarks: + name: Cuda benchmarks (${{ inputs.profile }}) + needs: [ prepare-matrix, setup-instance ] runs-on: ${{ needs.setup-instance.outputs.runner-name }} timeout-minutes: 1440 # 24 hours continue-on-error: true @@ -55,8 +138,9 @@ jobs: fail-fast: false max-parallel: 1 matrix: - command: [integer, integer_multi_bit] - op_flavor: [default] + command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }} + op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }} + bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }} # explicit include-based build matrix, of known valid options include: - os: ubuntu-22.04 @@ -133,34 +217,27 @@ jobs: if: ${{ !cancelled() }} run: nvidia-smi - - name: Should run throughput benchmarks - if: inputs.run_throughput + - name: Should run benchmarks with all precisions + if: inputs.all_precisions run: | - 
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}" + echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}" - - name: Run benchmarks with AVX512 + - name: Run benchmarks run: | make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu - # Run these benchmarks only once - - name: Run compression benchmarks with AVX512 - if: matrix.op_flavor == 'default' && matrix.command == 'integer' - run: | - make bench_integer_compression_gpu - - name: Parse results run: | python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ --database tfhe_rs \ - --hardware "n3-H100x1" \ + --hardware "${{ inputs.hardware_name }}" \ --backend gpu \ --project-version "${{ env.COMMIT_HASH }}" \ --branch ${{ github.ref_name }} \ --commit-date "${{ env.COMMIT_DATE }}" \ --bench-date "${{ env.BENCH_DATE }}" \ --walk-subdirs \ - --name-suffix avx512 \ - --bench-type ${{ env.BENCH_TYPE }} + --name-suffix avx512 - name: Upload parsed results artifact uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 @@ -176,21 +253,21 @@ jobs: slack-notify: name: Slack Notification - needs: [ setup-instance, cuda-integer-full-benchmarks ] + needs: [ setup-instance, cuda-benchmarks ] runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-integer-full-benchmarks.result != 'skipped' && failure() }} + if: ${{ always() && needs.cuda-benchmarks.result != 'skipped' && failure() }} continue-on-error: true steps: - name: Send message uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 env: - SLACK_COLOR: ${{ needs.cuda-integer-full-benchmarks.result }} - SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ needs.cuda-integer-full-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" + SLACK_COLOR: ${{ needs.cuda-benchmarks.result }} + SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. 
(${{ env.ACTION_RUN_URL }})" teardown-instance: - name: Teardown instance (cuda-integer-full-benchmarks) + name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks) if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-integer-full-benchmarks, slack-notify ] + needs: [ setup-instance, cuda-benchmarks, slack-notify ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -209,4 +286,4 @@ jobs: uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-integer-full-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (cuda-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_integer_multi_bit.yml b/.github/workflows/benchmark_gpu_integer_multi_bit.yml deleted file mode 100644 index 78077712da..0000000000 --- a/.github/workflows/benchmark_gpu_integer_multi_bit.yml +++ /dev/null @@ -1,234 +0,0 @@ -# Run integer benchmarks with multi-bit cryptographic parameters on an instance and return parsed results to Slab CI bot. -name: Integer GPU Multi-bit benchmarks - -on: - workflow_dispatch: - inputs: - all_precisions: - description: "Run all precisions" - type: boolean - default: false - fast_default: - description: "Run only deduplicated default operations without scalar variants" - type: boolean - default: false - run_throughput: - description: "Run throughput benchmarks" - type: boolean - default: false - - schedule: - # Weekly benchmarks will be triggered each Saturday at 1a.m. 
- - cron: '0 1 * * 6' - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - FAST_BENCH: TRUE - BENCH_OP_FLAVOR: default - BENCH_TYPE: latency - -jobs: - setup-instance: - name: Setup instance (cuda-integer-multi-bit-benchmarks) - runs-on: ubuntu-latest - if: github.event_name != 'schedule' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') - outputs: - runner-name: ${{ steps.start-instance.outputs.label }} - steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: single-h100 - - cuda-integer-multi-bit-benchmarks: - name: Execute GPU integer multi-bit benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - timeout-minutes: 1440 # 24 hours - strategy: - fail-fast: false - # explicit include-based build matrix, of known valid options - matrix: - include: - - os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 - steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. 
- - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. - run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Should run benchmarks with all precisions - if: inputs.all_precisions - run: | - echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}" - - - name: Should run fast subset benchmarks - if: inputs.fast_default - run: | - echo "BENCH_OP_FLAVOR=fast_default" >> "${GITHUB_ENV}" - - - name: Check 
device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Should run throughput benchmarks - if: inputs.run_throughput - run: | - echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}" - - - name: Run multi-bit benchmarks with AVX512 - run: | - make bench_unsigned_integer_multi_bit_gpu - - - name: Parse benchmarks to csv - run: | - make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \ - parse_integer_benches - - - name: Upload csv results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_csv_integer - path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-H100x1" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - --walk-subdirs \ - --name-suffix avx512 \ - --bench-type ${{ env.BENCH_TYPE }} - - - name: Upload parsed results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_integer - path: ${{ env.RESULTS_FILENAME }} - - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Send data to Slab - shell: bash - run: | - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-integer-multi-bit-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-integer-multi-bit-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: 
rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-integer-multi-bit-benchmarks.result }} - SLACK_MESSAGE: "Integer GPU multi-bit benchmarks finished with status: ${{ needs.cuda-integer-multi-bit-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-integer-full-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-integer-multi-bit-benchmarks, slack-notify ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_integer_multi_bit_multi_gpu.yml b/.github/workflows/benchmark_gpu_integer_multi_bit_multi_gpu.yml deleted file mode 100644 index 94a6f1addb..0000000000 --- a/.github/workflows/benchmark_gpu_integer_multi_bit_multi_gpu.yml +++ /dev/null @@ -1,224 +0,0 @@ -# Run 64-bit multi-bit integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot. 
-name: Integer multi GPU Multi-bit benchmarks - -on: - workflow_dispatch: - inputs: - all_precisions: - description: "Run all precisions" - type: boolean - default: false - fast_default: - description: "Run only deduplicated default operations without scalar variants" - type: boolean - default: false - run_throughput: - description: "Run throughput benchmarks" - type: boolean - default: false - - schedule: - # Weekly benchmarks will be triggered each Saturday at 1a.m. - - cron: '0 1 * * 6' - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - FAST_BENCH: TRUE - BENCH_OP_FLAVOR: default - BENCH_TYPE: latency - -jobs: - setup-instance: - name: Setup instance (cuda-integer-multi-bit-multi-gpu-benchmarks) - runs-on: ubuntu-latest - if: ${{ (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || - github.event_name == 'workflow_dispatch' }} - outputs: - runner-name: ${{ steps.start-instance.outputs.label }} - steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: multi-h100 - - cuda-integer-multi-bit-multi-gpu-benchmarks: - name: Execute multi GPU integer multi-bit benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - timeout-minutes: 1440 # 24 
hours - continue-on-error: true - strategy: - fail-fast: false - max-parallel: 1 - matrix: - include: - - os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 - steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. 
- run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Should run benchmarks with all precisions - if: inputs.all_precisions - run: | - echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}" - - - name: Should run fast subset benchmarks - if: inputs.fast_default - run: | - echo "BENCH_OP_FLAVOR=fast_default" >> "${GITHUB_ENV}" - - - name: Check device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Should run throughput benchmarks - if: inputs.run_throughput - run: | - echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}" - - - name: Run multi-bit benchmarks with AVX512 - run: | - make bench_unsigned_integer_multi_bit_gpu - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-H100x8" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - --walk-subdirs \ - --name-suffix avx512 \ - --bench-type ${{ env.BENCH_TYPE }} - - - name: Upload parsed results 
artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_integer - path: ${{ env.RESULTS_FILENAME }} - - - name: Send data to Slab - shell: bash - run: | - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-integer-multi-bit-multi-gpu-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result }} - SLACK_MESSAGE: "Integer multi GPU multi-bit benchmarks finished with status: ${{ needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-integer-multi-bit-multi-gpu-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-integer-multi-bit-multi-gpu-benchmarks ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-multi-gpu-benchmarks) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_integer_multi_gpu_full.yml b/.github/workflows/benchmark_gpu_integer_multi_gpu_full.yml deleted file mode 100644 index 80aefdc5c8..0000000000 --- a/.github/workflows/benchmark_gpu_integer_multi_gpu_full.yml +++ /dev/null @@ -1,206 +0,0 @@ -# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot. -name: Integer multi GPU full benchmarks - -on: - workflow_dispatch: - inputs: - run_throughput: - description: "Run throughput benchmarks" - type: boolean - default: false - - schedule: - # Weekly benchmarks will be triggered each Saturday at 1a.m. - - cron: '0 1 * * 6' - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - BENCH_TYPE: latency - -jobs: - setup-instance: - name: Setup instance (cuda-integer-full-multi-gpu-benchmarks) - runs-on: ubuntu-latest - if: github.event_name != 'schedule' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') - outputs: - runner-name: ${{ steps.start-instance.outputs.label }} - steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: multi-h100 - - cuda-integer-full-multi-gpu-benchmarks: - name: Execute multi GPU integer benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - 
timeout-minutes: 1440 # 24 hours - continue-on-error: true - strategy: - fail-fast: false - max-parallel: 1 - matrix: - command: [integer_multi_bit] - op_flavor: [default] - # explicit include-based build matrix, of known valid options - include: - - os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 - steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. 
- run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Check device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Should run throughput benchmarks - if: inputs.run_throughput - run: | - echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}" - - - name: Run benchmarks with AVX512 - run: | - make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-H100x8" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - --walk-subdirs \ - --name-suffix avx512 \ - --bench-type ${{ env.BENCH_TYPE }} - - - name: Upload parsed results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }} - path: ${{ env.RESULTS_FILENAME }} - - - name: Send data to Slab - shell: bash - run: 
| - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-integer-full-multi-gpu-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-integer-full-multi-gpu-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }} - SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-integer-full-multi-gpu-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-integer-full-multi-gpu-benchmarks ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-integer-full-multi-gpu-benchmarks) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})"
diff --git a/.github/workflows/benchmark_gpu_integer_weekly.yml b/.github/workflows/benchmark_gpu_integer_weekly.yml
new file mode 100644
index 0000000000..00a9e74496
--- /dev/null
+++ b/.github/workflows/benchmark_gpu_integer_weekly.yml
@@ -0,0 +1,92 @@
+# Run CUDA benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
+name: Cuda weekly benchmarks
+
+on:
+  schedule:
+    # Weekly benchmarks will be triggered each Saturday at 1 a.m. UTC.
+    - cron: '0 1 * * 6'
+
+jobs:
+  run-benchmarks-1-h100:
+    name: Run benchmarks (1xH100)
+    if: github.repository == 'zama-ai/tfhe-rs'
+    uses: ./.github/workflows/benchmark_gpu_integer_common.yml
+    with:
+      profile: single-h100
+      hardware_name: n3-H100x1
+      command: integer,integer_multi_bit
+      op_flavor: default
+      bench_type: latency
+      all_precisions: true
+    secrets:
+      FHE_ACTIONS_TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
+      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
+      SLAB_URL: ${{ secrets.SLAB_URL }}
+      JOB_SECRET: ${{ secrets.JOB_SECRET }}
+      SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+      BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
+      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+  run-benchmarks-2-h100:
+    name: Run benchmarks (2xH100)
+    if: github.repository == 'zama-ai/tfhe-rs'
+    uses: ./.github/workflows/benchmark_gpu_integer_common.yml
+    with:
+      profile: 2-h100
+      hardware_name: n3-H100x2
+      command: integer_multi_bit
+      op_flavor: default
+      bench_type: latency
+      all_precisions: true
+    secrets:
+      FHE_ACTIONS_TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
+      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
+      SLAB_URL: ${{ secrets.SLAB_URL }}
+      JOB_SECRET: ${{ secrets.JOB_SECRET }}
+      SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+      BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
+      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+  run-benchmarks-8-h100:
+    name: Run benchmarks (8xH100)
+    if: github.repository == 'zama-ai/tfhe-rs'
+    uses: ./.github/workflows/benchmark_gpu_integer_common.yml
+    with:
+      profile: multi-h100
+      hardware_name: n3-H100x8
+      command: integer_multi_bit
+      op_flavor: default
+      bench_type: latency
+      all_precisions: true
+    secrets:
+      FHE_ACTIONS_TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
+      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
+      SLAB_URL: ${{ secrets.SLAB_URL }}
+      JOB_SECRET: ${{ secrets.JOB_SECRET }}
+      SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+      BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
+      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+  run-benchmarks-l40:
+    name: Run benchmarks (L40)
+    if: github.repository == 'zama-ai/tfhe-rs'
+    uses: ./.github/workflows/benchmark_gpu_integer_common.yml
+    with:
+      profile: l40
+      hardware_name: n3-L40x1
+      command: integer_multi_bit,integer_compression,pbs,ks
+      op_flavor: default
+      bench_type: latency
+      all_precisions: true
+    secrets:
+      FHE_ACTIONS_TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
+      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
+      SLAB_URL: ${{ secrets.SLAB_URL }}
+      JOB_SECRET: ${{ secrets.JOB_SECRET }}
+      SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+      BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
+      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
diff --git a/.github/workflows/benchmark_gpu_l40.yml b/.github/workflows/benchmark_gpu_l40.yml deleted file mode 100644 index 6d9c65e4dd..0000000000 --- a/.github/workflows/benchmark_gpu_l40.yml +++ /dev/null @@ -1,205 +0,0 @@ -# Run benchmarks on an L40 VM and return parsed results to Slab CI bot. -name: Cuda benchmarks (L40) - -on: - workflow_dispatch: - schedule: - # Weekly benchmarks will be triggered each Saturday at 1a.m. 
- - cron: '0 1 * * 6' - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - -jobs: - setup-instance: - name: Setup instance (cuda-l40-benchmarks) - runs-on: ubuntu-latest - if: github.event_name != 'schedule' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') - outputs: - runner-name: ${{ steps.start-instance.outputs.label }} - steps: - - name: Start instance - id: start-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: hyperstack - profile: l40 - - cuda-l40-benchmarks: - name: Cuda benchmarks (L40) - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - timeout-minutes: 1440 # 24 hours - continue-on-error: true - strategy: - fail-fast: false - max-parallel: 1 - matrix: - command: [integer_multi_bit] - op_flavor: [default] - # explicit include-based build matrix, of known valid options - include: - - os: ubuntu-22.04 - cuda: "12.2" - gcc: 11 - env: - CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} - CMAKE_VERSION: 3.29.6 - steps: - # Mandatory on hyperstack since a bootable volume is not re-usable yet. 
- - name: Install dependencies - run: | - sudo apt update - sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev - wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz - tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz - cd cmake-${{ env.CMAKE_VERSION }} - ./bootstrap - make -j"$(nproc)" - sudo make install - - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Get benchmark details - run: | - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - - - name: Set up home - # "Install rust" step require root user to have a HOME directory which is not set. - run: | - echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" - - - name: Install rust - uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a - with: - toolchain: nightly - - - name: Export CUDA variables - if: ${{ !cancelled() }} - run: | - { - echo "CUDA_PATH=$CUDA_PATH"; - echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH"; - echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc"; - } >> "${GITHUB_ENV}" - echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" - - # Specify the correct host compilers - - name: Export gcc and g++ variables - if: ${{ !cancelled() }} - run: | - { - echo "CC=/usr/bin/gcc-${{ matrix.gcc }}"; - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}"; - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}"; - } >> "${GITHUB_ENV}" - - - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - repository: zama-ai/slab - path: slab - token: ${{ secrets.FHE_ACTIONS_TOKEN }} - - - name: Check device is detected - if: ${{ !cancelled() }} - run: nvidia-smi - - - name: Run benchmarks 
with AVX512 - run: | - make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu - - - name: Run compression benchmarks with AVX512 - run: | - make bench_integer_compression_gpu - - - name: Run PBS benchmarks - run: | - make bench_pbs_gpu - - - name: Run KS benchmarks - run: | - make bench_ks_gpu - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ - --database tfhe_rs \ - --hardware "n3-L40x1" \ - --backend gpu \ - --project-version "${{ env.COMMIT_HASH }}" \ - --branch ${{ github.ref_name }} \ - --commit-date "${{ env.COMMIT_DATE }}" \ - --bench-date "${{ env.BENCH_DATE }}" \ - --walk-subdirs \ - --name-suffix avx512 - - - name: Upload parsed results artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 - with: - name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }} - path: ${{ env.RESULTS_FILENAME }} - - - name: Send data to Slab - shell: bash - run: | - python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ - --slab-url "${{ secrets.SLAB_URL }}" - - slack-notify: - name: Slack Notification - needs: [ setup-instance, cuda-l40-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-l40-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ needs.cuda-l40-benchmarks.result }} - SLACK_MESSAGE: "Cuda benchmarks (L40) finished with status: ${{ needs.cuda-l40-benchmarks.result }}. 
(${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: Teardown instance (cuda-l40-benchmarks) - if: ${{ always() && needs.setup-instance.result != 'skipped' }} - needs: [ setup-instance, cuda-l40-benchmarks, slack-notify ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8 - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - continue-on-error: true - uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-l40-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"