Skip to content

Commit

Permalink
chore(ci): refacto erc20 gpu bench workflows to reduce duplicates
Browse files Browse the repository at this point in the history
 Now there is only one entry point to trigger ERC20 benchmarks manually.
 This entry point uses a sub-workflow responsible for provisioning and running the benchmarks.

 A weekly workflow is also created with all the targets needed.
  • Loading branch information
soonum committed Nov 22, 2024
1 parent c3def17 commit 8c2358a
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 397 deletions.
212 changes: 29 additions & 183 deletions .github/workflows/benchmark_gpu_erc20.yml
Original file line number Diff line number Diff line change
@@ -1,195 +1,41 @@
# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
name: ERC20 GPU H100 benchmarks
# Run CUDA ERC20 benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
name: Cuda ERC20 benchmarks

on:
workflow_dispatch:
schedule:
# Weekly benchmarks will be triggered each Saturday at 5a.m.
- cron: '0 5 * * 6'

env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
inputs:
profile:
description: "Instance type"
required: true
type: choice
options:
- "l40 (n3-L40x1)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "multi-h100 (n3-H100x8)"

jobs:
setup-instance:
name: Setup instance (cuda-erc20-benchmarks)
parse-inputs:
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: single-h100

cuda-erc20-benchmarks:
name: Execute GPU integer benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
profile: ${{ steps.parse_profile.outputs.profile }}
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}

- name: Get benchmark details
run: |
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
- name: Set up home
# "Install rust" step require root user to have a HOME directory which is not set.
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
with:
toolchain: nightly

- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi

- name: Run benchmarks
run: |
make bench_hlapi_erc20_gpu
- name: Parse results
- name: Parse profile
id: parse_profile
run: |
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
--database tfhe_rs \
--hardware "n3-H100x1" \
--backend gpu \
--project-version "${{ env.COMMIT_HASH }}" \
--branch ${{ github.ref_name }} \
--commit-date "${{ env.COMMIT_DATE }}" \
--bench-date "${{ env.BENCH_DATE }}" \
--walk-subdirs \
--name-suffix avx512
- name: Parse PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \
--object-sizes \
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_erc20
path: ${{ env.RESULTS_FILENAME }}
echo "profile=$(echo '${{ inputs.profile }}' | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}"
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}

- name: Send data to Slab
shell: bash
- name: Parse hardware name
id: parse_hardware_name
run: |
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
slack-notify:
name: Slack Notification
needs: [ setup-instance, cuda-erc20-benchmarks ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }}
continue-on-error: true
steps:
- name: Send message
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }}
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

teardown-instance:
name: Teardown instance (cuda-erc20-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
echo "name=$(echo '${{ inputs.profile }}' | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}"
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
run-benchmarks:
name: Run benchmarks
needs: parse-inputs
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
with:
profile: ${{ needs.parse-inputs.outputs.profile }}
hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }}
secrets: inherit
Loading

0 comments on commit 8c2358a

Please sign in to comment.