-
Notifications
You must be signed in to change notification settings - Fork 155
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(gpu): add workflows for p5 bench (workflow dispatch only)
- Loading branch information
1 parent
c1fcd95
commit c1c30b7
Showing
3 changed files
with
410 additions
and
0 deletions.
There are no files selected for viewing
210 changes: 210 additions & 0 deletions
210
.github/workflows/integer_multi_bit_multi_gpu_benchmark_p5.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
# Run 64-bit multi-bit integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
name: AWS p5 integer multi GPU Multi-bit benchmarks

# Manual trigger only. Both inputs default to false, so a plain dispatch runs
# with the FAST_BENCH / BENCH_OP_FLAVOR defaults set in `env` below.
on:
  workflow_dispatch:
    inputs:
      all_precisions:
        description: "Run all precisions"
        type: boolean
        default: false
      fast_default:
        description: "Run only deduplicated default operations without scalar variants"
        type: boolean
        default: false

env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  # Quoted so the value stays the literal string "TRUE" instead of being read
  # as a YAML boolean; the all_precisions override writes the string "FALSE".
  FAST_BENCH: "TRUE"
  # Overridden to "fast_default" by a job step when the fast_default input is set.
  BENCH_OP_FLAVOR: default

jobs:
  # Starts the benchmark instance through Slab and exposes its runner label.
  setup-instance:
    name: Setup instance (cuda-integer-multi-bit-multi-gpu-p5-benchmarks)
    runs-on: ubuntu-latest
    # NOTE(review): this workflow is only triggered by workflow_dispatch, so the
    # `push` arm of this condition can never match as written.
    if: ${{ (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
      - name: Start instance
        id: start-instance
        uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: aws
          profile: multi-h100-nvlink

  # Builds and runs the benchmarks on the instance started above, then parses
  # the results, uploads them as an artifact and posts them to Slab.
  cuda-integer-multi-bit-multi-gpu-p5-benchmarks:
    name: Execute multi GPU P5 integer multi-bit benchmarks
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    timeout-minutes: 1440  # 24 hours
    continue-on-error: true
    strategy:
      fail-fast: false
      max-parallel: 1
      matrix:
        include:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
    env:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
      # Required by the "Install dependencies" step, which builds CMake from
      # source. It was previously undefined, which made the download URL invalid.
      # NOTE(review): confirm this is the CMake release the project standardizes on.
      CMAKE_VERSION: "3.29.6"

    steps:
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y checkinstall zlib1g-dev libssl-dev
          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
          cd cmake-${{ env.CMAKE_VERSION }}
          ./bootstrap
          make -j"$(nproc)"
          sudo make install

      # Full history (fetch-depth: 0) so `git describe --tags` below can find tags.
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
        with:
          fetch-depth: 0

      # Exports BENCH_DATE, COMMIT_DATE and COMMIT_HASH for the "Parse results" step.
      - name: Get benchmark details
        run: |
          {
            echo "BENCH_DATE=$(date --iso-8601=seconds)";
            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

      - name: Set up home
        # "Install rust" step require root user to have a HOME directory which is not set.
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
        uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
        with:
          toolchain: nightly

      - name: Export CUDA variables
        if: ${{ !cancelled() }}
        run: |
          {
            echo "CUDA_PATH=$CUDA_PATH";
            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
          } >> "${GITHUB_ENV}"
          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"

      # Specify the correct host compilers
      - name: Export gcc and g++ variables
        if: ${{ !cancelled() }}
        run: |
          {
            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
          } >> "${GITHUB_ENV}"

      # Checked out into ./slab; its scripts/hmac_calculator.sh is used when
      # sending results to Slab.
      - name: Checkout Slab repo
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
        with:
          repository: zama-ai/slab
          path: slab
          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
        run: |
          echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"

      - name: Should run fast subset benchmarks
        if: inputs.fast_default
        run: |
          echo "BENCH_OP_FLAVOR=fast_default" >> "${GITHUB_ENV}"

      - name: Run multi-bit benchmarks with AVX512
        run: |
          make bench_unsigned_integer_multi_bit_gpu

      - name: Parse results
        env:
          # Passed through the environment and quoted below so a branch name
          # with spaces or shell metacharacters cannot alter the command.
          REF_NAME: ${{ github.ref_name }}
        run: |
          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
            --database tfhe_rs \
            --hardware "p5.48xlarge" \
            --backend gpu \
            --project-version "${{ env.COMMIT_HASH }}" \
            --branch "${REF_NAME}" \
            --commit-date "${{ env.COMMIT_DATE }}" \
            --bench-date "${{ env.BENCH_DATE }}" \
            --walk-subdirs \
            --name-suffix avx512 \
            --throughput

      - name: Upload parsed results artifact
        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Send data to Slab
        shell: bash
        env:
          # Secrets are handed to the script as environment variables rather
          # than being interpolated into the script text.
          JOB_SECRET: ${{ secrets.JOB_SECRET }}
          SLAB_URL: ${{ secrets.SLAB_URL }}
        # NOTE(review): `curl -k` disables TLS certificate verification; confirm
        # this is still required for the Slab endpoint.
        run: |
          echo "Computing HMac on results file"
          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} "${JOB_SECRET}")"
          echo "Sending results to Slab..."
          curl -v -k \
            -H "Content-Type: application/json" \
            -H "X-Slab-Repository: ${{ github.repository }}" \
            -H "X-Slab-Command: store_data_v2" \
            -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
            -d @${{ env.RESULTS_FILENAME }} \
            "${SLAB_URL}"

  # Posts the benchmark job result to Slack when the run was not successful
  # and was not cancelled.
  slack-notify:
    name: Slack Notification
    needs: [setup-instance, cuda-integer-multi-bit-multi-gpu-p5-benchmarks]
    runs-on: ubuntu-latest
    if: ${{ !success() && !cancelled() }}
    continue-on-error: true
    steps:
      - name: Send message
        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
        env:
          SLACK_COLOR: ${{ needs.cuda-integer-multi-bit-multi-gpu-p5-benchmarks.result }}
          SLACK_MESSAGE: "Integer multi GPU multi-bit benchmarks finished with status: ${{ needs.cuda-integer-multi-bit-multi-gpu-p5-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

  # Always attempts to stop the instance unless setup-instance was skipped.
  teardown-instance:
    name: Teardown instance (cuda-integer-multi-bit-multi-gpu-p5-benchmarks)
    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [setup-instance, cuda-integer-multi-bit-multi-gpu-p5-benchmarks]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
        uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}

      # Runs only if a previous step in this job failed.
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-multi-gpu-p5-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
Oops, something went wrong.