CIFAR-10 benchmark CML #51
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: CIFAR-10 benchmark CML | |
on: | |
workflow_dispatch: | |
inputs: | |
git-ref: | |
description: Repo reference (branch, tag or SHA) | |
default: "main" | |
required: true | |
type: string | |
benchmark: | |
description: Benchmark to run (cifar-10-8b or cifar-10-16b) | |
default: "cifar-10-16b" | |
type: choice | |
options: | |
- "cifar-10-8b" | |
- "cifar-10-16b" | |
instance_type: | |
description: Instance type on which to launch benchmarks | |
default: "m6i.metal" | |
type: choice | |
options: | |
- "m6i.metal" | |
- "u-6tb1.112xlarge" | |
- "hpc7a.96xlarge" | |
num_samples: | |
description: Number of samples to use | |
default: "3" | |
type: string | |
required: true | |
p_error: | |
description: P-error to use | |
default: "0.01" | |
type: string | |
required: true | |
# FIXME: Add recurrent launching | |
# https://github.com/zama-ai/concrete-ml-internal/issues/1851 | |
# Global environnement variables | |
env: | |
# Github action url (used by slack notification) | |
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} | |
AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache | |
RUNNER_TOOL_CACHE: /opt/hostedtoolcache | |
# We need to use other settings than the CI here to be able to launch benchmarks | |
# on AWS Ireland that has the hpc7a (which are the fastest machines atm). | |
# We might clash with TFHE-rs benchmarks on hpc7a since they launch benches on all | |
# updates of main. | |
# Jobs | |
jobs: | |
start-cifar-runner: | |
name: Launch AWS instances | |
runs-on: ubuntu-20.04 | |
defaults: | |
run: | |
shell: bash | |
container: | |
image: ubuntu:20.04 | |
outputs: | |
label: ${{ steps.start-cifar10-8bit-runner.outputs.label }} | |
ec2-instance-id: ${{ steps.start-cifar10-8bit-runner.outputs.ec2-instance-id || '' }} | |
steps: | |
- name: Configure AWS credentials | |
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a | |
with: | |
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | |
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | |
aws-region: ${{ secrets.AWS_BENCH_REGION }} | |
- name: Start CIFAR-10 8-bit runner | |
id: start-cifar10-8bit-runner | |
uses: machulav/ec2-github-runner@2c4d1dcf2c54673ed3bfd194c4b6919ed396a209 | |
with: | |
mode: start | |
github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }} | |
ec2-image-id: ${{ secrets.AWS_BENCH_EC2_AMI }} | |
ec2-instance-type: ${{ github.event.inputs.instance_type }} | |
subnet-id: ${{ secrets.AWS_BENCH_EC2_SUBNET_ID }} | |
security-group-id: ${{ secrets.AWS_BENCH_EC2_SECURITY_GROUP_ID }} | |
aws-resource-tags: > | |
[ | |
{"Key": "Name", "Value": "cml-benchmark-cifar10"}, | |
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"}, | |
{"Key": "Actor", "Value": "${{ github.actor }}"}, | |
{"Key": "Action", "Value": "${{ github.action }}"}, | |
{"Key": "GitHash", "Value": "${{ github.sha }}"}, | |
{"Key": "RefName", "Value": "${{ github.ref_name }}"}, | |
{"Key": "RunId", "Value": "${{ github.run_id }}"}, | |
{"Key": "Team", "Value": "CML"} | |
] | |
run-cifar-10: | |
needs: [start-cifar-runner] | |
name: Run benchmark | |
runs-on: ${{ needs.start-cifar-runner.outputs.label }} | |
env: | |
PIP_INDEX_URL: ${{ secrets.PIP_INDEX_URL }} | |
PIP_EXTRA_INDEX_URL: ${{ secrets.PIP_EXTRA_INDEX_URL }} | |
steps: | |
- name: Add masks | |
run: | | |
echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL_FOR_MASK }}" | |
echo "::add-mask::${{ secrets.INTERNAL_REPO_URL_FOR_MASK }}" | |
echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL }}" | |
echo "::add-mask::${{ secrets.INTERNAL_REPO_URL }}" | |
- name: Checkout code | |
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 | |
with: | |
lfs: true | |
ref: ${{ github.event.inputs.git-ref }} | |
- name: Set up Python | |
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c | |
with: | |
python-version: "3.8" | |
- name: Install dependencies | |
id: install-deps | |
run: | | |
apt update | |
apt install --no-install-recommends -y gnome-keyring | |
apt install -y graphviz* graphviz-dev libgraphviz-dev pkg-config python3-dev | |
apt-mark hold docker.io | |
./script/make_utils/setup_os_deps.sh | |
make setup_env | |
# CIFAR-10-8b benchmark | |
- name: Benchmark - CIFAR-10-8b | |
if: github.event.inputs.benchmark == 'cifar-10-8b' | |
run: | | |
source .venv/bin/activate | |
NUM_SAMPLES=${{ github.event.inputs.num_samples }} python3 ./use_case_examples/cifar/cifar_brevitas_with_model_splitting/infer_fhe.py | |
python3 ./benchmarks/convert_cifar.py --model-name "8-bit-split-v0" | |
# CIFAR-10-16b benchmark | |
- name: Benchmark - CIFAR-10-16b | |
if: github.event.inputs.benchmark == 'cifar-10-16b' | |
run: | | |
source .venv/bin/activate | |
NUM_SAMPLES=${{ github.event.inputs.num_samples }} P_ERROR=${{ github.event.inputs.p_error }} python3 ./use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py | |
python3 ./benchmarks/convert_cifar.py --model-name "16-bits-trained-v0" | |
- name: Archive raw predictions | |
uses: actions/[email protected] | |
with: | |
name: predictions.csv | |
path: inference_results.csv | |
- name: Archive metrics | |
uses: actions/[email protected] | |
with: | |
name: metrics.json | |
path: to_upload.json | |
- name: Archive MLIR | |
uses: actions/[email protected] | |
with: | |
name: mlir.txt | |
path: cifar10.mlir | |
- name: Archive Graph | |
uses: actions/[email protected] | |
with: | |
name: graph.txt | |
path: cifar10.graph | |
- name: Archive client | |
uses: actions/[email protected] | |
with: | |
name: client.zip | |
path: client_server/client.zip | |
- name: Archive server | |
uses: actions/[email protected] | |
with: | |
name: server.zip | |
path: client_server/server.zip | |
# We need to keep this as the last step to avoid not uploading the artifacts | |
# if the step crashes | |
- name: Upload results | |
id: upload-results | |
run: | | |
# Log the json | |
cat to_upload.json | jq | |
# We need to sleep to avoid log issues | |
sleep 1. | |
# Upload the json to the benchmark database | |
curl --fail-with-body \ | |
-H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \ | |
-H "Content-Type: application/json; charset=UTF-8" \ | |
-d @to_upload.json \ | |
-X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment" | |
stop-runner: | |
name: Stop EC2 runner | |
needs: [run-cifar-10, start-cifar-runner] | |
runs-on: ubuntu-20.04 | |
timeout-minutes: 2 | |
if: ${{ always() }} | |
steps: | |
- name: Configure AWS credentials | |
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a | |
with: | |
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | |
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | |
aws-region: ${{ secrets.AWS_REGION }} | |
- name: Stop EC2 runner | |
uses: machulav/ec2-github-runner@2c4d1dcf2c54673ed3bfd194c4b6919ed396a209 | |
if: ${{ always() }} | |
with: | |
mode: stop | |
github-token: ${{ secrets.EC2_RUNNER_BOT_TOKEN }} | |
label: ${{ needs.start-cifar-runner.outputs.label }} | |
ec2-instance-id: ${{ needs.start-cifar-runner.outputs.ec2-instance-id }} | |
slack-notification: | |
runs-on: ubuntu-20.04 | |
needs: [run-cifar-10] | |
steps: | |
- name: Slack Notification | |
if: ${{ always() }} | |
continue-on-error: true | |
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8 | |
env: | |
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} | |
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png | |
SLACK_COLOR: ${{ needs.run-cifar-10.result }} | |
SLACK_MESSAGE: "Benchmark action: ${{ github.event.inputs.benchmark }} (${{ env.ACTION_RUN_URL }}) ended with result: ${{ needs.run-cifar-10.result }}" | |
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} | |
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} | |