Skip to content

TorchBench Userbenchmark on A100 #1145

TorchBench Userbenchmark on A100

TorchBench Userbenchmark on A100 #1145

name: TorchBench Userbenchmark on A100
on:
schedule:
- cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST
workflow_dispatch:
inputs:
userbenchmark_name:
description: "Name of the user benchmark to run"
userbenchmark_options:
description: "Option of the user benchmark to run"
jobs:
run-userbenchmark:
runs-on: linux.aws.a100
timeout-minutes: 1440 # 24 hours
environment: docker-s3-upload
env:
BASE_CONDA_ENV: "torchbench"
CONDA_ENV: "userbenchmark-a100"
PLATFORM_NAME: "gcp_a100"
TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
steps:
- name: Checkout TorchBench
uses: actions/checkout@v3
- name: Install Conda
run: |
bash ./.ci/torchbench/install-conda.sh
- name: Install TorchBench
run: |
bash ./.ci/torchbench/install.sh
- name: Run user benchmark
run: |
set -x
. ${HOME}/miniconda3/etc/profile.d/conda.sh
conda activate "${CONDA_ENV}"
# remove old results
if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
MANUAL_WORKFLOW="${{ github.event.inputs.userbenchmark_name }}"
if [ -z "${MANUAL_WORKFLOW}" ]; then
# Figure out what userbenchmarks we should run, and run it
python ./.github/scripts/userbenchmark/schedule-benchmarks.py --platform ${PLATFORM_NAME}
if [ -d ./.userbenchmark ]; then
cp -r ./.userbenchmark benchmark-output
else
mkdir benchmark-output
fi
else
python run_benchmark.py "${{ github.event.inputs.userbenchmark_name }}" ${{ github.event.inputs.userbenchmark_options }}
cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" benchmark-output
ls -las benchmark-output
pwd
fi
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: TorchBench result
path: benchmark-output
- name: Upload result jsons to Scribe and S3
run: |
RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
echo "Uploading result jsons: ${RESULTS}"
for r in ${RESULTS[@]}; do
python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
done
- name: Clean up Conda env
if: always()
run: |
. ${HOME}/miniconda3/etc/profile.d/conda.sh
conda remove -n "${CONDA_ENV}" --all