# TorchBench Userbenchmark on A100 #1145
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Runs TorchBench userbenchmarks on an A100 runner.
#
# Two entry points:
#   * schedule          - nightly; schedule-benchmarks.py decides what to run.
#   * workflow_dispatch - manual; runs the single userbenchmark named in the
#                         `userbenchmark_name` input with the extra arguments
#                         given in `userbenchmark_options`.
#
# Results are collected into ./benchmark-output, uploaded as a workflow
# artifact, and every metrics-*.json found there is pushed to Scribe and S3.
name: TorchBench Userbenchmark on A100
on:
  schedule:
    - cron: '00 18 * * *'  # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST
  workflow_dispatch:
    inputs:
      userbenchmark_name:
        description: "Name of the user benchmark to run"
      userbenchmark_options:
        description: "Option of the user benchmark to run"
jobs:
  run-userbenchmark:
    runs-on: linux.aws.a100
    timeout-minutes: 1440  # 24 hours
    environment: docker-s3-upload
    env:
      BASE_CONDA_ENV: "torchbench"
      CONDA_ENV: "userbenchmark-a100"
      # NOTE(review): the runner label is linux.aws.a100 but results are
      # reported under the platform name gcp_a100 - confirm this is intended.
      PLATFORM_NAME: "gcp_a100"
      TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    steps:
      - name: Checkout TorchBench
        uses: actions/checkout@v4
      - name: Install Conda
        run: |
          bash ./.ci/torchbench/install-conda.sh
      - name: Install TorchBench
        run: |
          bash ./.ci/torchbench/install.sh
      - name: Run user benchmark
        env:
          # workflow_dispatch inputs are handed to the script through the
          # environment instead of being expanded into the script text, so
          # their contents can never be parsed as shell code. Both are empty
          # on scheduled runs.
          USERBENCHMARK_NAME: ${{ github.event.inputs.userbenchmark_name }}
          USERBENCHMARK_OPTIONS: ${{ github.event.inputs.userbenchmark_options }}
        run: |
          set -x
          . "${HOME}/miniconda3/etc/profile.d/conda.sh"
          conda activate "${CONDA_ENV}"
          # remove old results
          rm -rf benchmark-output .userbenchmark
          if [ -z "${USERBENCHMARK_NAME}" ]; then
            # Scheduled run: figure out what userbenchmarks we should run, and run them
            python ./.github/scripts/userbenchmark/schedule-benchmarks.py --platform "${PLATFORM_NAME}"
            if [ -d ./.userbenchmark ]; then
              cp -r ./.userbenchmark benchmark-output
            else
              # Nothing was produced; create an empty directory for the later steps
              mkdir benchmark-output
            fi
          else
            # Manual run: split the options string on whitespace into separate
            # arguments (quotes inside the string are NOT interpreted).
            read -r -a USERBENCHMARK_ARGS <<< "${USERBENCHMARK_OPTIONS}"
            python run_benchmark.py "${USERBENCHMARK_NAME}" "${USERBENCHMARK_ARGS[@]}"
            cp -r "./.userbenchmark/${USERBENCHMARK_NAME}" benchmark-output
            ls -las benchmark-output
            pwd
          fi
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: TorchBench result
          path: benchmark-output
      - name: Upload result jsons to Scribe and S3
        run: |
          # Every `run` step starts a fresh shell, so the conda env has to be
          # activated again to use the same python as the benchmark step.
          . "${HOME}/miniconda3/etc/profile.d/conda.sh"
          conda activate "${CONDA_ENV}"
          # benchmark-output lives in the working directory (it is created there
          # by the benchmark step and uploaded from there as the artifact).
          mapfile -t RESULTS < <(find "${PWD}/benchmark-output" -maxdepth 2 -name "metrics-*.json" | sort -r)
          echo "Uploading result jsons: ${RESULTS[*]}"
          for r in "${RESULTS[@]}"; do
            python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
            python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
          done
      - name: Clean up Conda env
        if: always()
        run: |
          . "${HOME}/miniconda3/etc/profile.d/conda.sh"
          # --yes: never wait on the confirmation prompt in non-interactive CI
          conda remove -n "${CONDA_ENV}" --all --yes