# Satyaog/feature/covalent #58
# (GitHub viewer notice, not part of the workflow) This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow: provision cloud GPU nodes through milabench/covalent, run the
# benchmark suite on them, push the report, then tear the nodes down.
name: cloud-tests

on:
  # Runs for pull requests
  pull_request:
    branches:
      - master

permissions:
  id-token: write
  # Write access is requested so the "Summary" step can push the report.
  contents: write

jobs:
  cloud-tests:
    strategy:
      fail-fast: true
      matrix:
        include:
          - arch: cuda
            exclude: "no-cuda"
            run_on: azure__a100
          # - arch: rocm
          #   exclude: "no-rocm"

    runs-on: ubuntu-latest
    environment: cloud-ci

    # Cancel previous jobs if a new version was pushed
    concurrency:
      group: "${{ github.ref }}-${{ matrix.arch }}-${{ matrix.run_on }}"
      cancel-in-progress: true

    defaults:
      run:
        shell: bash -el {0}

    env:
      MILABENCH_CONFIG: "config/standard.yaml"
      MILABENCH_SYSTEM: "config/cloud-multinodes-system.yaml"
      MILABENCH_BASE: "output"
      MILABENCH_ARGS: ""
      MILABENCH_DASH: "no"
      ARM_TENANT_ID: "${{ secrets.ARM_TENANT_ID }}"
      ARM_SUBSCRIPTION_ID: "${{ secrets.ARM_SUBSCRIPTION_ID }}"
      AZURE_CORE_OUTPUT: "none"
      # Single source of truth for the benchmarks skipped by the
      # install / prepare / run steps, so the three lists cannot drift apart.
      EXCLUDED_BENCHMARKS: "llm-full-mp-gpus,llm-full-mp-nodes,llm-lora-ddp-gpus,llm-lora-ddp-nodes,llm-lora-mp-gpus,llm-lora-single"

    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{ github.token }}

      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string instead of parsing a float.
          python-version: "3.9"

      # Follow
      # https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/guides/service_principal_client_secret
      # to generate a clientId as well as a clientSecret
      - name: Azure login
        uses: azure/login@v2
        with:
          creds: |
            {
              "clientId": "${{ secrets.ARM_CLIENT_ID }}",
              "clientSecret": "${{ secrets.ARM_CLIENT_SECRET }}",
              "subscriptionId": "${{ secrets.ARM_SUBSCRIPTION_ID }}",
              "tenantId": "${{ secrets.ARM_TENANT_ID }}"
            }

      - name: dependencies
        run: |
          python -m pip install -U pip
          # `poetry lock --no-update` is Poetry 1.x syntax; stay on 1.x so an
          # unpinned upgrade of the tool cannot break this step.
          python -m pip install -U "poetry<2"
          poetry lock --no-update
          poetry install

      - name: setup cloud credentials
        # Secrets are handed over as environment variables rather than being
        # expanded into the script text, so quotes, `$` or backticks inside a
        # secret cannot alter the shell commands.
        env:
          COVALENT_EC2_EXECUTOR_KEYPAIR: ${{ secrets.COVALENT_EC2_EXECUTOR_KEYPAIR }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        run: |
          # Create the files owner-only from the start instead of relying
          # solely on the chmod at the end.
          umask 077
          mkdir -p ~/.aws
          mkdir -p ~/.ssh/covalent
          printf '%s\n' "$COVALENT_EC2_EXECUTOR_KEYPAIR" >~/.ssh/covalent/covalent-ec2-executor-keypair.pem
          printf '%s\n' "[default]" >~/.aws/credentials
          printf '%s\n' "aws_access_key_id=$AWS_ACCESS_KEY_ID" >>~/.aws/credentials
          printf '%s\n' "aws_secret_access_key=$AWS_SECRET_ACCESS_KEY" >>~/.aws/credentials
          chmod -R a-rwx,u+rwX ~/.aws ~/.ssh

      - name: start covalent server
        run: |
          poetry run -- python3 -m milabench.scripts.covalent serve start --develop

      - name: setup cloud
        run: |
          # The command's stdout is saved as a per-target system file, and
          # MILABENCH_SYSTEM is repointed to it for all following steps.
          poetry run milabench cloud \
            --setup \
            --run-on "${{ matrix.run_on }}" \
            --system "$MILABENCH_SYSTEM" >"$MILABENCH_SYSTEM.${{ matrix.run_on }}"
          echo "MILABENCH_SYSTEM=$MILABENCH_SYSTEM.${{ matrix.run_on }}" >>"$GITHUB_ENV"

      - name: install benchmarks
        run: |
          poetry run milabench install --variant "${{ matrix.arch }}" \
            --exclude "$EXCLUDED_BENCHMARKS"

      - name: prepare benchmarks
        run: |
          poetry run milabench prepare \
            --exclude "$EXCLUDED_BENCHMARKS"

      - name: run benchmarks
        run: |
          poetry run milabench run \
            --exclude "$EXCLUDED_BENCHMARKS"

      - name: Summary
        env:
          GITHUB_TOKEN: ${{ github.token }}
          # Passed through the environment and quoted below: actor names such
          # as `dependabot[bot]` contain shell glob characters.
          GIT_SERVER_URL: ${{ github.server_url }}
          GIT_ACTOR: ${{ github.actor }}
        run: |
          git config "credential.${GIT_SERVER_URL}.username" "${GIT_ACTOR}"
          git config credential.helper '!f() { test "$1" = get && echo "password=$GITHUB_TOKEN"; }; f'
          # NOTE(review): "[email protected]" looks like e-mail obfuscation
          # residue rather than a real address - restore the intended value.
          git config --global user.email "[email protected]"
          git config --global user.name "GitHub CI"
          poetry run milabench report --push

      - name: DEBUG state file
        if: always()
        # Diagnostic only: a missing state file must not change the job result.
        continue-on-error: true
        run: |
          # NOTE(review): Terraform state can embed credentials that are not
          # registered as GitHub secrets and therefore are not masked in the
          # log - confirm this dump is acceptable for this repository.
          cat /tmp/milabench/covalent_venv/lib/python*/site-packages/covalent_azure_plugin/infra/*.tfstate

      - name: teardown cloud
        if: always()
        run: |
          # "setup cloud" appends ".<run_on>" to MILABENCH_SYSTEM; when the
          # path without its last extension exists, switch back to it.
          if [[ -f "${MILABENCH_SYSTEM%.*}" ]]
          then
            export MILABENCH_SYSTEM="${MILABENCH_SYSTEM%.*}"
          fi
          poetry run milabench cloud \
            --teardown \
            --run-on "${{ matrix.run_on }}" \
            --all

      - name: DEBUG logs
        if: always()
        # Diagnostic only: a missing log file must not change the job result.
        continue-on-error: true
        run: |
          cat ~/.cache/covalent/covalent_ui.log