# Skip to content
#
# Xccl process group for Pytorch #15
#
# Xccl process group for Pytorch
#
# Xccl process group for Pytorch #15
#
# Workflow file for this run

# Workflow identity, triggers and default token permissions.
name: Lint

on:
  # Pull requests, ignoring the `nightly` branch.
  pull_request:
    branches-ignore: ['nightly']
  # Pushes to main, release branches and landchecks branches.
  push:
    branches: ['main', 'release/*', 'landchecks/*']
  # Manual runs from the Actions UI / API.
  workflow_dispatch:

# Read-only access for every token scope.
permissions: read-all
# The names of steps that actually test the code should be suffixed with `(nonretryable)`.
# When any other step fails, its job will be retried once by retryBot.
jobs:
  # Calls the reusable runner-determinator workflow; the jobs below prepend its
  # `label-type` output to their runner names.
  get-label-type:
    name: get-label-type
    uses: ./.github/workflows/_runner-determinator.yml
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
      curr_branch: ${{ github.head_ref || github.ref_name }}

  # Runs lintrunner.sh with `--take CLANGTIDY,CLANGFORMAT` and CLANG=1.
  lintrunner-clang:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: get-label-type
    with:
      timeout: 120
      runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.2xlarge"
      docker-image: pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter
      # NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
      # to run git rev-parse HEAD~:.ci/docker when a new image is needed
      fetch-depth: 0
      submodules: true
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        export ADDITIONAL_LINTRUNNER_ARGS="--take CLANGTIDY,CLANGFORMAT"
        export CLANG=1
        .github/scripts/lintrunner.sh

  # Runs lintrunner.sh with `--skip CLANGTIDY,CLANGFORMAT` (the complement of
  # the lintrunner-clang job's linter selection).
  lintrunner-noclang:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: get-label-type
    with:
      timeout: 120
      runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.2xlarge"
      docker-image: pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter
      # NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
      # to run git rev-parse HEAD~:.ci/docker when a new image is needed
      fetch-depth: 0
      submodules: true
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        export ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT"
        .github/scripts/lintrunner.sh

  # Grep-based repository hygiene checks, ciflow tag validation, the C++ docs
  # doxygen check and the CUDA kernel launch check.
  quick-checks:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: get-label-type
    with:
      runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.2xlarge"
      docker-image: pytorch-linux-focal-linter
      fetch-depth: 0
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        # Ensure no non-breaking spaces
        # NB: We use 'printf' below rather than '\u000a' since bash pre-4.2
        # does not support the '\u000a' syntax (which is relevant for local linters)
        (! git --no-pager grep -In "$(printf '\xC2\xA0')" -- . || (echo "The above lines have non-breaking spaces (U+00A0); please convert them to spaces (U+0020)"; false))
        # Ensure cross-OS compatible file names
        # NB: the double quote in the message must be backslash-escaped; a bare ""
        # inside a double-quoted string just closes and reopens it and prints nothing.
        (! git ls-files | grep -E '([<>:"|?*]|[ .]$)' || (echo "The above file names are not valid across all operating systems. Please ensure they don't contain the characters '<>:\"|?*' and don't end with a white space or a '.' "; false))
        # Ensure no versionless Python shebangs
        (! git --no-pager grep -In '#!.*python$' -- . || (echo "The above lines have versionless Python shebangs; please specify either python2 or python3"; false))
        # Ensure ciflow tags mentioned in config
        python3 .github/scripts/collect_ciflow_labels.py --validate-tags
        # C++ docs check
        pushd docs/cpp/source
        ./check-doxygen.sh
        popd
        # CUDA kernel launch check
        set -eux
        python3 torch/testing/_internal/check_kernel_launches.py |& tee cuda_kernel_launch_checks.txt

  # Runs .github/scripts/pr-sanity-check.sh with the PR's base and head SHAs.
  pr-sanity-checks:
    name: pr-sanity-checks
    needs: get-label-type
    runs-on: [self-hosted, "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.large"]
    # Only run this on pull requests. This check is simple enough to be done without a Docker image
    if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'skip-pr-sanity-checks')
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          submodules: false
          fetch-depth: -1
      - name: PR size check (nonretryable)
        env:
          BASE: ${{ github.event.pull_request.base.sha }}
          HEAD: ${{ github.event.pull_request.head.sha }}
        run: |
          bash .github/scripts/pr-sanity-check.sh

  # Regenerates the CI workflows and fails if the committed copies differ, then
  # runs ensure_actions_will_cancel.py.
  workflow-checks:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: get-label-type
    with:
      runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.2xlarge"
      docker-image: pytorch-linux-focal-linter
      fetch-depth: -1
      submodules: true
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        # Regenerate workflows
        .github/scripts/generate_ci_workflows.py
        # RC defers the failure so the cancellation check below still runs
        RC=0
        # Assert that regenerating the workflows didn't change them
        if ! .github/scripts/report_git_status.sh .github/workflows; then
          echo
          echo 'As shown by the above diff, the committed .github/workflows'
          echo 'are not up to date according to .github/templates.'
          echo 'Please run this command, commit, and push again to your PR:'
          echo
          echo ' .github/scripts/generate_ci_workflows.py'
          echo
          echo 'If running that command does nothing, you may need to rebase'
          echo 'onto a more recent commit from the PyTorch main branch.'
          RC=1
        fi
        # Check that jobs will be cancelled
        .github/scripts/ensure_actions_will_cancel.py
        exit $RC

  # Regenerates Markdown tables of contents with markdown-toc and fails if any
  # tracked file changes as a result.
  toc:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: get-label-type
    with:
      runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.2xlarge"
      docker-image: pytorch-linux-focal-linter
      fetch-depth: 0
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        # Regenerate ToCs and check that they didn't change
        set -eu
        export PATH=~/.npm-global/bin:"$PATH"
        for FILE in $(git grep -Il '<!-- toc -->' -- '**.md'); do
          markdown-toc --bullets='-' -i "$FILE"
        done
        if ! .github/scripts/report_git_status.sh .; then
          echo
          echo 'As shown by the above diff, the table of contents in one or'
          echo 'more Markdown files is not up to date with the file contents.'
          echo 'You can either apply that Git diff directly to correct the'
          echo 'table of contents, or if you have npm installed, you can'
          echo 'install the npm package markdown-toc and run the following'
          # shellcheck disable=SC2016
          echo 'command (replacing $FILE with the filename for which you want'
          echo 'to regenerate the table of contents):'
          echo
          # shellcheck disable=SC2016
          echo " markdown-toc --bullets='-' -i \"\$FILE\""
          false
        fi

  # Runs the unittest suites under tools/test and .github/scripts
  # (pytorch/pytorch repository only).
  test-tools:
    name: Test tools
    if: ${{ github.repository == 'pytorch/pytorch' }}
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: get-label-type
    with:
      runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.2xlarge"
      docker-image: pytorch-linux-focal-linter
      fetch-depth: 0
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        # Test tools
        python3 -m unittest discover -vs tools/test -p 'test_*.py'
        python3 -m unittest discover -vs .github/scripts -p 'test_*.py'

  # Installs a small set of test dependencies plus a nightly CPU torch wheel and
  # invokes test/run_test.py (pytorch/pytorch repository only).
  test_run_test:
    name: Test `run_test.py` is usable without boto3/rockset
    if: ${{ github.repository == 'pytorch/pytorch' }}
    runs-on: linux.20_04.4x
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          submodules: false
          fetch-depth: 1
      - name: Setup Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
          architecture: x64
          cache: pip
      - name: Install dependencies
        run: |
          pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
          pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
      - name: Run run_test.py (nonretryable)
        run: |
          # Run test_vulkan, which is a fast noop on Linux
          python3 test/run_test.py --include test_vulkan --verbose

  # Runs torch/utils/collect_env.py across a three-way matrix: with torch
  # installed, without torch, and on Python 3.6 (pytorch/pytorch repository only).
  test_collect_env:
    if: ${{ github.repository == 'pytorch/pytorch' }}
    name: Test collect_env
    runs-on: linux.20_04.4x
    strategy:
      matrix:
        test_type: [with_torch, without_torch, older_python_version]
    steps:
      # [see note: pytorch repo ref]
      # deep clone (fetch-depth 0) required, to allow us to use git log
      # NOTE(review): the step below uses fetch-depth: 1, which disagrees with
      # the comment above -- confirm which one is intended.
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          submodules: false
          fetch-depth: 1
      - name: Setup Python 3.6
        if: matrix.test_type == 'older_python_version'
        uses: actions/setup-python@v4
        with:
          python-version: '3.6'
          architecture: x64
          check-latest: false
          cache: pip
          cache-dependency-path: |
            **/requirements.txt
      - name: Setup Python 3.8
        if: matrix.test_type != 'older_python_version'
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
          architecture: x64
          check-latest: false
          cache: pip
          cache-dependency-path: |
            **/requirements.txt
      - name: Install torch
        if: matrix.test_type == 'with_torch'
        run: |
          pip install -r requirements.txt
          # Doesn't really matter what torch version, we just need ANY torch installed
          pip install 'torch==1.*'
      - name: Run collect_env.py (nonretryable)
        run: |
          # All we need to see is that it passes
          python3 torch/utils/collect_env.py
# One concurrency group per workflow + (PR number, or commit SHA when there is
# no PR) + whether the run was manually dispatched; a newer run in the same
# group cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}"