# A graph-based pipeline splitting #322
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# GPU test workflow: triggers, concurrency policy and default shell.
name: GPU Tests

on:
  # Every push to main runs the workflow.
  push:
    branches:
      - main
  # Pull requests run the workflow only when they touch a path matched below.
  # Patterns are evaluated in order, so the '!' exclusions must stay after the
  # inclusions they are meant to carve out.
  pull_request:
    paths:
      - '.github/workflows/**.yaml'
      - 'pippy/**'
      - 'test/**'
      - 'examples/**'
      - '!docs/**'
      - '!**.md'
      - 'requirements.txt'

concurrency:
  # Cancel CI on previous commit when a new commit is pushed to the same branch.
  # The group key is the PR number when there is one, otherwise the git ref.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

defaults:
  run:
    # -l starts bash as a login shell; -e and -o pipefail make a step fail as
    # soon as any command (or any stage of a pipeline) fails.
    # NOTE(review): the login shell is presumably what lets the conda setup
    # from setup-miniconda take effect in every step - confirm.
    shell: bash -l -eo pipefail {0}
jobs:
  # Forward-only tests and runnable examples on a multi-GPU runner.
  forward_tests_4gpu:
    runs-on: linux.g5.12xlarge.nvidia.gpu
    strategy:
      matrix:
        # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
        python-version: ['3.10']
    steps:
      - name: Check out repo
        uses: actions/checkout@v3
      - name: Setup conda env
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          miniconda-version: "latest"
          activate-environment: test
          python-version: ${{ matrix.python-version }}
      # Each `run` step starts its own shell, so this activation does not
      # carry over to the following steps.
      # NOTE(review): later steps presumably pick up the `test` env through
      # the login shell configured in `defaults.run.shell` - confirm.
      - name: Activate conda env
        run: conda activate test
      - name: Install dependencies
        run: |
          pip install numpy expecttest
          pip install --pre -r requirements.txt \
            --find-links https://download.pytorch.org/whl/nightly/cu121/torch_nightly.html
      - name: Install pippy
        run: python setup.py install
      - name: Run forward-only integration test
        run: torchrun --nproc-per-node 4 test/test_fwd.py
      - name: Run auto-split test
        run: torchrun --nproc-per-node 4 test/test_autosplit.py
      - name: Test skip connection support
        run: torchrun --nproc-per-node 4 test/test_skip_conn.py
      - name: Test trace on different device
        run: torchrun --nproc-per-node 2 test/test_cpu_init.py
      - name: Test Composability
        run: python test/test_composability.py
      - name: Run example
        run: torchrun --nproc-per-node 3 examples/basic/example.py
      - name: Run example with manual stage
        run: torchrun --nproc-per-node 3 examples/basic/example_manual_stage.py
      - name: Run training example
        run: torchrun --nproc-per-node 3 examples/basic/example_train.py
      - name: Install Transformers for real model tests
        run: pip install transformers
      - name: Run GPT2
        run: torchrun --nproc-per-node 4 examples/huggingface/pippy_gpt2.py
      - name: Test CPU init + GPU run
        run: torchrun --nproc-per-node 4 examples/cpu_init/gpt2_cpu_init.py
      # Currently disabled:
      # - name: Run T5
      #   run: torchrun --nproc-per-node 2 examples/huggingface/pippy_t5.py
      # - name: Run BERT
      #   run: torchrun --nproc-per-node 4 examples/huggingface/pippy_bert.py

  # Forward+backward tests, run once per pipeline schedule in the matrix.
  backward_tests_4gpu:
    runs-on: linux.g5.12xlarge.nvidia.gpu
    strategy:
      matrix:
        # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
        python-version: ['3.10']
        schedule: ["gpipe", "1f1b"]
    steps:
      - name: Check out repo
        uses: actions/checkout@v3
      - name: Setup conda env
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          miniconda-version: "latest"
          activate-environment: test
          python-version: ${{ matrix.python-version }}
      # Each `run` step starts its own shell, so this activation does not
      # carry over to the following steps.
      # NOTE(review): later steps presumably pick up the `test` env through
      # the login shell configured in `defaults.run.shell` - confirm.
      - name: Activate conda env
        run: conda activate test
      - name: Install dependencies
        # The literal block scalar (`|`) keeps the two pip commands on
        # separate lines. A plain multi-line scalar would fold them into the
        # single command `pip install numpy pip install -r ...`.
        # --pre matches the forward job so both jobs resolve packages from the
        # nightly index the same way.
        run: |
          pip install numpy
          pip install --pre -r requirements.txt \
            --find-links https://download.pytorch.org/whl/nightly/cu121/torch_nightly.html
      - name: Install pippy
        run: python setup.py install
      - name: Run forward-backward test
        run: torchrun --nproc-per-node 4 test/test_bwd.py --schedule ${{ matrix.schedule }}
      - name: Run optimizer test
        run: torchrun --nproc-per-node 4 test/test_optim.py --schedule ${{ matrix.schedule }}
      - name: Test gradient equivalence
        run: torchrun --nproc-per-node 4 test/test_grad.py --schedule ${{ matrix.schedule }}
      - name: Test interleaving schedules
        run: torchrun --nproc-per-node 4 test/test_interleave.py --schedule ${{ matrix.schedule }}