# demo training/inference script #1039
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity and triggers for the PiPPy CI suite.
name: PiPPy tests

# Runs on every push to main, and on pull requests that touch the workflow
# definitions, the library, its tests, its examples, or requirements.txt.
on:
  push:
    branches:
      - main
  pull_request:
    paths:
      - '.github/workflows/pippy**'
      - 'pippy/**'
      - 'test/**'
      - 'examples/**'
      # Patterns prefixed with `!` exclude matching paths from the filter.
      - '!docs/**'
      - '!**.md'
      - 'requirements.txt'

concurrency:
  # Cancel CI on previous commit when a new commit is pushed to the same branch
  # The group key is the workflow name plus the PR number, falling back to the
  # git ref when the run was not triggered by a pull request.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
# CI jobs. Active jobs: pytest_tests, integration_test_cpu,
# integration_test_gpu and programming_model_tests. The hf_model_tests and
# hf_examples_set1/2 jobs are kept below as commented-out definitions.
jobs:

  # Unit tests: runs pytest over test/ with coverage for the pippy package on
  # Python 3.8 and 3.9, skipping hf_test.py and the fx test files/directories.
  pytest_tests:
    runs-on: linux.4xlarge
    strategy:
      matrix:
        python-version: ["3.8", "3.9"]
    container:
      image: python:${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v2
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 pytest pytest-cov pytest-xdist numpy
          if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
      # NOTE(review): despite the step name, this installs upstream
      # huggingface/transformers at `main`, not a personal fork. The same
      # applies to the identically named steps in the jobs below.
      - name: Install pavel's huggingface fork
        run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
      - name: Install pippy
        run: "python setup.py install"
      - name: Test with pytest
        run: |
          pytest --cov=pippy --ignore=test/hf_test.py --ignore=test/test_fx.py --ignore=test/test_fx_experimental.py --ignore=test/fx test/

  # Disabled: sharded HF model tests (8 shards via pytest-shard).
  # hf_model_tests:
  #   runs-on: linux.12xlarge
  #   strategy:
  #     matrix:
  #       python-version: ["3.9"]
  #       shard: ["0", "1", "2", "3", "4", "5", "6", "7"]
  #   container:
  #     image: python:${{ matrix.python-version }}
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install flake8 pytest pytest-cov pytest-xdist pytest-shard numpy
  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
  #     - name: Install pavel's huggingface fork
  #       run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
  #     - name: Install pippy
  #       run: "python setup.py install"
  #     # Single thread to avoid OOM
  #     - name: Test forward only
  #       run: |
  #         pytest --shard-id=${{ matrix.shard }} --num-shards=8 -k 'not HFModelsForwardBackwardTest' -sv --cov=pippy test/hf_test.py
  #     - name: Test forward and backward
  #       run: |
  #         pytest --shard-id=${{ matrix.shard }} --num-shards=8 -k 'HFModelsForwardBackwardTest' -sv --cov=pippy test/hf_test.py

  # CPU integration tests: runs the test/local_test_*.py scripts across the
  # full matrix of Python version, replicate flag, schedule and checkpoint flag.
  integration_test_cpu:
    runs-on: linux.4xlarge
    strategy:
      matrix:
        python-version: ["3.8", "3.9"]
        replicate: ["0", "1"]
        schedule: ["FillDrain", "1F1B"]
        checkpoint: ["0", "1"]
    env:
      OMP_NUM_THREADS: "1"
    container:
      image: python:${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v2
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
          if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
      - name: Install pavel's huggingface fork
        run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
      - name: Install pippy
        run: "python setup.py install"
      - name: Run forward-only integration test
        run: python test/local_test_forward.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      - name: Run forward-only-auto-parallel integration test
        run: python test/local_test_forward_auto_parallel.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      - name: Run forward-loss-backward integration test
        run: python test/local_test_forward_backward.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      # The next step is invoked without --checkpoint.
      - name: Run null_coalesce_accumulate integration test
        run: python test/local_test_null_coalesce_accumulate.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }}
      - name: Run PP + DDP test
        run: python test/local_test_ddp.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      #- name: Run HF BERT forward-only integration test
      #  run: python test/local_test_forward_hf_bert.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      - name: Run HF GPT2 forward-only integration test
        run: python test/local_test_forward_hf_gpt2.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      # The visualizer and auto-split steps are invoked without --checkpoint.
      - name: Run visualizer test
        run: python test/local_test_visualizer.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }}
      - name: Run auto-split test
        run: python test/local_test_autosplit.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }}
      # The compile step is invoked without --replicate.
      - name: Run compile test
        run: python test/local_test_compile.py -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}

  # Disabled: HF example scripts, set 1 (min-GPT tracing, GPT2/BERT/T5
  # examples, T5 and BART translation fine-tuning).
  # hf_examples_set1:
  #   runs-on: linux.12xlarge
  #   strategy:
  #     matrix:
  #       python-version: ["3.9"]
  #       schedule: ["FillDrain", "1F1B"]
  #   env:
  #     OMP_NUM_THREADS: "1"
  #   container:
  #     image: python:${{ matrix.python-version }}
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
  #     - name: Install pavel's huggingface fork
  #       run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
  #     - name: Install pippy
  #       run: "python setup.py install"
  #     - name: Test min-GPT
  #       run: |
  #         git config --global --add safe.directory /__w/tau/tau
  #         git submodule update --init test/minGPT
  #         python test/min_gpt_tracing.py
  #     - name: Run GPT2 example
  #       run: python examples/hf/gpt2/pippy_gpt2.py -s ${{ matrix.schedule }}
  #     - name: Run BERT example
  #       run: python examples/hf/bert/pippy_bert.py -s ${{ matrix.schedule }}
  #     - name: Run T5 example
  #       run: python examples/hf/t5/pippy_t5.py -s ${{ matrix.schedule }}
  #     - name: "HF Translation: fine-tune T5 model translation English to Romanian"
  #       run: >
  #         python examples/hf/translation/run_translation.py --model_name_or_path t5-small --do_train --source_lang en --target_lang ro --source_prefix "translate English to Romanian: " --dataset_name wmt16 --dataset_config_name ro-en --output_dir /tmp/tst-translation --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --overwrite_output_dir --predict_with_generate --max_steps=10 --dp_group_size=1 --pp_group_size=8
  #     - name: "HF Translation: fine-tune BART model translation English to Romanian"
  #       run: >
  #         python examples/hf/translation/run_translation.py --model_name_or_path facebook/bart-base --do_train --source_lang en --target_lang ro --source_prefix "translate English to Romanian: " --dataset_name wmt16 --dataset_config_name ro-en --output_dir /tmp/tst-translation --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --overwrite_output_dir --predict_with_generate --max_steps=10 --dp_group_size=2 --pp_group_size=8

  # Disabled: HF example scripts, set 2 (CLM, MLM and GLUE fine-tuning).
  # hf_examples_set2:
  #   runs-on: linux.12xlarge
  #   strategy:
  #     matrix:
  #       python-version: ["3.9"]
  #       schedule: ["FillDrain", "1F1B"]
  #   env:
  #     OMP_NUM_THREADS: "1"
  #   container:
  #     image: python:${{ matrix.python-version }}
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
  #     - name: Install pavel's huggingface fork
  #       run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
  #     - name: Install pippy
  #       run: "python setup.py install"
  #     - name: "HF Causal Language Modeling: fine-tune GPT-2 on WikiText-2"
  #       run: python examples/hf/language-modeling/run_clm.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --do_train --do_eval --output_dir /tmp/test-clm --max_steps=3 --overwrite_output_dir
  #     - name: "HF Masked Language Modeling: fine-tune RoBERTa on WikiText-2"
  #       run: python examples/hf/language-modeling/run_mlm.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path roberta-base --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --do_train --do_eval --output_dir /tmp/test-mlm --max_steps=3 --overwrite_output_dir
  #     - name: "HF Text classification: fine-tune BERT on the GLUE benchmark"
  #       run: python examples/hf/text-classification/run_glue.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path bert-base-cased --task_name mrpc --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --num_train_epochs 3 --output_dir /tmp/mrpc/ --max_steps=3 --overwrite_output_dir

  # GPU integration tests: runs directly on the GPU runner (no job-level
  # container), starts a detached Docker container from DOCKER_IMAGE with the
  # checkout mounted at PIPPY_ROOT, and executes
  # .github/workflows/pippy_gpu_tests.sh inside it.
  integration_test_gpu:
    runs-on: linux.16xlarge.nvidia.gpu
    strategy:
      matrix:
        python-version: ["3.8"]
        replicate: ["0", "1"]
        schedule: ["FillDrain", "1F1B"]
    env:
      DOCKER_IMAGE: qts8n/cuda-python:devel
      PIPPY_ROOT: /PiPPy
      OMP_NUM_THREADS: "1"
      # Matrix values are exposed as environment variables so they can be
      # forwarded into the test container with `docker run -e`.
      REPLICATE: ${{ matrix.replicate }}
      SCHEDULE: ${{ matrix.schedule }}
    steps:
      - name: Clean working directory
        shell: bash
        run: |
          sudo rm -rf /home/ec2-user/actions-runner/_work/PiPPy/PiPPy/* || true
      - uses: actions/checkout@v2
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
      - name: Pull Docker image
        run: |
          # Try the command up to three times, sleeping 1s and then 2s
          # between attempts.
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Test docker run
        run: |
          set -x
          # `-e NAME` with no value forwards NAME from this step's environment.
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            --gpus all \
            --shm-size=1g --ulimit memlock=-1 \
            -e OMP_NUM_THREADS \
            -e REPLICATE \
            -e SCHEDULE \
            --tty \
            --detach \
            -v "$(pwd):${PIPPY_ROOT}" \
            -w "${PIPPY_ROOT}" \
            "${DOCKER_IMAGE}"
          )
          # Run GPU tests and return error signal from docker
          docker exec -t -w "${PIPPY_ROOT}" "${container_name}" bash -c "bash .github/workflows/pippy_gpu_tests.sh; exit \$?"
      # The two cleanup steps below use `if: always()` so they run even when
      # an earlier step has failed.
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd):${PIPPY_ROOT}" -w "${PIPPY_ROOT}" "${DOCKER_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af

  # Programming-model examples: runs the TorchDynamo and GSPMD example scripts
  # on Python 3.9. Unlike the other jobs, requirements are installed with
  # `--pre`.
  programming_model_tests:
    runs-on: linux.4xlarge
    strategy:
      matrix:
        python-version: ["3.9"]
    container:
      image: python:${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v2
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install numpy datasets evaluate scikit-learn sacrebleu
          if [ -f requirements.txt ]; then pip install --pre -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
      - name: Install pippy
        run: "python setup.py install"
      - name: Test PiPPy + Dynamo example
        run: python examples/TorchDynamo/pippy_dynamo.py
      - name: Run PiPPy in GSPMD style
        run: python examples/gspmd/pippy_gspmd.py