Skip to content

Commit

Permalink
Add FSDP test to Gaudi2 CI (huggingface#683)
Browse files Browse the repository at this point in the history
  • Loading branch information
regisss authored Feb 2, 2024
1 parent 3ce812b commit 54c17e0
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
28 changes: 27 additions & 1 deletion .github/workflows/slow_tests_gaudi2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,37 @@ jobs:
--ipc=host \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
# Runs the FSDP test suite (`make slow_tests_fsdp`) on a self-hosted Gaudi2
# runner, inside the Habana PyTorch installer container image.
fsdp:
  name: Test FSDP models
  # Run whether the jobs listed in `needs` succeeded or failed, but not when
  # the workflow run has been cancelled.
  if: ${{ !cancelled() && (success() || failure()) }}
  needs:
    - deepspeed  # run the job when the previous test job is done
  runs-on: [self-hosted, linux, x64, gaudi2]
  steps:
    - name: Checkout
      # NOTE(review): actions/checkout@v2 targets a deprecated Node runtime;
      # consider upgrading it together with the other jobs in this workflow.
      uses: actions/checkout@v2
    - name: Pull image
      run: |
        docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
    - name: Run tests
      # The current working directory is bind-mounted at /root/workspace and
      # used as the container's working directory. "$PWD" is quoted so that a
      # workspace path containing whitespace is passed to `-v` as one argument.
      run: |
        docker run \
          -v "$PWD":/root/workspace \
          --workdir=/root/workspace \
          --runtime=habana \
          -e HABANA_VISIBLE_DEVICES=all \
          -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
          -e GAUDI2_CI=1 \
          --cap-add=sys_nice \
          --net=host \
          --ipc=host \
          vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
          make slow_tests_fsdp
multi-card:
name: Test multi-card models
if: ${{ !cancelled() && (success() || failure()) }}
needs:
- deepspeed # run the job when the previous test job is done
- fsdp # run the job when the previous test job is done
runs-on: [self-hosted, linux, x64, gaudi2]
steps:
- name: Checkout
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ slow_tests_text_generation_example: test_installs
python -m pip install git+https://github.com/HabanaAI/[email protected]
python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder_text_summarization.py -v -s --token $(TOKEN)

# Run the FSDP example tests (tests/test_fsdp_examples.py) with pytest in
# verbose mode and with output capturing disabled (-v -s); depends on the
# test_installs target.
slow_tests_fsdp: test_installs
python -m pytest tests/test_fsdp_examples.py -v -s

# Check if examples are up to date with the Transformers library
example_diff_tests: test_installs
python -m pytest tests/test_examples_match_transformers.py
Expand Down
1 change: 1 addition & 0 deletions tests/test_fsdp_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@


# Gaudi2 CI baselines
# FSDP is not supported on Gaudi1
MODELS_TO_TEST = {
"bf16": [
(
Expand Down

0 comments on commit 54c17e0

Please sign in to comment.