Skip to content

Commit

Permalink
Upgrade to Synapse 1.14 (huggingface#664)
Browse files Browse the repository at this point in the history
  • Loading branch information
regisss authored Jan 25, 2024
1 parent 2f4fe81 commit e394b0f
Show file tree
Hide file tree
Showing 42 changed files with 158 additions and 169 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/fast_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ jobs:
start-runner:
name: Start self-hosted EC2 runner
needs: authorize
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-east-1
EC2_AMI_ID: ami-0a82d7d7ad5d25f56
EC2_AMI_ID: ami-0a2179742e502fdfe
EC2_INSTANCE_TYPE: dl1.24xlarge
EC2_SUBNET_ID: subnet-b7533b96
EC2_SECURITY_GROUP: sg-08af7938042271373
Expand Down Expand Up @@ -77,7 +77,7 @@ jobs:
ref: ${{ github.event.pull_request.merge_commit_sha }}
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -89,7 +89,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/fast_tests.sh
diffusers:
name: Run tests for optimum.habana.diffusers
Expand All @@ -113,7 +113,7 @@ jobs:
ref: ${{ github.event.pull_request.merge_commit_sha }}
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -125,7 +125,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/fast_tests_diffusers.sh
stop-runner:
name: Stop self-hosted EC2 runner
Expand All @@ -134,7 +134,7 @@ jobs:
- start-runner # required to get output from the start-runner job
- transformers # required to wait for the tests to be finished
- diffusers # required to wait for the tests to be finished
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-east-1
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
Expand Down
34 changes: 17 additions & 17 deletions .github/workflows/slow_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ concurrency:
jobs:
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-west-2
EC2_AMI_ID: ami-01b277257cd28a061
EC2_AMI_ID: ami-0961e95b539f72c46
EC2_INSTANCE_TYPE: dl1.24xlarge
EC2_SUBNET_ID: subnet-452c913d
EC2_SECURITY_GROUP: sg-0894f4f70dd6bd778
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -67,7 +67,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/example_diff_tests.sh
stable-diffusion:
name: Test Stable Diffusion
Expand All @@ -83,7 +83,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -95,7 +95,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
deepspeed:
name: Test DeepSpeed models
Expand All @@ -112,7 +112,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -124,7 +124,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
multi-card:
name: Test multi-card models
Expand All @@ -141,7 +141,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -153,7 +153,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_8x.sh
single-card:
name: Test single-card models
Expand All @@ -171,7 +171,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -183,7 +183,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_1x.sh
albert-xxl-single-card:
name: Test single-card ALBERT XXL
Expand All @@ -204,7 +204,7 @@ jobs:
- name: Pull image
if: github.event.schedule == '0 21 * * 6'
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run test
if: github.event.schedule == '0 21 * * 6'
run: |
Expand All @@ -217,7 +217,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/albert_xxl_1x.sh
- name: Warning
if: github.event.schedule != '0 21 * * 6'
Expand All @@ -240,7 +240,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -252,7 +252,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
stop-runner:
name: Stop self-hosted EC2 runner
Expand All @@ -264,7 +264,7 @@ jobs:
- single-card
- albert-xxl-single-card
- text-generation
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-west-2
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/slow_tests_gaudi2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -30,7 +30,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
deepspeed:
name: Test DeepSpeed models
Expand All @@ -43,7 +43,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -56,7 +56,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
multi-card:
name: Test multi-card models
Expand All @@ -69,7 +69,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -82,7 +82,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_8x.sh
single-card:
name: Test single-card models
Expand All @@ -96,7 +96,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -110,7 +110,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_1x.sh
text-generation:
name: Test text-generation example
Expand All @@ -125,7 +125,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -138,5 +138,5 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ slow_tests_8x: test_installs

# Run DeepSpeed non-regression tests
slow_tests_deepspeed: test_installs
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
python -m pytest tests/test_examples.py -v -s -k "deepspeed"

slow_tests_diffusers: test_installs
Expand All @@ -58,7 +58,7 @@ slow_tests_diffusers: test_installs

# Run text-generation non-regression tests
slow_tests_text_generation_example: test_installs
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder_text_summarization.py -v -s --token $(TOKEN)

# Check if examples are up to date with the Transformers library
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up

> To use DeepSpeed on HPUs, you also need to run the following command:
>```bash
>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
>```
Optimum Habana is a fast-moving project, and you may want to install it from source:
Expand Down Expand Up @@ -211,7 +211,7 @@ Please refer to Habana Gaudi's official [installation guide](https://docs.habana

> Tests should be run in a Docker container based on Habana Docker images.
>
> The current version has been validated for SynapseAI 1.13.
> The current version has been validated for SynapseAI 1.14.

## Development
Expand Down
10 changes: 2 additions & 8 deletions docs/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest

ARG commit_sha
ARG clone_url
Expand All @@ -7,13 +7,7 @@ ARG clone_url
RUN apt-get update && apt-get install -y \
software-properties-common \
npm

# Need node to build doc HTML. Taken from https://stackoverflow.com/a/67491580
RUN apt-get update && apt-get install -y \
software-properties-common \
npm
RUN npm install npm@9.8.1 -g && \
npm install n -g && \
RUN npm install n -g && \
n latest

RUN git clone $clone_url optimum-habana && cd optimum-habana && git checkout $commit_sha
Expand Down
2 changes: 1 addition & 1 deletion docs/source/installation.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ python -m pip install --upgrade-strategy eager optimum[habana]
To use DeepSpeed on HPUs, you also need to run the following command:

```bash
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
```

4 changes: 2 additions & 2 deletions docs/source/usage_guides/deepspeed.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ You can find more information about DeepSpeed Gaudi integration [here](https://d
To use DeepSpeed on Gaudi, you need to install Optimum Habana and [Habana's DeepSpeed fork](https://github.com/HabanaAI/DeepSpeed) with:
```bash
pip install optimum[habana]
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
```


Expand Down Expand Up @@ -78,7 +78,7 @@ It is strongly advised to read [this section](https://huggingface.co/docs/transf

</Tip>

Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.13.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Habana.
Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.14.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Habana.

The [Transformers documentation](https://huggingface.co/docs/transformers/main_classes/deepspeed#configuration) explains how to write a configuration from scratch very well.
A more complete description of all configuration possibilities is available [here](https://www.deepspeed.ai/docs/config-json/).
Expand Down
2 changes: 1 addition & 1 deletion examples/audio-classification/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ On 8 HPUs, this script should run in ~12 minutes and yield an accuracy of **80.4

> You need to install DeepSpeed with:
> ```bash
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
> ```
DeepSpeed can be used with almost the same command as for a multi-card run:
Expand Down
2 changes: 1 addition & 1 deletion examples/gaudi_spawn.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def main():
if not is_deepspeed_available():
raise ImportError(
"--use_deepspeed requires deepspeed: `pip install"
" git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0`."
" git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0`."
)

# Patch sys.argv
Expand Down
Loading

0 comments on commit e394b0f

Please sign in to comment.