Skip to content

Commit

Permalink
Merge pull request #78 from graphcore-research/docker-dev
Browse files Browse the repository at this point in the history
Add docker dev setup and update requirements
  • Loading branch information
thecharlieblake authored Nov 5, 2024
2 parents 445ff24 + 7a1f767 commit bcc3d5e
Show file tree
Hide file tree
Showing 19 changed files with 127 additions and 210 deletions.
24 changes: 24 additions & 0 deletions .devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"build": {
"dockerfile": "Dockerfile"
},
"workspaceFolder": "/home/developer/unit-scaling",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter"
],
"settings": {
"terminal.integrated.defaultProfile.linux": "zsh",
"terminal.integrated.profiles.linux": { "zsh": { "path": "/bin/zsh" } }
}
}
},
"mounts": [
"source=${localEnv:HOME}/.ssh,target=/home/developer/.ssh,type=bind,readonly=true",
"source=${localEnv:HOME}/.gitconfig,target=/home/developer/.gitconfig,type=bind,readonly=true",
"source=${localWorkspaceFolder},target=/home/developer/unit-scaling,type=bind"
],
"remoteUser": "developer"
}
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*
!requirements*.txt
34 changes: 0 additions & 34 deletions .github/workflows/ci-ipu.yaml

This file was deleted.

14 changes: 8 additions & 6 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
- name: Install dependencies
- name: Checkout code
uses: actions/checkout@v3

- name: Build Docker Image
run: |
sudo apt-get update
sudo apt-get install -y git
pip install -r requirements-dev.txt
docker build -t unit-scaling-dev:latest .
- name: Run CI
run: ./dev ci
run: docker run --rm -v $(pwd):/home/developer/unit-scaling unit-scaling-dev:latest ./dev ci

- name: Publish documentation
if: ${{github.ref == 'refs/heads/main'}}
uses: Cecilapp/GitHub-Pages-deploy@v3
Expand Down
39 changes: 39 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Development image for unit-scaling: PyTorch base image plus dev tooling,
# a non-root sudo-enabled user, and the pinned Python dev requirements.

# Use PyTorch base image
FROM pytorch/pytorch:latest

# Install additional dependencies and make zsh the login shell of the current
# (root) user. The apt cache is removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
    git \
    vim \
    sudo \
    make \
    g++ \
    zsh \
    && chsh -s /bin/zsh \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Configure a non-root user with sudo privileges.
# Override the username with `--build-arg USERNAME=<name>`.
# NOTE: Dockerfiles only treat `#` as a comment at the start of a line, so
# comments must not be appended to the ARG instructions below.
# NOTE(review): .devcontainer.json, the CI workflow and the README hard-code
# /home/developer, so changing USERNAME requires updating those paths as well.
ARG USERNAME=developer
ARG USER_UID=1001
ARG USER_GID=$USER_UID
RUN groupadd --gid $USER_GID $USERNAME \
    && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME
USER $USERNAME

# Set working directory
WORKDIR /home/$USERNAME/unit-scaling

# Puts pip install libs on $PATH & sets correct locale
ENV PATH="$PATH:/home/$USERNAME/.local/bin" \
    LC_ALL=C.UTF-8

# Install Python dependencies
COPY requirements-dev.txt .
RUN pip install -r requirements-dev.txt

# Creates basic .zshrc. The copy runs via sudo, so ownership is handed back to
# the non-root user afterwards; otherwise they could not edit their own .zshrc.
RUN sudo cp /etc/zsh/newuser.zshrc.recommended /home/$USERNAME/.zshrc \
    && sudo chown $USERNAME:$USERNAME /home/$USERNAME/.zshrc

CMD ["/bin/zsh"]
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,22 @@ To install the `unit-scaling` library, run:
pip install git+https://github.com/graphcore-research/unit-scaling.git
```

For development on this repository, see [docs/development.md](docs/development.md).
## Development

For development in this repository, we recommend using the provided Docker container.
This image can be built and entered interactively using:

```sh
docker build -t unit-scaling-dev:latest .
docker run -it --rm --user developer:developer -v $(pwd):/home/developer/unit-scaling unit-scaling-dev:latest
# To use git within the container, add `-v ~/.ssh:/home/developer/.ssh:ro -v ~/.gitconfig:/home/developer/.gitconfig:ro`.
```

For VS Code users, this repo also contains a `.devcontainer.json` file, which enables the container to be used as a full-featured IDE (see the [Dev Container docs](https://code.visualstudio.com/docs/devcontainers/containers) for details on how to use this feature).

Key development functionality is contained within the `./dev` script. This includes running unit tests, linting, formatting, documentation generation and more. Run `./dev --help` for the available options. Running `./dev` without arguments is equivalent to running `./dev ci`, which runs all of the available dev checks. This is the command used for GitHub CI.

We encourage pull requests from the community. Please reach out to us with any questions about contributing.

## What is u-μP?

Expand Down
32 changes: 1 addition & 31 deletions analysis/almost_scaled_dot_product_attention/demo_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@
from torch import nn, Tensor
import tqdm

try:
import poptorch

poptorch_available = True
except ModuleNotFoundError:
poptorch_available = False


class Config(dict):
def __init__(self, *args: Any, **kwargs: Any):
Expand Down Expand Up @@ -132,7 +125,7 @@ def forward(self, indices: Tensor) -> Tensor:
)


def train_cpu() -> Tensor:
def train() -> Tensor:
model = Model()
opt = torch.optim.Adam(model.parameters(), lr=CONFIG.lr)
losses = []
Expand All @@ -143,26 +136,3 @@ def train_cpu() -> Tensor:
opt.step()
losses.append(float(loss))
return torch.tensor(losses)


def train_ipu() -> Tensor:
model = Model()
options = poptorch.Options()
options.showCompilationProgressBar(False)
opt = torch.optim.Adam(model.parameters(), lr=CONFIG.lr)
session = poptorch.trainingModel(model, options, opt)
try:
return torch.tensor(
[
float(session(batch.int()))
for batch in tqdm.tqdm(
islice(batches(), CONFIG.steps), total=CONFIG.steps
)
]
)
finally:
session.destroy()


def train() -> Tensor:
return train_ipu() if poptorch_available else train_cpu()
40 changes: 0 additions & 40 deletions docs/development.md

This file was deleted.

7 changes: 0 additions & 7 deletions docs/user_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,6 @@ The advantage of using a unit-scaled model is as follows:
scales have stayed within range for all unit-scaled models tested thus far.
3. This can enable the use of smaller, more efficient number formats out-of-the-box,
such as FP16 and even FP8.
4. As the behaviour of some ops depends on scale, unit-scaling a model can change its
training dynamics slightly. In some experiments this has been shown to lead to
loss decreasing faster, though further work is needed to validate this.

For a more in-depth treatment of unit scaling, see our paper
`Unit Scaling: Out-of-the-Box Low-Precision Training (ICML, 2023)
<https://arxiv.org/abs/2303.11257>`_.


How to unit-scale a model
Expand Down
12 changes: 0 additions & 12 deletions requirements-dev-ipu.txt

This file was deleted.

42 changes: 28 additions & 14 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,28 @@
-r requirements.txt
black==24.3.0
flake8==6.0.0
isort==5.12.0
mypy==1.2.0
myst-parser==1.0.0
pandas-stubs==2.0.2.230605
pytest==7.3.1
pytest-cov==4.0.0
sphinx==6.2.1
sphinx-rtd-theme==1.2.0
transformers==4.38.0
types-Pygments==2.15.0.0
types-tabulate==0.9.0.2
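# Search both the PyTorch CPU index and PyPI. pip applies no priority between
# the two; the `+cpu` pin on torch below is what selects the CPU-only wheel.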
--index-url https://download.pytorch.org/whl/cpu
--extra-index-url=https://pypi.org/simple

# Same as requirements.txt, but with versions locked-in
datasets==3.1.0
docstring-parser==0.16
einops==0.8.0
numpy==2.1.3
seaborn==0.13.2
tabulate==0.9.0
torch==2.5.1+cpu

# Additional dev requirements
black==24.10.0
flake8==7.1.1
isort==5.13.2
mypy==1.13.0
myst-parser==4.0.0
pandas-stubs==2.2.3.241009
pytest==8.3.3
pytest-cov==6.0.0
sphinx==8.1.3
sphinx-rtd-theme==3.0.1
transformers==4.46.1
triton==3.1.0
types-Pygments==2.18.0.20240506
types-tabulate==0.9.0.20240106
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
datasets
docstring-parser
einops
numpy<2.0
numpy
seaborn
tabulate
torch>=2.2
6 changes: 0 additions & 6 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@ show_error_codes = true
strict = true
check_untyped_defs = true

[mypy-poptorch.*]
ignore_missing_imports = True

[mypy-poptorch_experimental_addons.*]
ignore_missing_imports = True

# As torch.fx doesn't explicitly export many of its useful modules.
[mypy-torch.fx]
implicit_reexport = True
Expand Down
9 changes: 0 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,6 @@
import setuptools

requirements = Path("requirements.txt").read_text().rstrip("\n").split("\n")
try:
import poptorch

# This should match requirements-dev-ipu.txt
requirements.append(
"poptorch-experimental-addons @ git+https://github.com/graphcore-research/poptorch-experimental-addons@beb12678d1e7ea2c033bd061d32167be262dfa58"
)
except ImportError:
pass

version = re.search("__version__ = \"(.+)\"", Path("unit_scaling/_version.py").read_text()).group(1)

Expand Down
4 changes: 2 additions & 2 deletions unit_scaling/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
import matplotlib.colors
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns # type: ignore[import]
from datasets import load_dataset # type: ignore[import]
import seaborn as sns # type: ignore[import-untyped]
from datasets import load_dataset # type: ignore[import-untyped]
from torch import Tensor, nn
from torch.fx.graph import Graph
from torch.fx.node import Node
Expand Down
Loading

0 comments on commit bcc3d5e

Please sign in to comment.