Skip to content

Commit

Permalink
Training script (#31)
Browse files Browse the repository at this point in the history
Training script, and a bunch of other stuff too
  • Loading branch information
jettjaniak authored Mar 19, 2024
1 parent 456958a commit 35f0c69
Show file tree
Hide file tree
Showing 50 changed files with 2,033 additions and 38 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: setup python
uses: actions/setup-python@v5
with:
Expand All @@ -31,11 +33,11 @@ jobs:
- name: dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements-nocuda.txt
pip install -e .
- name: black
run: black --check .
- name: isort
run: isort --profile black --check .
run: isort --check .
- name: pytest
run: pytest
17 changes: 17 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ __pycache__/
# C extensions
*.so

bin
include
lib64
pyvenv.cfg

# Distribution / packaging
.Python
build/
Expand Down Expand Up @@ -158,3 +163,15 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# ignore wandb files
**/wandb/*
**/*.wandb
**/wandb-summary.json
**/wandb-metadata.json

# scratch notebook
notebooks/scratch.ipynb

# dsstore
.DS_Store
2 changes: 0 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,3 @@ repos:
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black"]
34 changes: 34 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "run_training 256",
"type": "debugpy",
"request": "launch",
"program": "scripts/run_training.py",
"console": "integratedTerminal",
"args": "--debug --train_sample_limit=256"
//"args": "${command:pickArgs}"
},
{
"name": "run_training --help",
"type": "debugpy",
"request": "launch",
"program": "scripts/run_training.py",
"console": "integratedTerminal",
"args": "--help"
//"args": "${command:pickArgs}"
},
{
"name": "run training with debug plus custom args",
"type": "debugpy",
"request": "launch",
"program": "scripts/run_training.py",
"console": "integratedTerminal",
"args": "--debug ${command:pickArgs}"
}
]
}
4 changes: 0 additions & 4 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,5 @@
"source.organizeImports": "explicit"
},
"python.analysis.typeCheckingMode": "basic",
"isort.args": [
"--profile",
"black"
],
"black-formatter.importStrategy": "fromEnvironment",
}
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,6 @@ When you save a file vscode should automatically format it. Otherwise, pre-commi
- comment important sections of the code in _Files changed_ tab
- when it's ready, add the relevant stakeholders as reviewers
4. after the comments are resolved and PR is approved, merge it using _Squash and merge_

# Incrementing Versions
When making a new release, increment the version in `delphi/__init__.py`.
45 changes: 45 additions & 0 deletions notebooks/training_demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from delphi.train.config.utils import get_presets_by_name\n",
"from delphi.train.training import run_training\n",
"from delphi.train.utils import ModelTrainingState\n",
"from delphi.train.run_context import RunContext\n",
"\n",
"\n",
"def train() -> tuple[ModelTrainingState, RunContext]:\n",
" config = get_presets_by_name()[\"v0-llama2-100k\"]\n",
" config.wandb_config.entity = \"jaiwithani\"\n",
" return run_training(config)\n",
"\n",
"model_train_result = train()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tinyevals",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[project]
name = "delphi"
dynamic = ["version"]

[tool.setuptools.dynamic]
version = {attr = "delphi.__version__"}

[tool.isort]
profile = 'black'
known_third_party = ['wandb']

[tool.pytest.ini_options]
testpaths = ["tests"]
30 changes: 30 additions & 0 deletions requirements-nocuda.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# This is a separate requirements file for use in GitHub Actions.
# It omits packages that cannot be installed in GitHub Actions due
# to hardware limitations (e.g. no GPU). All packages here are automatically
# included when installing from requirements.txt.
torch==2.1.2
datasets==2.16.1
tqdm==4.66.1
ipywidgets==8.1.1
nbformat==5.9.2
pytest==7.4.4
black==23.12.1
jaxtyping==0.2.25
beartype==0.16.4
pre-commit==3.6.0
isort==5.13.2
chardet==5.2.0
sentencepiece==0.1.99
protobuf==4.25.2
plotly==5.18.0
wandb==0.16.3
spacy==3.7.2
pandas==1.3.4
dacite==1.8.1

# temporarily installing transformers from main until 4.39.0 comes out (for mamba support)
transformers @ git+https://github.com/huggingface/transformers@main
# transformers==4.39.0 TODO: use this once 4.39.0 releases

# spacy-transformers requires transformers <= 4.37.0, temporarily disabling
# spacy-transformers>=1.3.4
27 changes: 8 additions & 19 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,8 @@
torch==2.1.2
datasets==2.16.1
transformers==4.36.2
tqdm==4.66.1
ipywidgets==8.1.1
nbformat==5.9.2
pytest==7.4.4
black==23.12.1
jaxtyping==0.2.25
beartype==0.16.4
pre-commit==3.6.0
isort==5.13.2
spacy==3.7.2
chardet==5.2.0
sentencepiece==0.1.99
protobuf==4.25.2
plotly==5.18.0
spacy-transformers==1.3.4
pandas==1.3.4
# most packages are specified in requirements-nocuda.txt, and new packages should be placed
# there UNLESS they cannot be installed without CUDA support, in which case they should go here.
-r requirements-nocuda.txt

# these libs support better mamba implementations in transformers,
# but require CUDA/nvcc, so they are skipped on macOS.
mamba_ssm==1.2.0.post1; sys_platform != 'darwin'
causal-conv1d==1.2.0.post2; sys_platform != 'darwin'
Loading

0 comments on commit 35f0c69

Please sign in to comment.