From 198ded7534cf4435273ab23df0a598771894bd0b Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Wed, 14 Dec 2022 13:22:34 -0500 Subject: [PATCH 1/8] Rename entry points; add evaluation script (#88) --- .../examples/local/run.sh | 3 +- .../examples/local_orion/run.sh | 2 +- .../examples/slurm/to_submit.sh | 2 +- .../examples/slurm_orion/to_submit.sh | 2 +- {{cookiecutter.project_slug}}/setup.py | 4 +- .../{main.py => evaluate.py} | 74 ++++------- .../{{cookiecutter.project_slug}}/train.py | 123 +++++++++++++++++- .../utils/logging_utils.py | 1 + 8 files changed, 153 insertions(+), 58 deletions(-) rename {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/{main.py => evaluate.py} (51%) mode change 100755 => 100644 diff --git a/{{cookiecutter.project_slug}}/examples/local/run.sh b/{{cookiecutter.project_slug}}/examples/local/run.sh index 5d9e811..feb7704 100644 --- a/{{cookiecutter.project_slug}}/examples/local/run.sh +++ b/{{cookiecutter.project_slug}}/examples/local/run.sh @@ -1 +1,2 @@ -main --data ../data --output output --config config.yaml --start-from-scratch +amlrt-train --data ../data --output output --config config.yaml --start-from-scratch +amlrt-eval --data ../data --config config.yaml --ckpt-path output/best_model/model.ckpt diff --git a/{{cookiecutter.project_slug}}/examples/local_orion/run.sh b/{{cookiecutter.project_slug}}/examples/local_orion/run.sh index e472e31..314f688 100644 --- a/{{cookiecutter.project_slug}}/examples/local_orion/run.sh +++ b/{{cookiecutter.project_slug}}/examples/local_orion/run.sh @@ -2,7 +2,7 @@ export MLFLOW_TRACKING_URI='mlruns' export ORION_DB_ADDRESS='orion_db.pkl' export ORION_DB_TYPE='pickleddb' -orion -v hunt --config orion_config.yaml ../../{{cookiecutter.project_slug}}/main.py --data ../data \ +orion -v hunt --config orion_config.yaml amlrt-train --data ../data \ --config config.yaml --disable-progressbar \ --output '{exp.working_dir}/{trial.id}/' \ --log '{exp.working_dir}/{trial.id}/exp.log' diff --git 
a/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh b/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh index c87cc12..24aaf1c 100644 --- a/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh +++ b/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh @@ -27,4 +27,4 @@ export MLFLOW_TRACKING_URI='mlruns' -main --data ../data --output output --config config.yaml --tmp-folder ${SLURM_TMPDIR} --disable-progressbar +amlrt-train --data ../data --output output --config config.yaml --tmp-folder ${SLURM_TMPDIR} --disable-progressbar diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh b/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh index dfa90e5..c5f444e 100644 --- a/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh +++ b/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh @@ -27,7 +27,7 @@ export ORION_DB_ADDRESS='orion_db.pkl' export ORION_DB_TYPE='pickleddb' orion -v hunt --config orion_config.yaml \ - main --data ../data --config config.yaml --disable-progressbar \ + amlrt-train --data ../data --config config.yaml --disable-progressbar \ --output '{exp.working_dir}/{trial.id}/' \ --log '{exp.working_dir}/{trial.id}/exp.log' \ --tmp-folder ${SLURM_TMPDIR}/{trial.id} diff --git a/{{cookiecutter.project_slug}}/setup.py b/{{cookiecutter.project_slug}}/setup.py index 0e4fcd7..406dafa 100644 --- a/{{cookiecutter.project_slug}}/setup.py +++ b/{{cookiecutter.project_slug}}/setup.py @@ -30,7 +30,9 @@ ], entry_points={ 'console_scripts': [ - 'main={{ cookiecutter.project_slug }}.main:main' + # TODO: change amlrt- prefix, placeholder for now. 
+ 'amlrt-train={{ cookiecutter.project_slug }}.train:main', + 'amlrt-eval={{ cookiecutter.project_slug }}.evaluate:main', ], } ) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/evaluate.py old mode 100755 new mode 100644 similarity index 51% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py rename to {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/evaluate.py index 385a70c..619d810 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py +++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/evaluate.py @@ -1,21 +1,16 @@ -#!/usr/bin/env python - import argparse import logging -import os -import shutil import sys import yaml from yaml import load +import pytorch_lightning as pl + from {{cookiecutter.project_slug}}.data.data_loader import FashionMnistDM -from {{cookiecutter.project_slug}}.train import train from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp from {{cookiecutter.project_slug}}.models.model_loader import load_model -from {{cookiecutter.project_slug}}.utils.file_utils import rsync_folder -from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter, log_exp_details -from {{cookiecutter.project_slug}}.utils.reproducibility_utils import set_seed +from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter logger = logging.getLogger(__name__) @@ -31,46 +26,19 @@ def main(): parser = argparse.ArgumentParser() # __TODO__ check you need all the following CLI parameters parser.add_argument('--log', help='log to this file (in addition to stdout/err)') + parser.add_argument('--ckpt-path', help='Path to best model') parser.add_argument('--config', help='config file with generic hyper-parameters, such as optimizer, ' 'batch_size, ... 
- in yaml format') parser.add_argument('--data', help='path to data', required=True) - parser.add_argument('--tmp-folder', - help='will use this folder as working folder - it will copy the input data ' - 'here, generate results here, and then copy them back to the output ' - 'folder') - parser.add_argument('--output', help='path to outputs - will store files here', required=True) - parser.add_argument('--disable-progressbar', action='store_true', - help='will disable the progressbar while going over the mini-batch') - parser.add_argument('--start-from-scratch', action='store_true', - help='will not load any existing saved model - even if present') parser.add_argument('--gpus', default=None, help='list of GPUs to use. If not specified, runs on CPU.' 'Example of GPU usage: 1 means run on GPU 1, 0 on GPU 0.') - parser.add_argument('--debug', action='store_true') args = parser.parse_args() logging.basicConfig(stream=sys.stdout, level=logging.INFO) - if os.path.exists(args.output) and args.start_from_scratch: - logger.info('Starting from scratch, removing any previous experiments.') - shutil.rmtree(args.output) - - if os.path.exists(args.output): - logger.info("Previous experiment found, resuming from checkpoint") - else: - os.makedirs(args.output) - - if args.tmp_folder is not None: - data_folder_name = os.path.basename(os.path.normpath(args.data)) - rsync_folder(args.data, args.tmp_folder) - data_dir = os.path.join(args.tmp_folder, data_folder_name) - output_dir = os.path.join(args.tmp_folder, 'output') - if not os.path.exists(output_dir): - os.makedirs(output_dir) - else: - data_dir = args.data - output_dir = args.output + data_dir = args.data # will log to a file if provided (useful for orion on cluster) if args.log is not None: @@ -91,14 +59,11 @@ def main(): else: hyper_params = {} - run(args, data_dir, output_dir, hyper_params) + evaluate(args, data_dir, hyper_params) - if args.tmp_folder is not None: - rsync_folder(output_dir + os.path.sep, args.output) - -def 
run(args, data_dir, output_dir, hyper_params): - """Setup and run the dataloaders, training loops, etc. +def evaluate(args, data_dir, hyper_params): + """Performs an evaluation on both the validation and test sets. Args: args (object): arguments passed from the cli @@ -110,21 +75,26 @@ def run(args, data_dir, output_dir, hyper_params): # (and NOT the model - these will be specified in the model itself) logger.info('List of hyper-parameters:') check_and_log_hp( - ['architecture', 'batch_size', 'exp_name', 'max_epoch', 'optimizer', 'seed', - 'early_stopping'], + ['architecture', 'batch_size', 'exp_name', 'early_stopping'], hyper_params) - if hyper_params["seed"] is not None: - set_seed(hyper_params["seed"]) - - log_exp_details(os.path.realpath(__file__), args) + trainer = pl.Trainer( + gpus=args.gpus, + ) datamodule = FashionMnistDM(data_dir, hyper_params) + datamodule.setup() + model = load_model(hyper_params) + model.load_from_checkpoint(args.ckpt_path) + + val_metrics = trainer.validate(model, datamodule=datamodule) + test_metrics = trainer.test(model, datamodule=datamodule) - train(model=model, datamodule=datamodule, output=output_dir, hyper_params=hyper_params, - use_progress_bar=not args.disable_progressbar, gpus=args.gpus) + # We can have many val/test sets, so iterate throught their results. 
+ logger.info(f"Validation Metrics: {val_metrics}") + logger.info(f"Test Metrics: {test_metrics}") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py index 7a9d8a6..04aba87 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py +++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py @@ -1,13 +1,24 @@ -import glob +import argparse import logging import os +import shutil +import sys +import glob + +import yaml +from yaml import load import orion import pytorch_lightning as pl from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping from orion.client import report_results +from {{cookiecutter.project_slug}}.data.data_loader import FashionMnistDM from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp +from {{cookiecutter.project_slug}}.models.model_loader import load_model +from {{cookiecutter.project_slug}}.utils.file_utils import rsync_folder +from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter, log_exp_details +from {{cookiecutter.project_slug}}.utils.reproducibility_utils import set_seed logger = logging.getLogger(__name__) @@ -15,6 +26,112 @@ LAST_MODEL_NAME = 'last_model' +def main(): + """Main entry point of the program. + + Note: + This main.py file is meant to be called using the cli, + see the `examples/local/run.sh` file to see how to use it. + + """ + parser = argparse.ArgumentParser() + # __TODO__ check you need all the following CLI parameters + parser.add_argument('--log', help='log to this file (in addition to stdout/err)') + parser.add_argument('--config', + help='config file with generic hyper-parameters, such as optimizer, ' + 'batch_size, ... 
- in yaml format') + parser.add_argument('--data', help='path to data', required=True) + parser.add_argument('--tmp-folder', + help='will use this folder as working folder - it will copy the input data ' + 'here, generate results here, and then copy them back to the output ' + 'folder') + parser.add_argument('--output', help='path to outputs - will store files here', required=True) + parser.add_argument('--disable-progressbar', action='store_true', + help='will disable the progressbar while going over the mini-batch') + parser.add_argument('--start-from-scratch', action='store_true', + help='will not load any existing saved model - even if present') + parser.add_argument('--gpus', default=None, + help='list of GPUs to use. If not specified, runs on CPU.' + 'Example of GPU usage: 1 means run on GPU 1, 0 on GPU 0.') + parser.add_argument('--debug', action='store_true') + args = parser.parse_args() + + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + if os.path.exists(args.output) and args.start_from_scratch: + logger.info('Starting from scratch, removing any previous experiments.') + shutil.rmtree(args.output) + + if os.path.exists(args.output): + logger.info("Previous experiment found, resuming from checkpoint") + else: + os.makedirs(args.output) + + if args.tmp_folder is not None: + data_folder_name = os.path.basename(os.path.normpath(args.data)) + rsync_folder(args.data, args.tmp_folder) + data_dir = os.path.join(args.tmp_folder, data_folder_name) + output_dir = os.path.join(args.tmp_folder, 'output') + if not os.path.exists(output_dir): + os.makedirs(output_dir) + else: + data_dir = args.data + output_dir = args.output + + # will log to a file if provided (useful for orion on cluster) + if args.log is not None: + handler = logging.handlers.WatchedFileHandler(args.log) + formatter = logging.Formatter(logging.BASIC_FORMAT) + handler.setFormatter(formatter) + root = logging.getLogger() + root.setLevel(logging.INFO) + root.addHandler(handler) + + # to 
intercept any print statement: + sys.stdout = LoggerWriter(logger.info) + sys.stderr = LoggerWriter(logger.warning) + + if args.config is not None: + with open(args.config, 'r') as stream: + hyper_params = load(stream, Loader=yaml.FullLoader) + else: + hyper_params = {} + + run(args, data_dir, output_dir, hyper_params) + + if args.tmp_folder is not None: + rsync_folder(output_dir + os.path.sep, args.output) + + +def run(args, data_dir, output_dir, hyper_params): + """Setup and run the dataloaders, training loops, etc. + + Args: + args (object): arguments passed from the cli + data_dir (str): path to input folder + output_dir (str): path to output folder + hyper_params (dict): hyper parameters from the config file + """ + # __TODO__ change the hparam that are used from the training algorithm + # (and NOT the model - these will be specified in the model itself) + logger.info('List of hyper-parameters:') + check_and_log_hp( + ['architecture', 'batch_size', 'exp_name', 'max_epoch', 'optimizer', 'seed', + 'early_stopping'], + hyper_params) + + if hyper_params["seed"] is not None: + set_seed(hyper_params["seed"]) + + log_exp_details(os.path.realpath(__file__), args) + + datamodule = FashionMnistDM(data_dir, hyper_params) + model = load_model(hyper_params) + + train(model=model, datamodule=datamodule, output=output_dir, hyper_params=hyper_params, + use_progress_bar=not args.disable_progressbar, gpus=args.gpus) + + def train(**kwargs): # pragma: no cover """Training loop wrapper. 
Used to catch exception if Orion is being used.""" try: @@ -111,3 +228,7 @@ def handle_previous_models(output, last_model_path, best_model_path): logger.info('no model found - starting training from scratch') resume_from_checkpoint = None return resume_from_checkpoint + + +if __name__ == '__main__': + main() diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py index 3425bcf..dd87ac0 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py +++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py @@ -22,6 +22,7 @@ def __init__(self, printer): printer: (fn) function used to print message (e.g., logger.info). """ self.printer = printer + self.encoding = None def write(self, message): """write. From 5c29725d1998bde62b189cd93d6f85257058b507 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Tue, 17 Jan 2023 17:16:46 -0500 Subject: [PATCH 2/8] remove cookie-cutter dependency (#96) --- .github/workflows/ci.yml | 16 +- .gitignore | 4 + ...OVED_LIBRARIES.md => APPROVED_LIBRARIES.md | 0 LICENSE | 2 +- README.md | 219 ++++++++++++++++-- .../__init__.py | 0 .../data/__init__.py | 0 .../data/data_loader.py | 4 +- .../data/data_preprocess.py | 0 .../evaluate.py | 8 +- .../models/__init__.py | 0 .../models/model_loader.py | 2 +- .../models/my_model.py | 4 +- .../models/optim.py | 0 .../train.py | 12 +- .../utils/__init__.py | 0 .../utils/file_utils.py | 0 .../utils/hp_utils.py | 0 .../utils/logging_utils.py | 0 .../utils/reproducibility_utils.py | 0 cookiecutter.json | 13 -- .../docs => docs}/Makefile | 0 .../docs => docs}/conf.py | 8 +- .../docs => docs}/index.rst | 4 +- .../docs => docs}/make.bat | 0 .../docs => docs}/usage/guide.rst | 2 +- .../docs => docs}/usage/quickstart.md | 0 .../examples => examples}/local/config.yaml | 0 .../examples => examples}/local/run.sh | 0 
.../local_orion/config.yaml | 0 .../local_orion/orion_config.yaml | 0 .../examples => examples}/local_orion/run.sh | 0 .../examples => examples}/slurm/config.yaml | 0 .../examples => examples}/slurm/run.sh | 0 .../examples => examples}/slurm/to_submit.sh | 4 +- .../slurm_orion/config.yaml | 0 .../slurm_orion/orion_config.yaml | 0 .../examples => examples}/slurm_orion/run.sh | 0 .../slurm_orion/to_submit.sh | 4 +- .../hooks => hooks}/pre-commit | 0 setup.py | 39 +++- tests/end2end_pytorch/run.sh | 31 +-- tests/test_docs/run.sh | 12 +- .../tests => tests}/test_hp_utils.py | 2 +- .../.azure_pipeline.yml | 27 --- .../.github/workflows/tests.yml | 42 ---- {{cookiecutter.project_slug}}/.gitignore | 110 --------- {{cookiecutter.project_slug}}/LICENSE | 111 --------- {{cookiecutter.project_slug}}/README.md | 215 ----------------- {{cookiecutter.project_slug}}/setup.py | 38 --- 50 files changed, 275 insertions(+), 658 deletions(-) rename {{cookiecutter.project_slug}}/APPROVED_LIBRARIES.md => APPROVED_LIBRARIES.md (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/__init__.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/data/__init__.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/data/data_loader.py (96%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/data/data_preprocess.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/evaluate.py (91%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/models/__init__.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/models/model_loader.py (91%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/models/my_model.py (95%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => 
amlrt_project}/models/optim.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/train.py (94%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/utils/__init__.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/utils/file_utils.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/utils/hp_utils.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/utils/logging_utils.py (100%) rename {{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}} => amlrt_project}/utils/reproducibility_utils.py (100%) delete mode 100644 cookiecutter.json rename {{{cookiecutter.project_slug}}/docs => docs}/Makefile (100%) rename {{{cookiecutter.project_slug}}/docs => docs}/conf.py (93%) rename {{{cookiecutter.project_slug}}/docs => docs}/index.rst (83%) rename {{{cookiecutter.project_slug}}/docs => docs}/make.bat (100%) rename {{{cookiecutter.project_slug}}/docs => docs}/usage/guide.rst (85%) rename {{{cookiecutter.project_slug}}/docs => docs}/usage/quickstart.md (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/local/config.yaml (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/local/run.sh (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/local_orion/config.yaml (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/local_orion/orion_config.yaml (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/local_orion/run.sh (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/slurm/config.yaml (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/slurm/run.sh (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/slurm/to_submit.sh (91%) rename {{{cookiecutter.project_slug}}/examples => examples}/slurm_orion/config.yaml (100%) rename 
{{{cookiecutter.project_slug}}/examples => examples}/slurm_orion/orion_config.yaml (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/slurm_orion/run.sh (100%) rename {{{cookiecutter.project_slug}}/examples => examples}/slurm_orion/to_submit.sh (92%) rename {{{cookiecutter.project_slug}}/hooks => hooks}/pre-commit (100%) rename {{{cookiecutter.project_slug}}/tests => tests}/test_hp_utils.py (89%) delete mode 100644 {{cookiecutter.project_slug}}/.azure_pipeline.yml delete mode 100644 {{cookiecutter.project_slug}}/.github/workflows/tests.yml delete mode 100644 {{cookiecutter.project_slug}}/.gitignore delete mode 100644 {{cookiecutter.project_slug}}/LICENSE delete mode 100644 {{cookiecutter.project_slug}}/README.md delete mode 100644 {{cookiecutter.project_slug}}/setup.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec21256..5d60c0a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,11 +4,11 @@ on: # but only for the main branch push: branches: - - master + - main - development pull_request: branches: - - master + - main - development jobs: build: @@ -19,14 +19,18 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.9 - - name: install-dependencies + - name: linting checks run: | python -m pip install --upgrade pip + pip install flake8 flake8-docstrings + sh hooks/pre-commit + - name: install project dependencies + run: | pip install -e . 
- - name: print env + - name: unit tests run: | - env - - name: pytorch-test + pytest --cov=amlrt-project + - name: pytorch-end2end run: | ./tests/end2end_pytorch/run.sh - name: doc-creation-test diff --git a/.gitignore b/.gitignore index a306c16..50e214b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ .idea mlruns +examples/data/ +examples/*/output/ +examples/*/lightning_logs/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/{{cookiecutter.project_slug}}/APPROVED_LIBRARIES.md b/APPROVED_LIBRARIES.md similarity index 100% rename from {{cookiecutter.project_slug}}/APPROVED_LIBRARIES.md rename to APPROVED_LIBRARIES.md diff --git a/LICENSE b/LICENSE index b591866..69866e9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 Mila (Quebec AI Institute) +Copyright (c) 2023, amlrt_team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 612fe93..d2c378f 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,209 @@ -[![Build Status](https://api.travis-ci.com/mila-iqia/cookiecutter-pyml.svg?branch=master)](https://travis-ci.com/github/mila-iqia/cookiecutter-pyml) +# amlrt_project -About ------ -A cookiecutter is a generic project template that will instantiate a new project with sane defaults. This repo contains our custom cookiecutter (`cookiecutter-pyml`) which will generate a new python deep learning package preconfigured with best practices in mind. It currently supports: +Replace this line with a short description about your project! 
-* Pytorch (PyTorch Lightning) -* Github Actions (CI/CD) -* Sphinx (documentation) -* Tensorboard (experiment tracking) -* Orion (hyperparameter optimization) -* Flake8 (linting) -* Pytest (unit testing) -More information on what a cookiecutter is [here.](https://cookiecutter.readthedocs.io) +## Instructions to setup the project -Quickstart ----------- +### Install the dependencies: +First, activate a virtual environment (recommended). +Install the package in `editable` mode so you can modify the source directly: -Install the latest version of cookiecutter: + pip install -e . - pip install -U cookiecutter +To add new dependencies, simply add them to the setup.py. +### Add git: -Generate your project using our template. Make sure to use the command exactly as you see it here. -This will use cookiecutter to instantiate your new project from our template (https://github.com/mila-iqia/cookiecutter-pyml.git). + git init - cookiecutter https://github.com/mila-iqia/cookiecutter-pyml.git +### Setup pre-commit hooks: +These hooks will: +* validate flake8 before any commit +* check that jupyter notebook outputs have been stripped -Follow the CLI instructions, then cd into your newly created project folder: + cd .git/hooks/ && ln -s ../../hooks/pre-commit . - cd $YOUR_PROJECT_NAME +### Commit the code -Follow the instructions in the README in the newly created repository (`$YOUR_PROJECT_NAME/README.md`) to get started with your new project (in particular, the section "Instructions to setup the project"). + git add . + git commit -m 'first commit' -Enjoy the cookies! +### Link github to your local repository +Go on github and follow the instructions to create a new project. +When done, do not add any file, and follow the instructions to +link your local git to the remote project, which should look like this: +(PS: these instructions are reported here for your convenience. 
+We suggest also looking at the GitHub project page for more up-to-date info) + + git remote add origin git@github.com:{$GITHUB_USERNAME}/amlrt_project.git + git branch -M main + git push -u origin main + +### Setup Continuous Integration + +Continuous integration will run the following: +- Unit tests under `tests`. +- End-to-end test under `examples/local`. +- `flake8` to check the code syntax. +- Checks on documentation presence and format (using `sphinx`). + +We support the following Continuous Integration providers. +Check the following instructions for more details. + +#### GitHub Actions + +Github actions are already configured in `.github/workflows/ci.yml`. +Github actions are already enabled by default when using Github, so, when +pushing to github, they will be executed automatically for pull requests to +`main` and to `development`. + +#### Azure + +Azure Continuous Integration is already configured in (`.azure_pipeline.yml`). + +To enable it server-side, just log in to Azure and select `.azure_pipeline.yml` as the +configuration file for Continuous Integration. + +## Running the code + +### Run the tests +Just run (from the root folder): + + pytest + +### Run the code/examples. +Note that the code should already compile at this point. + +Running examples can be found under the `examples` folder. + +In particular, you will find examples for: +* local machine (e.g., your laptop). +* a slurm cluster. + +For both these cases, there is the possibility to run with or without Orion. +(Orion is a hyper-parameter search tool - see https://github.com/Epistimio/orion - +that is already configured in this project) + +#### Run locally + +For example, to run on your local machine without Orion: + + cd examples/local + sh run.sh + +This will run a simple MLP on a simple toy task: sum 5 float numbers. +You should see an almost perfect loss of 0 after a few epochs. 
+ +Note you have a new `output` folder which contains models and a summary of results: +* best_model: the best model checkpoint during training +* last_model: the last model checkpoint during training +* lightning_logs: contains the tensorboard logs. + +To view tensorboard logs, simply run: + + tensorboard --logdir output + +#### Run on a remote cluster (with Slurm) + +First, bring your project on the cluster (assuming you didn't create your +project directly there). To do so, simply log in to the cluster and git +clone your project: + + git clone git@github.com:<$GITHUB_USERNAME>/amlrt_project.git + +Then activate your virtual env, and install the dependencies: + + cd amlrt_project + pip install -e . + +To run with Slurm, just: + + cd examples/slurm + sh run.sh + +Check the log to see that you got an almost perfect loss (i.e., 0). + +#### Measure GPU time (and others) on the Mila cluster + +You can track down the GPU time (and other resources) of your jobs by +associating a tag to the job (when using `sbatch`). +To associate a tag to a job, replace `my_tag` with a proper tag, +and uncomment the line (i.e., remove one #) from the line: + + ##SBATCH --wckey=my_tag + +This line is inside the file `examples/slurm_mila/to_submit.sh`. + +To get a summary for a particular tag, just run: + + sacct --allusers --wckeys=my_tag --format=JobID,JobName,Start,Elapsed -X -P --delimiter=',' + +(again, remember to change `my_tag` into the real tag name) + +#### GPU profiling on the Mila cluster + +It can be useful to monitor and profile how you utilise your GPU (usage, memory, etc.). For the time being, you can only monitor your profiling in real-time from the Mila cluster, i.e. while your experiments are running. To monitor your GPU, you need to set up port-forwarding on the host your experiments are running on. 
This can be done in the following way: + +Once you have launched your job on the Mila cluster, open the log for your current experiment: + +`head logs/amlrt_project__<your_slurm_job_id>.err` + +You should see printed in the first few lines the hostname of your machine, e.g., + +``` +INFO:amlrt_project.utils.logging_utils:Experiment info: +hostname: leto35 +git code hash: a51bfc5447d188bd6d31fac3afbd5757650ef524 +data folder: ../data +data folder (abs): /network/tmp1/bronzimi/20191105_cookiecutter/amlrt_project/examples/data +``` + +In a separate shell on your local computer, run the following command: + +`ssh -L 19999:<hostname>.server.mila.quebec:19999 <username>@login.server.mila.quebec -p 2222` + +where `<username>` is your user name on the Mila cluster and `<hostname>` is the name of the machine your job is currently running on (`leto35` in our example). You can then navigate your local browser to `http://localhost:19999/` to view the resources being used on the cluster and monitor your job. You should see something like this: + +![image](https://user-images.githubusercontent.com/18450628/88088807-fe2acd80-cb58-11ea-8ab2-bd090e8a826c.png) +{%- endif %} + +#### Run with Orion on the Slurm cluster + +This example will run orion for 2 trials (see the orion config file). +To do so, go into `examples/slurm_orion`. +Here you can find the orion config file (`orion_config.yaml`), as well as the config +file (`config.yaml`) for your project (that contains the hyper-parameters). + +In general, you will want to run Orion in parallel over N slurm jobs. +To do so, simply run `sh run.sh` N times. + +When Orion has completed the trials, you will find the orion db file. + +You will also find the output of your experiments in `orion_working_dir`, which +will contain a folder for every trial. +Inside these folders, you can find the models (the best one and the last one), the config file with +the hyper-parameters for this trial, and the log file. 
+ +You can check orion status with the following commands: +(to be run from `examples/slurm_orion`) + + export ORION_DB_ADDRESS='orion_db.pkl' + export ORION_DB_TYPE='pickleddb' + orion status + orion info --name my_exp + +### Building docs: + +Documentation is built using sphinx. It will automatically document all functions based on docstrings. +To automatically generate docs for your project, navigate to the `docs` folder and build the documentation: + + cd docs + make html + +To view the docs locally, open `docs/_build/html/index.html` in your browser. + + +## YOUR PROJECT README: + +* __TODO__ diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/__init__.py b/amlrt_project/__init__.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/__init__.py rename to amlrt_project/__init__.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/__init__.py b/amlrt_project/data/__init__.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/__init__.py rename to amlrt_project/data/__init__.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py b/amlrt_project/data/data_loader.py similarity index 96% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py rename to amlrt_project/data/data_loader.py index f490921..178aca6 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py +++ b/amlrt_project/data/data_loader.py @@ -8,8 +8,8 @@ from torch.utils.data import Dataset, DataLoader from torchvision import transforms -from {{cookiecutter.project_slug}}.data.data_preprocess import FashionMnistParser -from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp +from amlrt_project.data.data_preprocess import FashionMnistParser +from amlrt_project.utils.hp_utils import check_and_log_hp logger = logging.getLogger(__name__) # 
__TODO__ change the dataloader to suit your needs... diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_preprocess.py b/amlrt_project/data/data_preprocess.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_preprocess.py rename to amlrt_project/data/data_preprocess.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/evaluate.py b/amlrt_project/evaluate.py similarity index 91% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/evaluate.py rename to amlrt_project/evaluate.py index 619d810..73350ca 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/evaluate.py +++ b/amlrt_project/evaluate.py @@ -7,10 +7,10 @@ import pytorch_lightning as pl -from {{cookiecutter.project_slug}}.data.data_loader import FashionMnistDM -from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp -from {{cookiecutter.project_slug}}.models.model_loader import load_model -from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter +from amlrt_project.data.data_loader import FashionMnistDM +from amlrt_project.utils.hp_utils import check_and_log_hp +from amlrt_project.models.model_loader import load_model +from amlrt_project.utils.logging_utils import LoggerWriter logger = logging.getLogger(__name__) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/__init__.py b/amlrt_project/models/__init__.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/__init__.py rename to amlrt_project/models/__init__.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py b/amlrt_project/models/model_loader.py similarity index 91% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py rename to amlrt_project/models/model_loader.py index 1731766..2c55664 100644 --- 
a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py +++ b/amlrt_project/models/model_loader.py @@ -1,6 +1,6 @@ import logging -from {{cookiecutter.project_slug}}.models.my_model import SimpleMLP +from amlrt_project.models.my_model import SimpleMLP logger = logging.getLogger(__name__) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py b/amlrt_project/models/my_model.py similarity index 95% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py rename to amlrt_project/models/my_model.py index 450ff56..9a43b60 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py +++ b/amlrt_project/models/my_model.py @@ -4,9 +4,9 @@ from torch import nn import pytorch_lightning as pl -from {{cookiecutter.project_slug}}.models.optim import load_loss, load_optimizer +from amlrt_project.models.optim import load_loss, load_optimizer -from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp +from amlrt_project.utils.hp_utils import check_and_log_hp logger = logging.getLogger(__name__) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py b/amlrt_project/models/optim.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py rename to amlrt_project/models/optim.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py b/amlrt_project/train.py similarity index 94% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py rename to amlrt_project/train.py index 04aba87..2cd9456 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py +++ b/amlrt_project/train.py @@ -13,12 +13,12 @@ from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping from orion.client import report_results -from {{cookiecutter.project_slug}}.data.data_loader import FashionMnistDM -from 
{{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp -from {{cookiecutter.project_slug}}.models.model_loader import load_model -from {{cookiecutter.project_slug}}.utils.file_utils import rsync_folder -from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter, log_exp_details -from {{cookiecutter.project_slug}}.utils.reproducibility_utils import set_seed +from amlrt_project.data.data_loader import FashionMnistDM +from amlrt_project.utils.hp_utils import check_and_log_hp +from amlrt_project.models.model_loader import load_model +from amlrt_project.utils.file_utils import rsync_folder +from amlrt_project.utils.logging_utils import LoggerWriter, log_exp_details +from amlrt_project.utils.reproducibility_utils import set_seed logger = logging.getLogger(__name__) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/__init__.py b/amlrt_project/utils/__init__.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/__init__.py rename to amlrt_project/utils/__init__.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/file_utils.py b/amlrt_project/utils/file_utils.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/file_utils.py rename to amlrt_project/utils/file_utils.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py b/amlrt_project/utils/hp_utils.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py rename to amlrt_project/utils/hp_utils.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py b/amlrt_project/utils/logging_utils.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py rename to amlrt_project/utils/logging_utils.py diff --git 
a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/reproducibility_utils.py b/amlrt_project/utils/reproducibility_utils.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/reproducibility_utils.py rename to amlrt_project/utils/reproducibility_utils.py diff --git a/cookiecutter.json b/cookiecutter.json deleted file mode 100644 index 4ebf2a6..0000000 --- a/cookiecutter.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "full_name": "John Doe", - "email": "john.doe@example.com", - "github_username": "johndoe", - "project_name": "Wonderful Project", - "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}", - "project_short_description": "{{ cookiecutter.project_name }} is wonderful!", - "python_version": "3.9", - "environment": ["mila", "generic"], - "pypi_username": "{{ cookiecutter.github_username }}", - "version": "0.0.1", - "open_source_license": ["MIT license", "BSD license", "ISC license", "Apache Software License 2.0", "GNU General Public License v3", "Not open source"] -} diff --git a/{{cookiecutter.project_slug}}/docs/Makefile b/docs/Makefile similarity index 100% rename from {{cookiecutter.project_slug}}/docs/Makefile rename to docs/Makefile diff --git a/{{cookiecutter.project_slug}}/docs/conf.py b/docs/conf.py similarity index 93% rename from {{cookiecutter.project_slug}}/docs/conf.py rename to docs/conf.py index 6916349..a3c29ab 100644 --- a/{{cookiecutter.project_slug}}/docs/conf.py +++ b/docs/conf.py @@ -16,9 +16,9 @@ # -- Project information ----------------------------------------------------- -project = '{{cookiecutter.project_slug}}' -copyright = '2020, {{cookiecutter.full_name}}' -author = '{{cookiecutter.full_name}}' +project = 'amlrt_project' +copyright = '2023, amlrt_team' +author = 'amlrt_team' # -- General configuration --------------------------------------------------- @@ -39,7 +39,7 @@ # autoapi extension for doc strings 
extensions.append('autoapi.extension') autoapi_type = 'python' -autoapi_dirs = ['../{{cookiecutter.project_slug}}/'] +autoapi_dirs = ['../amlrt_project/'] # Skip docstrings for loggers and tests diff --git a/{{cookiecutter.project_slug}}/docs/index.rst b/docs/index.rst similarity index 83% rename from {{cookiecutter.project_slug}}/docs/index.rst rename to docs/index.rst index ca81249..950fc40 100644 --- a/{{cookiecutter.project_slug}}/docs/index.rst +++ b/docs/index.rst @@ -1,9 +1,9 @@ -.. {{cookiecutter.project_slug}} documentation master file, created by +.. amlrt_project documentation master file, created by sphinx-quickstart on Fri Jul 3 10:11:19 2020. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to {{cookiecutter.project_slug}}'s documentation! +Welcome to amlrt_project's documentation! ============================================= About the project diff --git a/{{cookiecutter.project_slug}}/docs/make.bat b/docs/make.bat similarity index 100% rename from {{cookiecutter.project_slug}}/docs/make.bat rename to docs/make.bat diff --git a/{{cookiecutter.project_slug}}/docs/usage/guide.rst b/docs/usage/guide.rst similarity index 85% rename from {{cookiecutter.project_slug}}/docs/usage/guide.rst rename to docs/usage/guide.rst index 9e5b467..0db3d3d 100644 --- a/{{cookiecutter.project_slug}}/docs/usage/guide.rst +++ b/docs/usage/guide.rst @@ -13,4 +13,4 @@ Everybody loves Schrodinger's equation, why not put it everywhere? You can also add math or even link directly in your docstrings! 
For an example, click at the docstrings here: -:py:meth:`{{cookiecutter.project_slug}}.models.optim.load_loss` +:py:meth:`amlrt_project.models.optim.load_loss` diff --git a/{{cookiecutter.project_slug}}/docs/usage/quickstart.md b/docs/usage/quickstart.md similarity index 100% rename from {{cookiecutter.project_slug}}/docs/usage/quickstart.md rename to docs/usage/quickstart.md diff --git a/{{cookiecutter.project_slug}}/examples/local/config.yaml b/examples/local/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local/config.yaml rename to examples/local/config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/local/run.sh b/examples/local/run.sh similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local/run.sh rename to examples/local/run.sh diff --git a/{{cookiecutter.project_slug}}/examples/local_orion/config.yaml b/examples/local_orion/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local_orion/config.yaml rename to examples/local_orion/config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/local_orion/orion_config.yaml b/examples/local_orion/orion_config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local_orion/orion_config.yaml rename to examples/local_orion/orion_config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/local_orion/run.sh b/examples/local_orion/run.sh similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local_orion/run.sh rename to examples/local_orion/run.sh diff --git a/{{cookiecutter.project_slug}}/examples/slurm/config.yaml b/examples/slurm/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm/config.yaml rename to examples/slurm/config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/slurm/run.sh b/examples/slurm/run.sh similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm/run.sh rename to 
examples/slurm/run.sh diff --git a/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh b/examples/slurm/to_submit.sh similarity index 91% rename from {{cookiecutter.project_slug}}/examples/slurm/to_submit.sh rename to examples/slurm/to_submit.sh index 24aaf1c..55aeea3 100644 --- a/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh +++ b/examples/slurm/to_submit.sh @@ -15,7 +15,7 @@ #SBATCH --gres=gpu:1 #SBATCH --mem=5G #SBATCH --time=0:05:00 -#SBATCH --job-name={{ cookiecutter.project_slug }} +#SBATCH --job-name=amlrt_project #SBATCH --output=logs/%x__%j.out #SBATCH --error=logs/%x__%j.err # to attach a tag to your run (e.g., used to track the GPU time) @@ -23,7 +23,7 @@ ##SBATCH --wckey=my_tag # remove one # if you prefer receiving emails ##SBATCH --mail-type=all -##SBATCH --mail-user={{ cookiecutter.email }} +##SBATCH --mail-user=amlrt_email@mila.quebec export MLFLOW_TRACKING_URI='mlruns' diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/config.yaml b/examples/slurm_orion/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/config.yaml rename to examples/slurm_orion/config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/orion_config.yaml b/examples/slurm_orion/orion_config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/orion_config.yaml rename to examples/slurm_orion/orion_config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/run.sh b/examples/slurm_orion/run.sh similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/run.sh rename to examples/slurm_orion/run.sh diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh b/examples/slurm_orion/to_submit.sh similarity index 92% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh rename to examples/slurm_orion/to_submit.sh index c5f444e..baa1031 100644 --- 
a/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh +++ b/examples/slurm_orion/to_submit.sh @@ -1,5 +1,5 @@ #!/bin/bash -#SBATCH --job-name={{ cookiecutter.project_slug }} +#SBATCH --job-name=amlrt_project {%- if cookiecutter.environment == 'mila' %} ## this is for the mila cluster (uncomment it if you need it): ##SBATCH --account=rrg-bengioy-ad @@ -20,7 +20,7 @@ #SBATCH --error=logs/%x__%j.err # remove one # if you prefer receiving emails ##SBATCH --mail-type=all -##SBATCH --mail-user={{ cookiecutter.email }} +##SBATCH --mail-user=amlrt_email@mila.quebec export MLFLOW_TRACKING_URI='mlruns' export ORION_DB_ADDRESS='orion_db.pkl' diff --git a/{{cookiecutter.project_slug}}/hooks/pre-commit b/hooks/pre-commit similarity index 100% rename from {{cookiecutter.project_slug}}/hooks/pre-commit rename to hooks/pre-commit diff --git a/setup.py b/setup.py index b8714d6..56d018a 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,37 @@ setup( - name='cookiecutter-pyml', - version='0.1', - author='Mirko Bronzi', - author_email='m.bronzi@gmail.com', - python_requires='>=3.7', - install_requires=['cookiecutter>=1.4.0'] + name='amlrt_project', + version='0.0.1', + packages=find_packages(include=['amlrt_project', 'amlrt_project.*']), + python_requires='>=3.9', + install_requires=[ + 'flake8==4.0.1', + 'flake8-docstrings==1.6.0', + 'gitpython==3.1.27', + 'jupyter==1.0.0', + 'jinja2<3.1.0', + 'myst-parser==0.18.0', + 'orion>=0.2.4.post1', + 'pyyaml==6.0', + 'pytest==7.1.2', + 'pytest-cov==3.0.0', + 'pytorch_lightning==1.8.3', + 'sphinx==5.1.1', + 'sphinx-autoapi==1.9.0', + 'sphinx-rtd-theme==1.0.0', + 'sphinxcontrib-napoleon==0.7', + 'sphinxcontrib-katex==0.8.6', + 'tensorboard==2.9.1', + 'tqdm==4.64.0', + 'torch==1.12.0', + 'torchvision==0.13.0' + ], + entry_points={ + 'console_scripts': [ + # TODO: change amlrt- prefix, placeholder for now. 
+ 'amlrt-train=amlrt_project.train:main', + 'amlrt-eval=amlrt_project.evaluate:main', + ], + } ) diff --git a/tests/end2end_pytorch/run.sh b/tests/end2end_pytorch/run.sh index 4ce5120..e9c3be5 100755 --- a/tests/end2end_pytorch/run.sh +++ b/tests/end2end_pytorch/run.sh @@ -1,32 +1,8 @@ # exit at the first error set -e -rm -rf wonderful_project # if it already exists -# go to the test folder -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd ${DIR} -cookiecutter ../.. --no-input --output-dir=./ -cd wonderful_project -git init -# setting some fake name/email for git: -git config user.email "you@example.com" -git config user.name "Your Name" -git add -A -git commit -m "initial commit" -pip install -e . --quiet -pip install flake8 pytest --quiet - -# print all dependencies -pip freeze - -# run flake8 test first -sh hooks/pre-commit - -# run tests -pytest --cov=wonderful_project - -# run the example -cd examples/local +# go to the examples folder and run the example +cd $GITHUB_WORKSPACE/examples/local sh run.sh mv output outout_OLD # re-run the example to check reproducibility @@ -41,8 +17,7 @@ else fi # run Orion -cd ../.. -cd examples/local_orion +cd $GITHUB_WORKSPACE/examples/local_orion sh run.sh mv orion_working_dir orion_working_dir_OLD # re-run the example to check reproducibility diff --git a/tests/test_docs/run.sh b/tests/test_docs/run.sh index 77f4bd9..6cc90a7 100755 --- a/tests/test_docs/run.sh +++ b/tests/test_docs/run.sh @@ -1,16 +1,6 @@ # exit at the first error set -e -# go to the test folder -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd ${DIR} - -cookiecutter ../.. --no-input --output-dir=./ -cd wonderful_project -pip install -e . --quiet - -# necessary cause tf dependencies are sometimes not updated -pip install -U setuptools numpy six --quiet # Build the docs -cd docs +cd $GITHUB_WORKSPACE/docs/ sphinx-build -b html -d _build/doctrees . 
_build/html diff --git a/{{cookiecutter.project_slug}}/tests/test_hp_utils.py b/tests/test_hp_utils.py similarity index 89% rename from {{cookiecutter.project_slug}}/tests/test_hp_utils.py rename to tests/test_hp_utils.py index 2b17680..9e16e96 100644 --- a/{{cookiecutter.project_slug}}/tests/test_hp_utils.py +++ b/tests/test_hp_utils.py @@ -1,6 +1,6 @@ import pytest -from {{cookiecutter.project_slug}}.utils.hp_utils import check_hp +from amlrt_project.utils.hp_utils import check_hp def test_check_hp__all_params_are_there(): diff --git a/{{cookiecutter.project_slug}}/.azure_pipeline.yml b/{{cookiecutter.project_slug}}/.azure_pipeline.yml deleted file mode 100644 index f1191b1..0000000 --- a/{{cookiecutter.project_slug}}/.azure_pipeline.yml +++ /dev/null @@ -1,27 +0,0 @@ -jobs: -- job: - pool: - vmImage: 'ubuntu-16.04' - strategy: - matrix: - Python: - python.version: '{{ cookiecutter.python_version }}' - - steps: - - task: UsePythonVersion@0 - displayName: 'Use Python $(python.version)' - inputs: - versionSpec: '$(python.version)' - - - script: pip install -e . 
- displayName: 'Install dependencies' - - - script: sh config/hooks/pre-commit - displayName: 'Running commit hook' - - - script: pytest --cov={{cookiecutter.project_slug}} - displayName: 'Run pytest and display test coverage' - - - script: sh run.sh - workingDirectory: examples/local - displayName: 'Run single toy experiment' diff --git a/{{cookiecutter.project_slug}}/.github/workflows/tests.yml b/{{cookiecutter.project_slug}}/.github/workflows/tests.yml deleted file mode 100644 index bf22c39..0000000 --- a/{{cookiecutter.project_slug}}/.github/workflows/tests.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: unit-tests -on: - # Trigger the workflow on push or pull request, - # but only for the main/develop branch - push: - branches: - - main - - develop - pull_request: - branches: - - main - - develop -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: python-{{ cookiecutter.python_version }} - uses: actions/setup-python@v2 - with: - python-version: {{ cookiecutter.python_version }} - - name: upgrade-pip - run: | - python -m pip install --upgrade pip - - name: flake8 - run: | - pip install flake8==4.0.1 flake8-docstrings==1.6.0 - hooks/pre-commit - - name: install-dependencies - run: | - pip install pytest - pip install -e . 
- - name: print env - run: | - env - - name: pytest_and_coverage - run: | - pytest --cov={{cookiecutter.project_slug}} - - name: end2end-toyexp - run: | - cd examples/local - sh run.sh diff --git a/{{cookiecutter.project_slug}}/.gitignore b/{{cookiecutter.project_slug}}/.gitignore deleted file mode 100644 index a06f135..0000000 --- a/{{cookiecutter.project_slug}}/.gitignore +++ /dev/null @@ -1,110 +0,0 @@ -.idea -mlruns - -# Ignore example outputs -examples/*/output/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ diff --git a/{{cookiecutter.project_slug}}/LICENSE b/{{cookiecutter.project_slug}}/LICENSE deleted file mode 100644 index 24b2600..0000000 --- a/{{cookiecutter.project_slug}}/LICENSE +++ /dev/null @@ -1,111 +0,0 @@ -{% 
if cookiecutter.open_source_license == 'MIT license' -%} -MIT License - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -{% elif cookiecutter.open_source_license == 'BSD license' %} - -BSD License - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. 
- -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. -{% elif cookiecutter.open_source_license == 'ISC license' -%} -ISC License - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} - -Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-{% elif cookiecutter.open_source_license == 'Apache Software License 2.0' -%} -Apache Software License 2.0 - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -{% elif cookiecutter.open_source_license == 'GNU General Public License v3' -%} -GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - {{ cookiecutter.project_short_description }} - Copyright (C) {% now 'local', '%Y' %} {{ cookiecutter.full_name }} - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. 
If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. -{% endif %} diff --git a/{{cookiecutter.project_slug}}/README.md b/{{cookiecutter.project_slug}}/README.md deleted file mode 100644 index 798f39e..0000000 --- a/{{cookiecutter.project_slug}}/README.md +++ /dev/null @@ -1,215 +0,0 @@ -{% set is_open_source = cookiecutter.open_source_license != 'Not open source' -%} - -# {{ cookiecutter.project_name }} - - -{{ cookiecutter.project_short_description }} - -{% if is_open_source %} -* Free software: {{ cookiecutter.open_source_license }} -{% endif %} - - -## Instructions to setup the project - -### Install the dependencies: -(remember to activate the virtual env if you want to use one) -Add new dependencies (if needed) to setup.py. - - pip install -e . - -### Add git: - - git init - -### Setup pre-commit hooks: -These hooks will: -* validate flake8 before any commit -* check that jupyter notebook outputs have been stripped - - cd .git/hooks/ && ln -s ../../hooks/pre-commit . && cd - - -### Commit the code - - git add . - git commit -m 'first commit' - -### Link github to your local repository -Go on github and follow the instructions to create a new project. -When done, do not add any file, and follow the instructions to -link your local git to the remote project, which should look like this: -(PS: these instructions are reported here for your convenience. -We suggest to also look at the GitHub project page for more up-to-date info) - - git remote add origin git@github.com:{{ cookiecutter.github_username }}/{{ cookiecutter.project_slug }}.git - git branch -M main - git push -u origin main - -### Setup Continuous Integration - -Continuous integration will run the following: -- Unit tests under `tests`. -- End-to-end test under `exmaples/local`. 
-- `flake8` to check the code syntax. -- Checks on documentation presence and format (using `sphinx`). - -We support the following Continuous Integration providers. -Check the following instructions for more details. - -#### GitHub Actions - -Github actions are already configured in `.github/workflows/tests.yml`. -Github actions are already enabled by default when using Github, so, when -pushing to github, they will be executed automatically for pull requests to -`main` and to `develop`. - -#### Azure - -Azure Continuous Integration is already configured in (`.azure_pipeline.yml`). - -To enable it server-side, just in azure and select `.azure_pipeline.yml` as the -configuration one for Continuous Integration. - -## Running the code - -### Run the tests -Just run (from the root folder): - - pytest - -### Run the code/examples. -Note that the code should already compile at this point. - -Running examples can be found under the `examples` folder. - -In particular, you will find examples for: -* local machine (e.g., your laptop). -* a slurm cluster. - -For both these cases, there is the possibility to run with or without Orion. -(Orion is a hyper-parameter search tool - see https://github.com/Epistimio/orion - -that is already configured in this project) - -#### Run locally - -For example, to run on your local machine without Orion: - - cd examples/local - sh run.sh - -This will run a simple MLP on a simple toy task: sum 5 float numbers. -You should see an almost perfect loss of 0 after a few epochs. - -Note you have a new `output` folder which contains models and a summary of results: -* best_model: the best model checkpoint during training -* last_model: the last model checkpoint during training -* lightning_logs: contains the tensorboard logs. - -To view tensorboard logs, simply run: - - tensorboard --logdir output - -#### Run on a remote cluster (with Slurm) - -First, bring you project on the cluster (assuming you didn't create your -project directly there). 
To do so, simply login on the cluster and git -clone your project: - - git clone git@github.com:{{ cookiecutter.github_username }}/{{ cookiecutter.project_slug }}.git - -Then activate your virtual env, and install the dependencies: - - cd {{ cookiecutter.project_slug }} - pip install -e . - -To run with Slurm, just: - - cd examples/slurm - sh run.sh - -Check the log to see that you got an almost perfect loss (i.e., 0). -{%- if cookiecutter.environment == 'mila' %} - -#### Measure GPU time (and others) on the Mila cluster - -You can track down the GPU time (and other resources) of your jobs by -associating a tag to the job (when using `sbatch`). -To associate a tag to a job, replace `my_tag` with a proper tag, -and uncomment the line (i.e., remove one #) from the line: - - ##SBATCH --wckey=my_tag - -This line is inside the file `examples/slurm_mila/to_submit.sh`. - -To get a sumary for a particular tag, just run: - - sacct --allusers --wckeys=my_tag --format=JobID,JobName,Start,Elapsed -X -P --delimiter=',' - -(again, remember to change `my_tag` into the real tag name) - -#### GPU profiling on the Mila cluster - -It can be useful to monitor and profile how you utilise your GPU (usage, memory, etc.). For the time being, you can only monitor your profiling in real-time from the Mila cluster, i.e. while your experiments are running. To monitor your GPU, you need to setup port-forwarding on the host your experiments are running on. 
This can be done in the following way: - -Once you have launched your job on the mila cluster, open the log for your current experiment: - -`head logs/{{ cookiecutter.project_slug }}__.err` - -You should see printed in the first few lines the hostname of your machine, e.g., - -``` -INFO:{{ cookiecutter.project_slug }}.utils.logging_utils:Experiment info: -hostname: leto35 -git code hash: a51bfc5447d188bd6d31fac3afbd5757650ef524 -data folder: ../data -data folder (abs): /network/tmp1/bronzimi/20191105_cookiecutter/{{ cookiecutter.project_slug }}/examples/data -``` - -In a separate shell on your local computer, run the following command: - -`ssh -L 19999:.server.mila.quebec:19999 @login.server.mila.quebec -p 2222` - -where `` is your user name on the Mila cluster and `` is the name of the machine your job is currenty running on (`leto35` in our example). You can then navigate your local browser to `http://localhost:19999/` to view the ressources being used on the cluster and monitor your job. You should see something like this: - -![image](https://user-images.githubusercontent.com/18450628/88088807-fe2acd80-cb58-11ea-8ab2-bd090e8a826c.png) -{%- endif %} - -#### Run with Orion on the Slurm cluster - -This example will run orion for 2 trials (see the orion config file). -To do so, go into `examples/slurm_orion`. -Here you can find the orion config file (`orion_config.yaml`), as well as the config -file (`config.yaml`) for your project (that contains the hyper-parameters). - -In general, you will want to run Orion in parallel over N slurm jobs. -To do so, simply run `sh run.sh` N times. - -When Orion has completed the trials, you will find the orion db file. - -You will also find the output of your experiments in `orion_working_dir`, which -will contain a folder for every trial. -Inside these folders, you can find the models (the best one and the last one), the config file with -the hyper-parameters for this trial, and the log file. 
- -You can check orion status with the following commands: -(to be run from `examples/slurm_orion`) - - export ORION_DB_ADDRESS='orion_db.pkl' - export ORION_DB_TYPE='pickleddb' - orion status - orion info --name my_exp - -### Building docs: - -Documentation is built using sphinx. It will automatically document all functions based on docstrings. -To automatically generate docs for your project, navigate to the `docs` folder and build the documentation: - - cd docs - make html - -To view the docs locally, open `docs/_build/html/index.html` in your browser. - - -## YOUR PROJECT README: - -* __TODO__ diff --git a/{{cookiecutter.project_slug}}/setup.py b/{{cookiecutter.project_slug}}/setup.py deleted file mode 100644 index 406dafa..0000000 --- a/{{cookiecutter.project_slug}}/setup.py +++ /dev/null @@ -1,38 +0,0 @@ -from setuptools import setup, find_packages - - -setup( - name='{{ cookiecutter.project_slug }}', - version='{{ cookiecutter.version }}', - packages=find_packages(include=['{{ cookiecutter.project_slug }}', '{{ cookiecutter.project_slug }}.*']), - python_requires='>={{ cookiecutter.python_version }}', - install_requires=[ - 'flake8==4.0.1', - 'flake8-docstrings==1.6.0', - 'gitpython==3.1.27', - 'jupyter==1.0.0', - 'jinja2<3.1.0', - 'myst-parser==0.18.0', - 'orion>=0.2.4.post1', - 'pyyaml==6.0', - 'pytest==7.1.2', - 'pytest-cov==3.0.0', - 'pytorch_lightning==1.8.3', - 'sphinx==5.1.1', - 'sphinx-autoapi==1.9.0', - 'sphinx-rtd-theme==1.0.0', - 'sphinxcontrib-napoleon==0.7', - 'sphinxcontrib-katex==0.8.6', - 'tensorboard==2.9.1', - 'tqdm==4.64.0', - 'torch==1.12.0', - 'torchvision==0.13.0' - ], - entry_points={ - 'console_scripts': [ - # TODO: change amlrt- prefix, placeholder for now. 
- 'amlrt-train={{ cookiecutter.project_slug }}.train:main', - 'amlrt-eval={{ cookiecutter.project_slug }}.evaluate:main', - ], - } -) From cd1389978382bf17bdf632fdf58aab244e689557 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Fri, 20 Jan 2023 14:17:27 -0500 Subject: [PATCH 3/8] Add iSort to the CI (#97) --- .github/workflows/ci.yml | 2 +- amlrt_project/data/data_loader.py | 4 ++-- amlrt_project/data/data_preprocess.py | 6 +++--- amlrt_project/evaluate.py | 5 ++--- amlrt_project/models/my_model.py | 3 +-- amlrt_project/models/optim.py | 1 - amlrt_project/train.py | 11 +++++------ amlrt_project/utils/logging_utils.py | 2 +- hooks/pre-commit | 7 ++++--- setup.py | 3 +-- 10 files changed, 20 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5d60c0a..f56d10b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - name: linting checks run: | python -m pip install --upgrade pip - pip install flake8 flake8-docstrings + pip install flake8 flake8-docstrings isort sh hooks/pre-commit - name: install project dependencies run: | diff --git a/amlrt_project/data/data_loader.py b/amlrt_project/data/data_loader.py index 178aca6..2fb00d1 100644 --- a/amlrt_project/data/data_loader.py +++ b/amlrt_project/data/data_loader.py @@ -1,11 +1,11 @@ import logging -from typing import Callable import typing +from typing import Callable import numpy as np import pytorch_lightning as pl import torch -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import DataLoader, Dataset from torchvision import transforms from amlrt_project.data.data_preprocess import FashionMnistParser diff --git a/amlrt_project/data/data_preprocess.py b/amlrt_project/data/data_preprocess.py index 1ed8a24..b81bac5 100644 --- a/amlrt_project/data/data_preprocess.py +++ b/amlrt_project/data/data_preprocess.py @@ -1,8 +1,8 @@ -import os -import logging -import urllib.request import gzip +import logging +import 
os import typing +import urllib.request import numpy as np diff --git a/amlrt_project/evaluate.py b/amlrt_project/evaluate.py index 73350ca..b235e7d 100644 --- a/amlrt_project/evaluate.py +++ b/amlrt_project/evaluate.py @@ -2,14 +2,13 @@ import logging import sys +import pytorch_lightning as pl import yaml from yaml import load -import pytorch_lightning as pl - from amlrt_project.data.data_loader import FashionMnistDM -from amlrt_project.utils.hp_utils import check_and_log_hp from amlrt_project.models.model_loader import load_model +from amlrt_project.utils.hp_utils import check_and_log_hp from amlrt_project.utils.logging_utils import LoggerWriter logger = logging.getLogger(__name__) diff --git a/amlrt_project/models/my_model.py b/amlrt_project/models/my_model.py index 9a43b60..e44cdfe 100644 --- a/amlrt_project/models/my_model.py +++ b/amlrt_project/models/my_model.py @@ -1,11 +1,10 @@ import logging import typing -from torch import nn import pytorch_lightning as pl +from torch import nn from amlrt_project.models.optim import load_loss, load_optimizer - from amlrt_project.utils.hp_utils import check_and_log_hp logger = logging.getLogger(__name__) diff --git a/amlrt_project/models/optim.py b/amlrt_project/models/optim.py index 3b5f709..a124684 100644 --- a/amlrt_project/models/optim.py +++ b/amlrt_project/models/optim.py @@ -3,7 +3,6 @@ import torch from torch import optim - logger = logging.getLogger(__name__) diff --git a/amlrt_project/train.py b/amlrt_project/train.py index 2cd9456..b9e129e 100644 --- a/amlrt_project/train.py +++ b/amlrt_project/train.py @@ -1,22 +1,21 @@ import argparse +import glob import logging import os import shutil import sys -import glob - -import yaml -from yaml import load import orion import pytorch_lightning as pl -from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping +import yaml from orion.client import report_results +from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint +from yaml import load 
from amlrt_project.data.data_loader import FashionMnistDM -from amlrt_project.utils.hp_utils import check_and_log_hp from amlrt_project.models.model_loader import load_model from amlrt_project.utils.file_utils import rsync_folder +from amlrt_project.utils.hp_utils import check_and_log_hp from amlrt_project.utils.logging_utils import LoggerWriter, log_exp_details from amlrt_project.utils.reproducibility_utils import set_seed diff --git a/amlrt_project/utils/logging_utils.py b/amlrt_project/utils/logging_utils.py index dd87ac0..8b4367b 100644 --- a/amlrt_project/utils/logging_utils.py +++ b/amlrt_project/utils/logging_utils.py @@ -2,8 +2,8 @@ import os import socket -from pip._internal.operations import freeze from git import InvalidGitRepositoryError, Repo +from pip._internal.operations import freeze logger = logging.getLogger(__name__) diff --git a/hooks/pre-commit b/hooks/pre-commit index 46304e4..11a19a5 100755 --- a/hooks/pre-commit +++ b/hooks/pre-commit @@ -1,11 +1,12 @@ -#!/bin/sh +#!/bin/sh # exit at the first error set -e -# flake8 linting +# linting flake8 --ignore D,W503 --max-line-length=100 . # Check everything but docstrings flake8 --select D --ignore D104,D100,D401 --docstring-convention google --exclude tests/ # Check only the docstrings +isort --check . # Check imports # Raise error if any staged notebooks contain outputs GITDIR=$(git rev-parse --show-toplevel) # Full path to git working directory @@ -17,7 +18,7 @@ if [ "$IPYNB_FILES" != "" ] && [ -z $ALLOW_IPYNB ]; then if [ "$DIFF" != "" ]; then echo " The notebook $GITDIR/$f contains outputs. - Remove them all before committing. + Remove them all before committing. 
***Hint*** use the command: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --ClearMetadataPreprocessor.enabled=True --to notebook --inplace $GITDIR/$f diff --git a/setup.py b/setup.py index 56d018a..1156cb9 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ -from setuptools import setup, find_packages - +from setuptools import find_packages, setup setup( name='amlrt_project', From fb0768b6929b3ab572f9e80d122dd2b181a75b22 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Fri, 20 Jan 2023 14:17:44 -0500 Subject: [PATCH 4/8] bump jinja to latest version (#98) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1156cb9..ed08eef 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'flake8-docstrings==1.6.0', 'gitpython==3.1.27', 'jupyter==1.0.0', - 'jinja2<3.1.0', + 'jinja2==3.1.2', 'myst-parser==0.18.0', 'orion>=0.2.4.post1', 'pyyaml==6.0', From a94e380969cd268e960a77889713429d5a03b391 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Fri, 20 Jan 2023 14:33:17 -0500 Subject: [PATCH 5/8] Add pytype to CI (#99) --- .github/workflows/ci.yml | 3 +++ amlrt_project/data/data_loader.py | 4 ++-- amlrt_project/data/data_preprocess.py | 4 ++-- setup.py | 1 + 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f56d10b..f1d3e96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,9 @@ jobs: - name: pytorch-end2end run: | ./tests/end2end_pytorch/run.sh + - name: type checking + run: | + pytype amlrt_project/ - name: doc-creation-test run: | ./tests/test_docs/run.sh diff --git a/amlrt_project/data/data_loader.py b/amlrt_project/data/data_loader.py index 2fb00d1..7c89ef6 100644 --- a/amlrt_project/data/data_loader.py +++ b/amlrt_project/data/data_loader.py @@ -1,6 +1,6 @@ import logging import typing -from typing import Callable +from typing import Callable, Optional import numpy as np import pytorch_lightning as pl @@ 
-22,7 +22,7 @@ def __init__( self, images: np.ndarray, labels: np.ndarray, - transform: Callable[[torch.tensor], torch.tensor] = None, + transform: Optional[Callable[[torch.tensor], torch.tensor]] = None, ): """Initialize Dataset. diff --git a/amlrt_project/data/data_preprocess.py b/amlrt_project/data/data_preprocess.py index b81bac5..83ee325 100644 --- a/amlrt_project/data/data_preprocess.py +++ b/amlrt_project/data/data_preprocess.py @@ -85,7 +85,7 @@ def extract_labels(fname: typing.AnyStr): return labels @staticmethod - def val_from_train(images: np.array, labels: np.array, val_pct: float): + def val_from_train(images: np.ndarray, labels: np.ndarray, val_pct: float): """Fashion mnist doesn't have a validation set, we create one here.""" assert 0 < val_pct < 1 num_samples = len(images) @@ -101,7 +101,7 @@ def val_from_train(images: np.array, labels: np.array, val_pct: float): return train_images, train_labels, val_images, val_labels @staticmethod - def subsample_dataset(images: np.array, labels: np.array, num_samples: int): + def subsample_dataset(images: np.ndarray, labels: np.ndarray, num_samples: int): """Extract a subset of the dataset to speed up training.""" return images[:num_samples], labels[:num_samples] diff --git a/setup.py b/setup.py index ed08eef..843c9ea 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ 'pytest==7.1.2', 'pytest-cov==3.0.0', 'pytorch_lightning==1.8.3', + 'pytype==2023.1.17', 'sphinx==5.1.1', 'sphinx-autoapi==1.9.0', 'sphinx-rtd-theme==1.0.0', From 1a8a66c40ed1960da9db8e764a0300fcd5b9af14 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Mon, 23 Jan 2023 13:06:01 -0500 Subject: [PATCH 6/8] fix model loading (#102) --- amlrt_project/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amlrt_project/evaluate.py b/amlrt_project/evaluate.py index b235e7d..d717d8a 100644 --- a/amlrt_project/evaluate.py +++ b/amlrt_project/evaluate.py @@ -85,7 +85,7 @@ def evaluate(args, data_dir, hyper_params): 
datamodule.setup() model = load_model(hyper_params) - model.load_from_checkpoint(args.ckpt_path) + model = model.load_from_checkpoint(args.ckpt_path) val_metrics = trainer.validate(model, datamodule=datamodule) test_metrics = trainer.test(model, datamodule=datamodule) From 1c4c8386235b957ac48ced79ee17f4e89c264e7f Mon Sep 17 00:00:00 2001 From: mirkobronzi Date: Fri, 24 Feb 2023 14:43:28 -0500 Subject: [PATCH 7/8] added info about how to instantiate a new project --- README.md | 81 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index d2c378f..b50a5af 100644 --- a/README.md +++ b/README.md @@ -1,45 +1,69 @@ -# amlrt_project +# AMLRT Cookiecutter - Initialize a new project +First, git clone this project template locally. -Replace this line with a short description about your project! - + git clone https://github.com/mila-iqia/cookiecutter-pyml.git -## Instructions to setup the project +Select a name for the new project; in the following we assume that +the name is `${PROJECT_NAME}`. Change it accordingly to the correct name. -### Install the dependencies: -First, activate a virtual environment (recommended). -Install the package in `editable` mode so you can modify the source directly: +Rename your just-cloned folder to the new project name: - pip install -e . + mv cookiecutter-pyml ${PROJECT_NAME} -To add new dependencies, simply add them to the setup.py. -### Add git: +Now go into the project folder and delete the git history. - git init + cd ${PROJECT_NAME} + rm -fr .git -### Setup pre-commit hooks: -These hooks will: -* validate flake8 before any commit -* check that jupyter notebook outputs have been stripped +This is done so that your new project will start with a clean git history. +Now, initialize the repository with git: - cd .git/hooks/ && ln -s ../../hooks/pre-commit . + git init -### Commit the code +And perform the first commit: git add . 
git commit -m 'first commit' -### Link github to your local repository Go on github and follow the instructions to create a new project. When done, do not add any file, and follow the instructions to link your local git to the remote project, which should look like this: (PS: these instructions are reported here for your convenience. We suggest to also look at the GitHub project page for more up-to-date info) - git remote add origin git@github.com:{$GITHUB_USERNAME}/amlrt_project.git + git remote add origin git@github.com:${GITHUB_USERNAME}/${PROJECT_NAME}.git git branch -M main git push -u origin main +At this point, the local code is versioned with git and pushed to GitHub. +You will not need to use the instructions in this section anymore, so we +suggest to delete this section entirely. +(by doing so it will be clear that the initialization has been already done, +and all you need from now on is just to git clone from the repository you +just pushed, i.e., `git@github.com:{$GITHUB_USERNAME}/${PROJECT_NAME}.git`). + +# amlrt_project (change this name to the name of your project) + +Replace this line with a short description about your project! + +## Instructions to setup the project + +### Install the dependencies: +First, activate a virtual environment (recommended). +Install the package in `editable` mode so you can modify the source directly: + + pip install -e . + +To add new dependencies, simply add them to the setup.py. + +### Setup pre-commit hooks: +These hooks will: +* validate flake8 before any commit +* check that jupyter notebook outputs have been stripped + + cd .git/hooks/ && ln -s ../../hooks/pre-commit . + ### Setup Continuous Integration Continuous integration will run the following: @@ -48,23 +72,13 @@ Continuous integration will run the following: - `flake8` to check the code syntax. - Checks on documentation presence and format (using `sphinx`). -We support the following Continuous Integration providers. 
-Check the following instructions for more details. - -#### GitHub Actions +We support GitHub Actions for running CI. Github actions are already configured in `.github/workflows/tests.yml`. Github actions are already enabled by default when using Github, so, when pushing to github, they will be executed automatically for pull requests to `main` and to `develop`. -#### Azure - -Azure Continuous Integration is already configured in (`.azure_pipeline.yml`). - -To enable it server-side, just in azure and select `.azure_pipeline.yml` as the -configuration one for Continuous Integration. - ## Running the code ### Run the tests @@ -110,7 +124,7 @@ First, bring you project on the cluster (assuming you didn't create your project directly there). To do so, simply login on the cluster and git clone your project: - git clone git@github.com:<$GITHUB_USERNAME>/amlrt_project.git + git clone git@github.com:${GITHUB_USERNAME}/${PROJECT_NAME}.git Then activate your virtual env, and install the dependencies: @@ -143,7 +157,10 @@ To get a sumary for a particular tag, just run: #### GPU profiling on the Mila cluster -It can be useful to monitor and profile how you utilise your GPU (usage, memory, etc.). For the time being, you can only monitor your profiling in real-time from the Mila cluster, i.e. while your experiments are running. To monitor your GPU, you need to setup port-forwarding on the host your experiments are running on. This can be done in the following way: +It can be useful to monitor and profile how you utilise your GPU (usage, memory, etc.). For the +time being, you can only monitor your profiling in real-time from the Mila cluster, i.e. while your +experiments are running. To monitor your GPU, you need to setup port-forwarding on the host your +experiments are running on.
This can be done in the following way: Once you have launched your job on the mila cluster, open the log for your current experiment: From 850a8706d5cfda11ec252fe32b933a440aa03b4a Mon Sep 17 00:00:00 2001 From: mirkobronzi Date: Fri, 24 Feb 2023 14:45:55 -0500 Subject: [PATCH 8/8] minor improvements --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b50a5af..e0a0fd3 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,10 @@ We suggest to also look at the GitHub project page for more up-to-date info) At this point, the local code is versioned with git and pushed to GitHub. You will not need to use the instructions in this section anymore, so we -suggest to delete this section entirely. +suggest deleting this section ("AMLRT Cookiecutter - Initialize a new project") entirely. (by doing so it will be clear that the initialization has been already done, and all you need from now on is just to git clone from the repository you -just pushed, i.e., `git@github.com:{$GITHUB_USERNAME}/${PROJECT_NAME}.git`). +just pushed, i.e., `git@github.com:${GITHUB_USERNAME}/${PROJECT_NAME}.git`). # amlrt_project (change this name to the name of your project)