diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ec21256..f1d3e96 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,11 +4,11 @@ on:
   # but only for the main branch
   push:
     branches:
-      - master
+      - main
       - development
   pull_request:
     branches:
-      - master
+      - main
       - development
 jobs:
   build:
@@ -19,16 +19,23 @@ jobs:
       uses: actions/setup-python@v2
       with:
         python-version: 3.9
-    - name: install-dependencies
+    - name: linting checks
       run: |
         python -m pip install --upgrade pip
+        pip install flake8 flake8-docstrings isort
+        sh hooks/pre-commit
+    - name: install project dependencies
+      run: |
         pip install -e .
-    - name: print env
+    - name: unit tests
       run: |
-        env
-    - name: pytorch-test
+        pytest --cov=amlrt_project
+    - name: pytorch-end2end
       run: |
         ./tests/end2end_pytorch/run.sh
+    - name: type checking
+      run: |
+        pytype amlrt_project/
     - name: doc-creation-test
       run: |
         ./tests/test_docs/run.sh
diff --git a/.gitignore b/.gitignore
index a306c16..50e214b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,10 @@
 .idea
 mlruns
 
+examples/data/
+examples/*/output/
+examples/*/lightning_logs/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/{{cookiecutter.project_slug}}/APPROVED_LIBRARIES.md b/APPROVED_LIBRARIES.md
similarity index 100%
rename from {{cookiecutter.project_slug}}/APPROVED_LIBRARIES.md
rename to APPROVED_LIBRARIES.md
diff --git a/LICENSE b/LICENSE
index b591866..69866e9 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020 Mila (Quebec AI Institute)
+Copyright (c) 2023, amlrt_team
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 612fe93..e0a0fd3 100644
--- a/README.md
+++ b/README.md
@@ -1,36 +1,226 @@
-[![Build Status](https://api.travis-ci.com/mila-iqia/cookiecutter-pyml.svg?branch=master)](https://travis-ci.com/github/mila-iqia/cookiecutter-pyml)
+# AMLRT Cookiecutter - Initialize a new project
 
-About
------
+First, git clone this project template locally.
 
-A cookiecutter is a generic project template that will instantiate a new project with sane defaults. This repo contains our custom cookiecutter (`cookiecutter-pyml`) which will generate a new python deep learning package preconfigured with best practices in mind. It currently supports:
+    git clone https://github.com/mila-iqia/cookiecutter-pyml.git
 
-* Pytorch (PyTorch Lightning)
-* Github Actions (CI/CD)
-* Sphinx (documentation)
-* Tensorboard (experiment tracking)
-* Orion (hyperparameter optimization)
-* Flake8 (linting)
-* Pytest (unit testing)
+Select a name for the new project; in the following we assume that
+the name is `${PROJECT_NAME}`. Substitute your actual project name
+wherever `${PROJECT_NAME}` appears below.
 
-More information on what a cookiecutter is [here.](https://cookiecutter.readthedocs.io)
+Rename your just-cloned folder to the new project name:
 
-Quickstart
-----------
+    mv cookiecutter-pyml ${PROJECT_NAME}
 
-Install the latest version of cookiecutter:
+Now go into the project folder and delete the git history.
 
-    pip install -U cookiecutter
+    cd ${PROJECT_NAME}
+    rm -fr .git
 
-Generate your project using our template. Make sure to use the command exactly as you see it here.
-This will use cookiecutter to instantiate your new project from our template (https://github.com/mila-iqia/cookiecutter-pyml.git).
+This is done so that your new project will start with a clean git history.
+Now, initialize the repository with git:
 
-    cookiecutter https://github.com/mila-iqia/cookiecutter-pyml.git
+    git init
 
-Follow the CLI instructions, then cd into your newly created project folder:
+And perform the first commit:
 
-    cd $YOUR_PROJECT_NAME
+    git add .
+    git commit -m 'first commit'
 
-Follow the instructions in the README in the newly created repository (`$YOUR_PROJECT_NAME/README.md`) to get started with your new project (in particular, the section "Instructions to setup the project").
+Go on GitHub and follow the instructions to create a new project.
+When done, do not add any file, and follow the instructions to
+link your local git to the remote project, which should look like this:
+(PS: these instructions are reported here for your convenience.
+We also suggest looking at the GitHub project page for more up-to-date info.)
 
-Enjoy the cookies!
+    git remote add origin git@github.com:${GITHUB_USERNAME}/${PROJECT_NAME}.git
+    git branch -M main
+    git push -u origin main
+
+At this point, the local code is versioned with git and pushed to GitHub.
+You will not need the instructions in this section anymore, so we suggest
+deleting this section ("AMLRT Cookiecutter - Initialize a new project") entirely.
+(By doing so it will be clear that the initialization has already been done,
+and all you need from now on is just to git clone from the repository you
+just pushed, i.e., `git@github.com:${GITHUB_USERNAME}/${PROJECT_NAME}.git`.)
+
+# amlrt_project (change this name to the name of your project)
+
+Replace this line with a short description of your project!
+
+## Instructions to setup the project
+
+### Install the dependencies:
+First, activate a virtual environment (recommended).
+Install the package in `editable` mode so you can modify the source directly:
+
+    pip install -e .
+
+To add new dependencies, simply add them to the setup.py.
+
+### Setup pre-commit hooks:
+These hooks will:
+* run flake8 checks before any commit
+* check that jupyter notebook outputs have been stripped
+
+    cd .git/hooks/ && ln -s ../../hooks/pre-commit .
+
+### Setup Continuous Integration
+
+Continuous integration will run the following:
+- Unit tests under `tests`.
+- End-to-end test under `examples/local`.
+- `flake8` to check the code syntax.
+- Checks on documentation presence and format (using `sphinx`).
+
+We support GitHub Actions for running CI.
+
+GitHub Actions are already configured in `.github/workflows/ci.yml`.
+GitHub Actions are enabled by default on GitHub, so, when pushing to GitHub,
+they will be executed automatically for pull requests to
+`main` and to `development`.
+
+## Running the code
+
+### Run the tests
+Just run (from the root folder):
+
+    pytest
+
+### Run the code/examples.
+Note that the code should already compile at this point.
+
+Runnable examples can be found under the `examples` folder.
+
+In particular, you will find examples for:
+* local machine (e.g., your laptop).
+* a slurm cluster.
+
+In both cases, you can run with or without Orion.
+(Orion is a hyper-parameter search tool - see https://github.com/Epistimio/orion -
+that is already configured in this project.)
+
+#### Run locally
+
+For example, to run on your local machine without Orion:
+
+    cd examples/local
+    sh run.sh
+
+This will run a simple MLP on a simple toy task: sum 5 float numbers.
+You should see an almost perfect loss of 0 after a few epochs.
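+
+Under the hood, `run.sh` calls the `amlrt-train` entry point with `config.yaml`.
+The entry points read that file with PyYAML and then validate that the expected
+hyper-parameters are present. A minimal sketch of that flow (the key list mirrors
+the `check_and_log_hp` calls in `train.py`/`evaluate.py`; treat the snippet as
+illustrative, not as the exact code):
+
+```
+import yaml
+
+with open('config.yaml', 'r') as stream:
+    hyper_params = yaml.load(stream, Loader=yaml.FullLoader)
+
+# train.py/evaluate.py validate these keys before running:
+for key in ['architecture', 'batch_size', 'exp_name', 'max_epoch', 'early_stopping']:
+    assert key in hyper_params, f'missing hyper-parameter: {key}'
+```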
+
+Note you have a new `output` folder which contains models and a summary of results:
+* best_model: the best model checkpoint during training
+* last_model: the last model checkpoint during training
+* lightning_logs: contains the tensorboard logs.
+
+To view tensorboard logs, simply run:
+
+    tensorboard --logdir output
+
+#### Run on a remote cluster (with Slurm)
+
+First, bring your project onto the cluster (assuming you didn't create your
+project directly there). To do so, simply login on the cluster and git
+clone your project:
+
+    git clone git@github.com:${GITHUB_USERNAME}/${PROJECT_NAME}.git
+
+Then activate your virtual env, and install the dependencies:
+
+    cd amlrt_project
+    pip install -e .
+
+To run with Slurm, just:
+
+    cd examples/slurm
+    sh run.sh
+
+Check the log to see that you got an almost perfect loss (i.e., 0).
+
+#### Measure GPU time (and others) on the Mila cluster
+
+You can track down the GPU time (and other resources) of your jobs by
+associating a tag to the job (when using `sbatch`).
+To associate a tag to a job, replace `my_tag` with a proper tag,
+and uncomment the line (i.e., remove one #):
+
+    ##SBATCH --wckey=my_tag
+
+This line is inside the file `examples/slurm/to_submit.sh`.
+
+To get a summary for a particular tag, just run:
+
+    sacct --allusers --wckeys=my_tag --format=JobID,JobName,Start,Elapsed -X -P --delimiter=','
+
+(again, remember to change `my_tag` into the real tag name)
+
+#### GPU profiling on the Mila cluster
+
+It can be useful to monitor and profile how you utilise your GPU (usage, memory, etc.). For the
+time being, you can only monitor your profiling in real-time from the Mila cluster, i.e. while your
+experiments are running. To monitor your GPU, you need to setup port-forwarding on the host your
+experiments are running on. This can be done in the following way:
+
+Once you have launched your job on the mila cluster, open the log for your current experiment:
+
+`head logs/amlrt_project__<job_id>.err`
+
+You should see printed in the first few lines the hostname of your machine, e.g.,
+
+```
+INFO:amlrt_project.utils.logging_utils:Experiment info:
+hostname: leto35
+git code hash: a51bfc5447d188bd6d31fac3afbd5757650ef524
+data folder: ../data
+data folder (abs): /network/tmp1/bronzimi/20191105_cookiecutter/amlrt_project/examples/data
+```
+
+In a separate shell on your local computer, run the following command:
+
+`ssh -L 19999:<node>.server.mila.quebec:19999 <user>@login.server.mila.quebec -p 2222`
+
+where `<user>` is your user name on the Mila cluster and `<node>` is the name of the machine your
+job is currently running on (`leto35` in our example). You can then navigate your local browser to
+`http://localhost:19999/` to view the resources being used on the cluster and monitor your job.
+You should see something like this:
+
+![image](https://user-images.githubusercontent.com/18450628/88088807-fe2acd80-cb58-11ea-8ab2-bd090e8a826c.png)
+
+#### Run with Orion on the Slurm cluster
+
+This example will run orion for 2 trials (see the orion config file).
+To do so, go into `examples/slurm_orion`.
+Here you can find the orion config file (`orion_config.yaml`), as well as the config
+file (`config.yaml`) for your project (that contains the hyper-parameters).
+
+In general, you will want to run Orion in parallel over N slurm jobs.
+To do so, simply run `sh run.sh` N times.
+
+When Orion has completed the trials, you will find the orion db file.
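+
+Besides the `orion status`/`orion info` commands shown below, you can also inspect
+results from Python. A sketch, assuming Orion's `get_experiment` client API and the
+pickled-DB settings exported in `examples/slurm_orion/run.sh`:
+
+```
+import os
+
+# same DB settings as exported in run.sh
+os.environ['ORION_DB_ADDRESS'] = 'orion_db.pkl'
+os.environ['ORION_DB_TYPE'] = 'pickleddb'
+
+from orion.client import get_experiment
+
+experiment = get_experiment(name='my_exp')
+print(experiment.stats)  # best objective value, trial counts, timestamps
+for trial in experiment.fetch_trials():
+    print(trial.id, trial.status)
+```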
+ +You will also find the output of your experiments in `orion_working_dir`, which +will contain a folder for every trial. +Inside these folders, you can find the models (the best one and the last one), the config file with +the hyper-parameters for this trial, and the log file. + +You can check orion status with the following commands: +(to be run from `examples/slurm_orion`) + + export ORION_DB_ADDRESS='orion_db.pkl' + export ORION_DB_TYPE='pickleddb' + orion status + orion info --name my_exp + +### Building docs: + +Documentation is built using sphinx. It will automatically document all functions based on docstrings. +To automatically generate docs for your project, navigate to the `docs` folder and build the documentation: + + cd docs + make html + +To view the docs locally, open `docs/_build/html/index.html` in your browser. + + +## YOUR PROJECT README: + +* __TODO__ diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/__init__.py b/amlrt_project/__init__.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/__init__.py rename to amlrt_project/__init__.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/__init__.py b/amlrt_project/data/__init__.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/__init__.py rename to amlrt_project/data/__init__.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py b/amlrt_project/data/data_loader.py similarity index 92% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py rename to amlrt_project/data/data_loader.py index f490921..7c89ef6 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py +++ b/amlrt_project/data/data_loader.py @@ -1,15 +1,15 @@ import logging -from typing import Callable import typing +from typing import Callable, Optional import numpy as np import pytorch_lightning as pl import torch -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import DataLoader, Dataset from torchvision import transforms -from {{cookiecutter.project_slug}}.data.data_preprocess import FashionMnistParser -from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp +from amlrt_project.data.data_preprocess import FashionMnistParser +from amlrt_project.utils.hp_utils import check_and_log_hp logger = logging.getLogger(__name__) # __TODO__ change the dataloader to suit your needs... @@ -22,7 +22,7 @@ def __init__( self, images: np.ndarray, labels: np.ndarray, - transform: Callable[[torch.tensor], torch.tensor] = None, + transform: Optional[Callable[[torch.tensor], torch.tensor]] = None, ): """Initialize Dataset. 
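The `transform` hunk above makes the optional transform explicit in the type
signature (`Optional[Callable[...]]`). A minimal sketch of the pattern that
signature describes - apply the transform only when one is provided - with an
illustrative class body (not the repo's actual `FashionMnistDataset` code):

```
from typing import Callable, Optional

import numpy as np
import torch
from torch.utils.data import Dataset


class ExampleDataset(Dataset):
    """Toy dataset showing the Optional[Callable] transform pattern."""

    def __init__(
        self,
        images: np.ndarray,
        labels: np.ndarray,
        transform: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
    ):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = torch.as_tensor(self.images[idx], dtype=torch.float32)
        if self.transform is not None:  # only apply when a transform was given
            image = self.transform(image)
        return image, int(self.labels[idx])
```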
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_preprocess.py b/amlrt_project/data/data_preprocess.py
similarity index 96%
rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_preprocess.py
rename to amlrt_project/data/data_preprocess.py
index 1ed8a24..83ee325 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_preprocess.py
+++ b/amlrt_project/data/data_preprocess.py
@@ -1,8 +1,8 @@
-import os
-import logging
-import urllib.request
 import gzip
+import logging
+import os
 import typing
+import urllib.request
 
 import numpy as np
 
@@ -85,7 +85,7 @@ def extract_labels(fname: typing.AnyStr):
         return labels
 
     @staticmethod
-    def val_from_train(images: np.array, labels: np.array, val_pct: float):
+    def val_from_train(images: np.ndarray, labels: np.ndarray, val_pct: float):
         """Fashion mnist doesn't have a validation set, we create one here."""
         assert 0 < val_pct < 1
         num_samples = len(images)
@@ -101,7 +101,7 @@ def val_from_train(images: np.array, labels: np.array, val_pct: float):
         return train_images, train_labels, val_images, val_labels
 
     @staticmethod
-    def subsample_dataset(images: np.array, labels: np.array, num_samples: int):
+    def subsample_dataset(images: np.ndarray, labels: np.ndarray, num_samples: int):
         """Extract a subset of the dataset to speed up training."""
         return images[:num_samples], labels[:num_samples]
 
diff --git a/amlrt_project/evaluate.py b/amlrt_project/evaluate.py
new file mode 100644
index 0000000..d717d8a
--- /dev/null
+++ b/amlrt_project/evaluate.py
@@ -0,0 +1,99 @@
+import argparse
+import logging
+import logging.handlers
+import sys
+
+import pytorch_lightning as pl
+import yaml
+from yaml import load
+
+from amlrt_project.data.data_loader import FashionMnistDM
+from amlrt_project.models.model_loader import load_model
+from amlrt_project.utils.hp_utils import check_and_log_hp
+from amlrt_project.utils.logging_utils import LoggerWriter
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    """Main entry point of the program.
+
+    Note:
+        This script is meant to be called using the cli,
+        see the `examples/local/run.sh` file to see how to use it.
+
+    """
+    parser = argparse.ArgumentParser()
+    # __TODO__ check you need all the following CLI parameters
+    parser.add_argument('--log', help='log to this file (in addition to stdout/err)')
+    parser.add_argument('--ckpt-path', help='Path to best model')
+    parser.add_argument('--config',
+                        help='config file with generic hyper-parameters, such as optimizer, '
+                             'batch_size, ... - in yaml format')
+    parser.add_argument('--data', help='path to data', required=True)
+    parser.add_argument('--gpus', default=None,
+                        help='list of GPUs to use. If not specified, runs on CPU.'
+                             'Example of GPU usage: 1 means run on GPU 1, 0 on GPU 0.')
+    args = parser.parse_args()
+
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+    data_dir = args.data
+
+    # will log to a file if provided (useful for orion on cluster)
+    if args.log is not None:
+        handler = logging.handlers.WatchedFileHandler(args.log)
+        formatter = logging.Formatter(logging.BASIC_FORMAT)
+        handler.setFormatter(formatter)
+        root = logging.getLogger()
+        root.setLevel(logging.INFO)
+        root.addHandler(handler)
+
+    # to intercept any print statement:
+    sys.stdout = LoggerWriter(logger.info)
+    sys.stderr = LoggerWriter(logger.warning)
+
+    if args.config is not None:
+        with open(args.config, 'r') as stream:
+            hyper_params = load(stream, Loader=yaml.FullLoader)
+    else:
+        hyper_params = {}
+
+    evaluate(args, data_dir, hyper_params)
+
+
+def evaluate(args, data_dir, hyper_params):
+    """Performs an evaluation on both the validation and test sets.
+
+    Args:
+        args (object): arguments passed from the cli
+        data_dir (str): path to input folder
+        hyper_params (dict): hyper parameters from the config file
+    """
+    # __TODO__ change the hparams that are used by the training algorithm
+    # (and NOT the model - these will be specified in the model itself)
+    logger.info('List of hyper-parameters:')
+    check_and_log_hp(
+        ['architecture', 'batch_size', 'exp_name', 'early_stopping'],
+        hyper_params)
+
+    trainer = pl.Trainer(
+        gpus=args.gpus,
+    )
+
+    datamodule = FashionMnistDM(data_dir, hyper_params)
+    datamodule.setup()
+
+    model = load_model(hyper_params)
+    model = model.load_from_checkpoint(args.ckpt_path)
+
+    val_metrics = trainer.validate(model, datamodule=datamodule)
+    test_metrics = trainer.test(model, datamodule=datamodule)
+
+    # We can have many val/test sets, so iterate through their results.
+ logger.info(f"Validation Metrics: {val_metrics}") + logger.info(f"Test Metrics: {test_metrics}") + + +if __name__ == "__main__": + main() diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/__init__.py b/amlrt_project/models/__init__.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/__init__.py rename to amlrt_project/models/__init__.py diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py b/amlrt_project/models/model_loader.py similarity index 91% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py rename to amlrt_project/models/model_loader.py index 1731766..2c55664 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py +++ b/amlrt_project/models/model_loader.py @@ -1,6 +1,6 @@ import logging -from {{cookiecutter.project_slug}}.models.my_model import SimpleMLP +from amlrt_project.models.my_model import SimpleMLP logger = logging.getLogger(__name__) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py b/amlrt_project/models/my_model.py similarity index 95% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py rename to amlrt_project/models/my_model.py index 450ff56..e44cdfe 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py +++ b/amlrt_project/models/my_model.py @@ -1,12 +1,11 @@ import logging import typing -from torch import nn import pytorch_lightning as pl +from torch import nn -from {{cookiecutter.project_slug}}.models.optim import load_loss, load_optimizer - -from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp +from amlrt_project.models.optim import load_loss, load_optimizer +from amlrt_project.utils.hp_utils import check_and_log_hp logger = logging.getLogger(__name__) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py b/amlrt_project/models/optim.py similarity index 99% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py rename to amlrt_project/models/optim.py index 3b5f709..a124684 100644 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py +++ b/amlrt_project/models/optim.py @@ -3,7 +3,6 @@ import torch from torch import optim - logger = logging.getLogger(__name__) diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py b/amlrt_project/train.py old mode 100755 new mode 100644 similarity index 52% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py rename to amlrt_project/train.py index 385a70c..b9e129e --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py +++ b/amlrt_project/train.py @@ -1,24 +1,29 @@ -#!/usr/bin/env python - import argparse +import glob import logging import os import shutil import sys +import orion +import pytorch_lightning as pl import yaml +from orion.client import report_results +from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from yaml import load -from {{cookiecutter.project_slug}}.data.data_loader import FashionMnistDM -from {{cookiecutter.project_slug}}.train import train -from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp -from {{cookiecutter.project_slug}}.models.model_loader import load_model -from {{cookiecutter.project_slug}}.utils.file_utils import rsync_folder -from 
{{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter, log_exp_details
-from {{cookiecutter.project_slug}}.utils.reproducibility_utils import set_seed
+from amlrt_project.data.data_loader import FashionMnistDM
+from amlrt_project.models.model_loader import load_model
+from amlrt_project.utils.file_utils import rsync_folder
+from amlrt_project.utils.hp_utils import check_and_log_hp
+from amlrt_project.utils.logging_utils import LoggerWriter, log_exp_details
+from amlrt_project.utils.reproducibility_utils import set_seed
 
 logger = logging.getLogger(__name__)
 
+BEST_MODEL_NAME = 'best_model'
+LAST_MODEL_NAME = 'last_model'
+
 
 def main():
     """Main entry point of the program.
@@ -126,5 +131,103 @@ def run(args, data_dir, output_dir, hyper_params):
         use_progress_bar=not args.disable_progressbar,
         gpus=args.gpus)
 
+
+def train(**kwargs):  # pragma: no cover
+    """Training loop wrapper. Used to catch exception if Orion is being used."""
+    try:
+        best_dev_metric = train_impl(**kwargs)
+    except RuntimeError as err:
+        if orion.client.cli.IS_ORION_ON and 'CUDA out of memory' in str(err):
+            logger.error(err)
+            logger.error('model was out of memory - assigning a bad score to tell Orion to '
+                         'avoid too big a model')
+            best_dev_metric = -999
+        else:
+            raise err
+
+    report_results([dict(
+        name='dev_metric',
+        type='objective',
+        # note the minus - because Orion always tries to minimize (as per the Orion guide)
+        value=-float(best_dev_metric))])
+
+
+def train_impl(model, datamodule, output, hyper_params, use_progress_bar, gpus):  # pragma: no cover
+    """Main training loop implementation.
+
+    Args:
+        model (obj): The neural network model object.
+        datamodule (obj): lightning data module that will instantiate data loaders.
+        output (str): Output directory.
+        hyper_params (dict): Dict containing hyper-parameters.
+        use_progress_bar (bool): Use tqdm progress bar (can be disabled when logging).
+        gpus: number of GPUs to use.
+    """
+    check_and_log_hp(['max_epoch'], hyper_params)
+
+    best_model_path = os.path.join(output, BEST_MODEL_NAME)
+    best_checkpoint_callback = ModelCheckpoint(
+        dirpath=best_model_path,
+        filename='model',
+        save_top_k=1,
+        verbose=use_progress_bar,
+        monitor="val_loss",
+        mode="min",  # a lower validation loss is better
+        every_n_epochs=1,
+    )
+
+    last_model_path = os.path.join(output, LAST_MODEL_NAME)
+    last_checkpoint_callback = ModelCheckpoint(
+        dirpath=last_model_path,
+        filename='model',
+        verbose=use_progress_bar,
+        every_n_epochs=1,
+    )
+
+    resume_from_checkpoint = handle_previous_models(output, last_model_path, best_model_path)
+
+    early_stopping_params = hyper_params['early_stopping']
+    check_and_log_hp(['metric', 'mode', 'patience'], hyper_params['early_stopping'])
+    early_stopping = EarlyStopping(
+        early_stopping_params['metric'],
+        mode=early_stopping_params['mode'],
+        patience=early_stopping_params['patience'],
+        verbose=use_progress_bar)
+
+    logger = pl.loggers.TensorBoardLogger(
+        save_dir=output,
+        default_hp_metric=False,
+        version=0,  # Necessary to resume tensorboard logging
+    )
+
+    trainer = pl.Trainer(
+        callbacks=[early_stopping, best_checkpoint_callback, last_checkpoint_callback],
+        max_epochs=hyper_params['max_epoch'],
+        resume_from_checkpoint=resume_from_checkpoint,
+        gpus=gpus,
+        logger=logger,
+    )
+
+    trainer.fit(model, datamodule=datamodule)
+
+    # Log the best result and associated hyper parameters
+    best_dev_result = float(early_stopping.best_score.cpu().numpy())
+    logger.log_hyperparams(hyper_params, metrics={'best_dev_metric': best_dev_result})
+
+    return best_dev_result
+
+
+def handle_previous_models(output, last_model_path, best_model_path):
+    """Returns the latest checkpoint to resume from, if any - otherwise None."""
+    last_models = glob.glob(last_model_path + os.sep + '*')
+
+    if len(last_models) >= 1:
+        resume_from_checkpoint = sorted(last_models)[-1]
+        logger.info(f'models found - resuming from {resume_from_checkpoint}')
+    else:
+        logger.info('no model found - starting training from scratch')
+        resume_from_checkpoint = None
+    return resume_from_checkpoint
+
 
 if __name__ == '__main__':
     main()
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/__init__.py b/amlrt_project/utils/__init__.py
similarity index 100%
rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/__init__.py
rename to amlrt_project/utils/__init__.py
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/file_utils.py b/amlrt_project/utils/file_utils.py
similarity index 100%
rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/file_utils.py
rename to amlrt_project/utils/file_utils.py
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py b/amlrt_project/utils/hp_utils.py
similarity index 100%
rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py
rename to amlrt_project/utils/hp_utils.py
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py b/amlrt_project/utils/logging_utils.py
similarity index 98%
rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
rename to amlrt_project/utils/logging_utils.py
index 3425bcf..8b4367b 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
+++ b/amlrt_project/utils/logging_utils.py
@@ -2,8 +2,8 @@
 import os
 import socket
 
-from pip._internal.operations import freeze
 from git import InvalidGitRepositoryError, Repo
+from pip._internal.operations import freeze logger = logging.getLogger(__name__) @@ -22,6 +22,7 @@ def __init__(self, printer): printer: (fn) function used to print message (e.g., logger.info). """ self.printer = printer + self.encoding = None def write(self, message): """write. diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/reproducibility_utils.py b/amlrt_project/utils/reproducibility_utils.py similarity index 100% rename from {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/reproducibility_utils.py rename to amlrt_project/utils/reproducibility_utils.py diff --git a/cookiecutter.json b/cookiecutter.json deleted file mode 100644 index 4ebf2a6..0000000 --- a/cookiecutter.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "full_name": "John Doe", - "email": "john.doe@example.com", - "github_username": "johndoe", - "project_name": "Wonderful Project", - "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}", - "project_short_description": "{{ cookiecutter.project_name }} is wonderful!", - "python_version": "3.9", - "environment": ["mila", "generic"], - "pypi_username": "{{ cookiecutter.github_username }}", - "version": "0.0.1", - "open_source_license": ["MIT license", "BSD license", "ISC license", "Apache Software License 2.0", "GNU General Public License v3", "Not open source"] -} diff --git a/{{cookiecutter.project_slug}}/docs/Makefile b/docs/Makefile similarity index 100% rename from {{cookiecutter.project_slug}}/docs/Makefile rename to docs/Makefile diff --git a/{{cookiecutter.project_slug}}/docs/conf.py b/docs/conf.py similarity index 93% rename from {{cookiecutter.project_slug}}/docs/conf.py rename to docs/conf.py index 6916349..a3c29ab 100644 --- a/{{cookiecutter.project_slug}}/docs/conf.py +++ b/docs/conf.py @@ -16,9 +16,9 @@ # -- Project information ----------------------------------------------------- -project = '{{cookiecutter.project_slug}}' -copyright = '2020, {{cookiecutter.full_name}}' -author = '{{cookiecutter.full_name}}' +project = 'amlrt_project' +copyright = '2023, amlrt_team' +author = 'amlrt_team' # -- General configuration --------------------------------------------------- @@ -39,7 +39,7 @@ # autoapi extension for doc strings extensions.append('autoapi.extension') autoapi_type = 'python' -autoapi_dirs = ['../{{cookiecutter.project_slug}}/'] +autoapi_dirs = ['../amlrt_project/'] # Skip docstrings for loggers and tests diff --git a/{{cookiecutter.project_slug}}/docs/index.rst b/docs/index.rst similarity index 83% rename from {{cookiecutter.project_slug}}/docs/index.rst rename to docs/index.rst index ca81249..950fc40 100644 --- a/{{cookiecutter.project_slug}}/docs/index.rst +++ b/docs/index.rst @@ -1,9 +1,9 @@ -.. {{cookiecutter.project_slug}} documentation master file, created by +.. amlrt_project documentation master file, created by sphinx-quickstart on Fri Jul 3 10:11:19 2020. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to {{cookiecutter.project_slug}}'s documentation! +Welcome to amlrt_project's documentation! 
============================================= About the project diff --git a/{{cookiecutter.project_slug}}/docs/make.bat b/docs/make.bat similarity index 100% rename from {{cookiecutter.project_slug}}/docs/make.bat rename to docs/make.bat diff --git a/{{cookiecutter.project_slug}}/docs/usage/guide.rst b/docs/usage/guide.rst similarity index 85% rename from {{cookiecutter.project_slug}}/docs/usage/guide.rst rename to docs/usage/guide.rst index 9e5b467..0db3d3d 100644 --- a/{{cookiecutter.project_slug}}/docs/usage/guide.rst +++ b/docs/usage/guide.rst @@ -13,4 +13,4 @@ Everybody loves Schrodinger's equation, why not put it everywhere? You can also add math or even link directly in your docstrings! For an example, click at the docstrings here: -:py:meth:`{{cookiecutter.project_slug}}.models.optim.load_loss` +:py:meth:`amlrt_project.models.optim.load_loss` diff --git a/{{cookiecutter.project_slug}}/docs/usage/quickstart.md b/docs/usage/quickstart.md similarity index 100% rename from {{cookiecutter.project_slug}}/docs/usage/quickstart.md rename to docs/usage/quickstart.md diff --git a/{{cookiecutter.project_slug}}/examples/local/config.yaml b/examples/local/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local/config.yaml rename to examples/local/config.yaml diff --git a/examples/local/run.sh b/examples/local/run.sh new file mode 100644 index 0000000..feb7704 --- /dev/null +++ b/examples/local/run.sh @@ -0,0 +1,2 @@ +amlrt-train --data ../data --output output --config config.yaml --start-from-scratch +amlrt-eval --data ../data --config config.yaml --ckpt-path output/best_model/model.ckpt diff --git a/{{cookiecutter.project_slug}}/examples/local_orion/config.yaml b/examples/local_orion/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local_orion/config.yaml rename to examples/local_orion/config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/local_orion/orion_config.yaml b/examples/local_orion/orion_config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/local_orion/orion_config.yaml rename to examples/local_orion/orion_config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/local_orion/run.sh b/examples/local_orion/run.sh similarity index 71% rename from {{cookiecutter.project_slug}}/examples/local_orion/run.sh rename to examples/local_orion/run.sh index e472e31..314f688 100644 --- a/{{cookiecutter.project_slug}}/examples/local_orion/run.sh +++ b/examples/local_orion/run.sh @@ -2,7 +2,7 @@ export MLFLOW_TRACKING_URI='mlruns' export ORION_DB_ADDRESS='orion_db.pkl' export ORION_DB_TYPE='pickleddb' -orion -v hunt --config orion_config.yaml ../../{{cookiecutter.project_slug}}/main.py --data ../data \ +orion -v hunt --config orion_config.yaml amlrt-train --data ../data \ --config config.yaml --disable-progressbar \ --output '{exp.working_dir}/{trial.id}/' \ --log '{exp.working_dir}/{trial.id}/exp.log' diff --git a/{{cookiecutter.project_slug}}/examples/slurm/config.yaml b/examples/slurm/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm/config.yaml rename to examples/slurm/config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/slurm/run.sh b/examples/slurm/run.sh similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm/run.sh rename to examples/slurm/run.sh diff --git a/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh b/examples/slurm/to_submit.sh similarity index 82% rename from 
{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh rename to examples/slurm/to_submit.sh index c87cc12..55aeea3 100644 --- a/{{cookiecutter.project_slug}}/examples/slurm/to_submit.sh +++ b/examples/slurm/to_submit.sh @@ -15,7 +15,7 @@ #SBATCH --gres=gpu:1 #SBATCH --mem=5G #SBATCH --time=0:05:00 -#SBATCH --job-name={{ cookiecutter.project_slug }} +#SBATCH --job-name=amlrt_project #SBATCH --output=logs/%x__%j.out #SBATCH --error=logs/%x__%j.err # to attach a tag to your run (e.g., used to track the GPU time) @@ -23,8 +23,8 @@ ##SBATCH --wckey=my_tag # remove one # if you prefer receiving emails ##SBATCH --mail-type=all -##SBATCH --mail-user={{ cookiecutter.email }} +##SBATCH --mail-user=amlrt_email@mila.quebec export MLFLOW_TRACKING_URI='mlruns' -main --data ../data --output output --config config.yaml --tmp-folder ${SLURM_TMPDIR} --disable-progressbar +amlrt-train --data ../data --output output --config config.yaml --tmp-folder ${SLURM_TMPDIR} --disable-progressbar diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/config.yaml b/examples/slurm_orion/config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/config.yaml rename to examples/slurm_orion/config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/orion_config.yaml b/examples/slurm_orion/orion_config.yaml similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/orion_config.yaml rename to examples/slurm_orion/orion_config.yaml diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/run.sh b/examples/slurm_orion/run.sh similarity index 100% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/run.sh rename to examples/slurm_orion/run.sh diff --git a/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh b/examples/slurm_orion/to_submit.sh similarity index 86% rename from {{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh rename to examples/slurm_orion/to_submit.sh index dfa90e5..baa1031 100644 --- a/{{cookiecutter.project_slug}}/examples/slurm_orion/to_submit.sh +++ b/examples/slurm_orion/to_submit.sh @@ -1,5 +1,5 @@ #!/bin/bash -#SBATCH --job-name={{ cookiecutter.project_slug }} +#SBATCH --job-name=amlrt_project {%- if cookiecutter.environment == 'mila' %} ## this is for the mila cluster (uncomment it if you need it): ##SBATCH --account=rrg-bengioy-ad @@ -20,14 +20,14 @@ #SBATCH --error=logs/%x__%j.err # remove one # if you prefer receiving emails ##SBATCH --mail-type=all -##SBATCH --mail-user={{ cookiecutter.email }} +##SBATCH --mail-user=amlrt_email@mila.quebec export MLFLOW_TRACKING_URI='mlruns' export ORION_DB_ADDRESS='orion_db.pkl' export ORION_DB_TYPE='pickleddb' orion -v hunt --config orion_config.yaml \ - main --data ../data --config config.yaml --disable-progressbar \ + amlrt-train --data ../data --config config.yaml --disable-progressbar \ --output '{exp.working_dir}/{trial.id}/' \ --log '{exp.working_dir}/{trial.id}/exp.log' \ --tmp-folder ${SLURM_TMPDIR}/{trial.id} diff --git a/{{cookiecutter.project_slug}}/hooks/pre-commit b/hooks/pre-commit similarity index 92% rename from {{cookiecutter.project_slug}}/hooks/pre-commit rename to hooks/pre-commit index 46304e4..11a19a5 100755 --- a/{{cookiecutter.project_slug}}/hooks/pre-commit +++ b/hooks/pre-commit @@ -1,11 +1,12 @@ -#!/bin/sh +#!/bin/sh # exit at the first error set -e -# flake8 linting +# linting flake8 --ignore D,W503 --max-line-length=100 . 
# Check everything but docstrings flake8 --select D --ignore D104,D100,D401 --docstring-convention google --exclude tests/ # Check only the docstrings +isort --check . # Check imports # Raise error if any staged notebooks contain outputs GITDIR=$(git rev-parse --show-toplevel) # Full path to git working directory @@ -17,7 +18,7 @@ if [ "$IPYNB_FILES" != "" ] && [ -z $ALLOW_IPYNB ]; then if [ "$DIFF" != "" ]; then echo " The notebook $GITDIR/$f contains outputs. - Remove them all before committing. + Remove them all before committing. ***Hint*** use the command: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --ClearMetadataPreprocessor.enabled=True --to notebook --inplace $GITDIR/$f diff --git a/setup.py b/setup.py index b8714d6..843c9ea 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,38 @@ -from setuptools import setup, find_packages - +from setuptools import find_packages, setup setup( - name='cookiecutter-pyml', - version='0.1', - author='Mirko Bronzi', - author_email='m.bronzi@gmail.com', - python_requires='>=3.7', - install_requires=['cookiecutter>=1.4.0'] + name='amlrt_project', + version='0.0.1', + packages=find_packages(include=['amlrt_project', 'amlrt_project.*']), + python_requires='>=3.9', + install_requires=[ + 'flake8==4.0.1', + 'flake8-docstrings==1.6.0', + 'gitpython==3.1.27', + 'jupyter==1.0.0', + 'jinja2==3.1.2', + 'myst-parser==0.18.0', + 'orion>=0.2.4.post1', + 'pyyaml==6.0', + 'pytest==7.1.2', + 'pytest-cov==3.0.0', + 'pytorch_lightning==1.8.3', + 'pytype==2023.1.17', + 'sphinx==5.1.1', + 'sphinx-autoapi==1.9.0', + 'sphinx-rtd-theme==1.0.0', + 'sphinxcontrib-napoleon==0.7', + 'sphinxcontrib-katex==0.8.6', + 'tensorboard==2.9.1', + 'tqdm==4.64.0', + 'torch==1.12.0', + 'torchvision==0.13.0' + ], + entry_points={ + 'console_scripts': [ + # TODO: change amlrt- prefix, placeholder for now. + 'amlrt-train=amlrt_project.train:main', + 'amlrt-eval=amlrt_project.evaluate:main', + ], + } ) diff --git a/tests/end2end_pytorch/run.sh b/tests/end2end_pytorch/run.sh index 4ce5120..e9c3be5 100755 --- a/tests/end2end_pytorch/run.sh +++ b/tests/end2end_pytorch/run.sh @@ -1,32 +1,8 @@ # exit at the first error set -e -rm -rf wonderful_project # if it already exists -# go to the test folder -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd ${DIR} -cookiecutter ../.. --no-input --output-dir=./ -cd wonderful_project -git init -# setting some fake name/email for git: -git config user.email "you@example.com" -git config user.name "Your Name" -git add -A -git commit -m "initial commit" -pip install -e . --quiet -pip install flake8 pytest --quiet - -# print all dependencies -pip freeze - -# run flake8 test first -sh hooks/pre-commit - -# run tests -pytest --cov=wonderful_project - -# run the example -cd examples/local +# go to the examples folder and run the example +cd $GITHUB_WORKSPACE/examples/local sh run.sh mv output outout_OLD # re-run the example to check reproducibility @@ -41,8 +17,7 @@ else fi # run Orion -cd ../.. -cd examples/local_orion +cd $GITHUB_WORKSPACE/examples/local_orion sh run.sh mv orion_working_dir orion_working_dir_OLD # re-run the example to check reproducibility diff --git a/tests/test_docs/run.sh b/tests/test_docs/run.sh index 77f4bd9..6cc90a7 100755 --- a/tests/test_docs/run.sh +++ b/tests/test_docs/run.sh @@ -1,16 +1,6 @@ # exit at the first error set -e -# go to the test folder -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd ${DIR} - -cookiecutter ../.. 
--no-input --output-dir=./ -cd wonderful_project -pip install -e . --quiet - -# necessary cause tf dependencies are sometimes not updated -pip install -U setuptools numpy six --quiet # Build the docs -cd docs +cd $GITHUB_WORKSPACE/docs/ sphinx-build -b html -d _build/doctrees . _build/html diff --git a/{{cookiecutter.project_slug}}/tests/test_hp_utils.py b/tests/test_hp_utils.py similarity index 89% rename from {{cookiecutter.project_slug}}/tests/test_hp_utils.py rename to tests/test_hp_utils.py index 2b17680..9e16e96 100644 --- a/{{cookiecutter.project_slug}}/tests/test_hp_utils.py +++ b/tests/test_hp_utils.py @@ -1,6 +1,6 @@ import pytest -from {{cookiecutter.project_slug}}.utils.hp_utils import check_hp +from amlrt_project.utils.hp_utils import check_hp def test_check_hp__all_params_are_there(): diff --git a/{{cookiecutter.project_slug}}/.azure_pipeline.yml b/{{cookiecutter.project_slug}}/.azure_pipeline.yml deleted file mode 100644 index f1191b1..0000000 --- a/{{cookiecutter.project_slug}}/.azure_pipeline.yml +++ /dev/null @@ -1,27 +0,0 @@ -jobs: -- job: - pool: - vmImage: 'ubuntu-16.04' - strategy: - matrix: - Python: - python.version: '{{ cookiecutter.python_version }}' - - steps: - - task: UsePythonVersion@0 - displayName: 'Use Python $(python.version)' - inputs: - versionSpec: '$(python.version)' - - - script: pip install -e . - displayName: 'Install dependencies' - - - script: sh config/hooks/pre-commit - displayName: 'Running commit hook' - - - script: pytest --cov={{cookiecutter.project_slug}} - displayName: 'Run pytest and display test coverage' - - - script: sh run.sh - workingDirectory: examples/local - displayName: 'Run single toy experiment' diff --git a/{{cookiecutter.project_slug}}/.github/workflows/tests.yml b/{{cookiecutter.project_slug}}/.github/workflows/tests.yml deleted file mode 100644 index bf22c39..0000000 --- a/{{cookiecutter.project_slug}}/.github/workflows/tests.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: unit-tests -on: - # Trigger the workflow on push or pull request, - # but only for the main/develop branch - push: - branches: - - main - - develop - pull_request: - branches: - - main - - develop -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: python-{{ cookiecutter.python_version }} - uses: actions/setup-python@v2 - with: - python-version: {{ cookiecutter.python_version }} - - name: upgrade-pip - run: | - python -m pip install --upgrade pip - - name: flake8 - run: | - pip install flake8==4.0.1 flake8-docstrings==1.6.0 - hooks/pre-commit - - name: install-dependencies - run: | - pip install pytest - pip install -e . 
- - name: print env - run: | - env - - name: pytest_and_coverage - run: | - pytest --cov={{cookiecutter.project_slug}} - - name: end2end-toyexp - run: | - cd examples/local - sh run.sh diff --git a/{{cookiecutter.project_slug}}/.gitignore b/{{cookiecutter.project_slug}}/.gitignore deleted file mode 100644 index a06f135..0000000 --- a/{{cookiecutter.project_slug}}/.gitignore +++ /dev/null @@ -1,110 +0,0 @@ -.idea -mlruns - -# Ignore example outputs -examples/*/output/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ diff --git a/{{cookiecutter.project_slug}}/LICENSE b/{{cookiecutter.project_slug}}/LICENSE deleted file mode 100644 index 24b2600..0000000 --- a/{{cookiecutter.project_slug}}/LICENSE +++ /dev/null @@ -1,111 +0,0 @@ -{% if cookiecutter.open_source_license == 'MIT license' -%} -MIT License - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -{% elif cookiecutter.open_source_license == 'BSD license' %} - -BSD License - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. -{% elif cookiecutter.open_source_license == 'ISC license' -%} -ISC License - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} - -Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -{% elif cookiecutter.open_source_license == 'Apache Software License 2.0' -%} -Apache Software License 2.0 - -Copyright (c) {% now 'local', '%Y' %}, {{ cookiecutter.full_name }} - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -{% elif cookiecutter.open_source_license == 'GNU General Public License v3' -%} -GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - {{ cookiecutter.project_short_description }} - Copyright (C) {% now 'local', '%Y' %} {{ cookiecutter.full_name }} - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. -{% endif %} diff --git a/{{cookiecutter.project_slug}}/README.md b/{{cookiecutter.project_slug}}/README.md deleted file mode 100644 index 798f39e..0000000 --- a/{{cookiecutter.project_slug}}/README.md +++ /dev/null @@ -1,215 +0,0 @@ -{% set is_open_source = cookiecutter.open_source_license != 'Not open source' -%} - -# {{ cookiecutter.project_name }} - - -{{ cookiecutter.project_short_description }} - -{% if is_open_source %} -* Free software: {{ cookiecutter.open_source_license }} -{% endif %} - - -## Instructions to setup the project - -### Install the dependencies: -(remember to activate the virtual env if you want to use one) -Add new dependencies (if needed) to setup.py. - - pip install -e . - -### Add git: - - git init - -### Setup pre-commit hooks: -These hooks will: -* validate flake8 before any commit -* check that jupyter notebook outputs have been stripped - - cd .git/hooks/ && ln -s ../../hooks/pre-commit . && cd - - -### Commit the code - - git add . - git commit -m 'first commit' - -### Link github to your local repository -Go on github and follow the instructions to create a new project. -When done, do not add any file, and follow the instructions to -link your local git to the remote project, which should look like this: -(PS: these instructions are reported here for your convenience. -We suggest to also look at the GitHub project page for more up-to-date info) - - git remote add origin git@github.com:{{ cookiecutter.github_username }}/{{ cookiecutter.project_slug }}.git - git branch -M main - git push -u origin main - -### Setup Continuous Integration - -Continuous integration will run the following: -- Unit tests under `tests`. -- End-to-end test under `exmaples/local`. -- `flake8` to check the code syntax. -- Checks on documentation presence and format (using `sphinx`). - -We support the following Continuous Integration providers. -Check the following instructions for more details. - -#### GitHub Actions - -Github actions are already configured in `.github/workflows/tests.yml`. -Github actions are already enabled by default when using Github, so, when -pushing to github, they will be executed automatically for pull requests to -`main` and to `develop`. - -#### Azure - -Azure Continuous Integration is already configured in (`.azure_pipeline.yml`). - -To enable it server-side, just in azure and select `.azure_pipeline.yml` as the -configuration one for Continuous Integration. 
- -## Running the code - -### Run the tests -Just run (from the root folder): - - pytest - -### Run the code/examples. -Note that the code should already compile at this point. - -Running examples can be found under the `examples` folder. - -In particular, you will find examples for: -* local machine (e.g., your laptop). -* a slurm cluster. - -For both these cases, there is the possibility to run with or without Orion. -(Orion is a hyper-parameter search tool - see https://github.com/Epistimio/orion - -that is already configured in this project) - -#### Run locally - -For example, to run on your local machine without Orion: - - cd examples/local - sh run.sh - -This will run a simple MLP on a simple toy task: sum 5 float numbers. -You should see an almost perfect loss of 0 after a few epochs. - -Note you have a new `output` folder which contains models and a summary of results: -* best_model: the best model checkpoint during training -* last_model: the last model checkpoint during training -* lightning_logs: contains the tensorboard logs. - -To view tensorboard logs, simply run: - - tensorboard --logdir output - -#### Run on a remote cluster (with Slurm) - -First, bring you project on the cluster (assuming you didn't create your -project directly there). To do so, simply login on the cluster and git -clone your project: - - git clone git@github.com:{{ cookiecutter.github_username }}/{{ cookiecutter.project_slug }}.git - -Then activate your virtual env, and install the dependencies: - - cd {{ cookiecutter.project_slug }} - pip install -e . - -To run with Slurm, just: - - cd examples/slurm - sh run.sh - -Check the log to see that you got an almost perfect loss (i.e., 0). -{%- if cookiecutter.environment == 'mila' %} - -#### Measure GPU time (and others) on the Mila cluster - -You can track down the GPU time (and other resources) of your jobs by -associating a tag to the job (when using `sbatch`). -To associate a tag to a job, replace `my_tag` with a proper tag, -and uncomment the line (i.e., remove one #) from the line: - - ##SBATCH --wckey=my_tag - -This line is inside the file `examples/slurm_mila/to_submit.sh`. - -To get a sumary for a particular tag, just run: - - sacct --allusers --wckeys=my_tag --format=JobID,JobName,Start,Elapsed -X -P --delimiter=',' - -(again, remember to change `my_tag` into the real tag name) - -#### GPU profiling on the Mila cluster - -It can be useful to monitor and profile how you utilise your GPU (usage, memory, etc.). For the time being, you can only monitor your profiling in real-time from the Mila cluster, i.e. while your experiments are running. To monitor your GPU, you need to setup port-forwarding on the host your experiments are running on. 
This can be done in the following way: - -Once you have launched your job on the mila cluster, open the log for your current experiment: - -`head logs/{{ cookiecutter.project_slug }}__.err` - -You should see printed in the first few lines the hostname of your machine, e.g., - -``` -INFO:{{ cookiecutter.project_slug }}.utils.logging_utils:Experiment info: -hostname: leto35 -git code hash: a51bfc5447d188bd6d31fac3afbd5757650ef524 -data folder: ../data -data folder (abs): /network/tmp1/bronzimi/20191105_cookiecutter/{{ cookiecutter.project_slug }}/examples/data -``` - -In a separate shell on your local computer, run the following command: - -`ssh -L 19999:.server.mila.quebec:19999 @login.server.mila.quebec -p 2222` - -where `` is your user name on the Mila cluster and `` is the name of the machine your job is currenty running on (`leto35` in our example). You can then navigate your local browser to `http://localhost:19999/` to view the ressources being used on the cluster and monitor your job. You should see something like this: - -![image](https://user-images.githubusercontent.com/18450628/88088807-fe2acd80-cb58-11ea-8ab2-bd090e8a826c.png) -{%- endif %} - -#### Run with Orion on the Slurm cluster - -This example will run orion for 2 trials (see the orion config file). -To do so, go into `examples/slurm_orion`. -Here you can find the orion config file (`orion_config.yaml`), as well as the config -file (`config.yaml`) for your project (that contains the hyper-parameters). - -In general, you will want to run Orion in parallel over N slurm jobs. -To do so, simply run `sh run.sh` N times. - -When Orion has completed the trials, you will find the orion db file. - -You will also find the output of your experiments in `orion_working_dir`, which -will contain a folder for every trial. -Inside these folders, you can find the models (the best one and the last one), the config file with -the hyper-parameters for this trial, and the log file. - -You can check orion status with the following commands: -(to be run from `examples/slurm_orion`) - - export ORION_DB_ADDRESS='orion_db.pkl' - export ORION_DB_TYPE='pickleddb' - orion status - orion info --name my_exp - -### Building docs: - -Documentation is built using sphinx. It will automatically document all functions based on docstrings. -To automatically generate docs for your project, navigate to the `docs` folder and build the documentation: - - cd docs - make html - -To view the docs locally, open `docs/_build/html/index.html` in your browser. 
-
-### Building docs:
-
-Documentation is built using sphinx. It will automatically document all functions based on docstrings.
-To automatically generate docs for your project, navigate to the `docs` folder and build the documentation:
-
-    cd docs
-    make html
-
-To view the docs locally, open `docs/_build/html/index.html` in your browser.
-
-
-## YOUR PROJECT README:
-
-* __TODO__
diff --git a/{{cookiecutter.project_slug}}/examples/local/run.sh b/{{cookiecutter.project_slug}}/examples/local/run.sh
deleted file mode 100644
index 5d9e811..0000000
--- a/{{cookiecutter.project_slug}}/examples/local/run.sh
+++ /dev/null
@@ -1 +0,0 @@
-main --data ../data --output output --config config.yaml --start-from-scratch
diff --git a/{{cookiecutter.project_slug}}/setup.py b/{{cookiecutter.project_slug}}/setup.py
deleted file mode 100644
index 0e4fcd7..0000000
--- a/{{cookiecutter.project_slug}}/setup.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from setuptools import setup, find_packages
-
-
-setup(
-    name='{{ cookiecutter.project_slug }}',
-    version='{{ cookiecutter.version }}',
-    packages=find_packages(include=['{{ cookiecutter.project_slug }}', '{{ cookiecutter.project_slug }}.*']),
-    python_requires='>={{ cookiecutter.python_version }}',
-    install_requires=[
-        'flake8==4.0.1',
-        'flake8-docstrings==1.6.0',
-        'gitpython==3.1.27',
-        'jupyter==1.0.0',
-        'jinja2<3.1.0',
-        'myst-parser==0.18.0',
-        'orion>=0.2.4.post1',
-        'pyyaml==6.0',
-        'pytest==7.1.2',
-        'pytest-cov==3.0.0',
-        'pytorch_lightning==1.8.3',
-        'sphinx==5.1.1',
-        'sphinx-autoapi==1.9.0',
-        'sphinx-rtd-theme==1.0.0',
-        'sphinxcontrib-napoleon==0.7',
-        'sphinxcontrib-katex==0.8.6',
-        'tensorboard==2.9.1',
-        'tqdm==4.64.0',
-        'torch==1.12.0',
-        'torchvision==0.13.0'
-    ],
-    entry_points={
-        'console_scripts': [
-            'main={{ cookiecutter.project_slug }}.main:main'
-        ],
-    }
-)
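The deleted `run.sh` above calls the `main` console script declared in `setup.py`'s `entry_points`. Since `main.py` itself is not shown in this diff, here is only a hypothetical sketch of the argparse surface those flags imply; the flag names come from `run.sh`, while the help strings and everything else are assumptions for illustration:

```python
import argparse


def main():
    """Parse the CLI flags exercised by examples/local/run.sh."""
    parser = argparse.ArgumentParser(description='Train the model.')
    parser.add_argument('--data', required=True, help='path to the data folder')
    parser.add_argument('--output', required=True, help='folder for models and logs')
    parser.add_argument('--config', required=True, help='yaml file with the hyper-parameters')
    parser.add_argument('--start-from-scratch', action='store_true',
                        help='ignore any checkpoint already present in the output folder')
    args = parser.parse_args()
    print(args)  # a real implementation would hand these over to train(...)


if __name__ == '__main__':
    main()
```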
- """ - check_and_log_hp(['max_epoch'], hyper_params) - - best_model_path = os.path.join(output, BEST_MODEL_NAME) - best_checkpoint_callback = ModelCheckpoint( - dirpath=best_model_path, - filename='model', - save_top_k=1, - verbose=use_progress_bar, - monitor="val_loss", - mode="max", - every_n_epochs=1, - ) - - last_model_path = os.path.join(output, LAST_MODEL_NAME) - last_checkpoint_callback = ModelCheckpoint( - dirpath=last_model_path, - filename='model', - verbose=use_progress_bar, - every_n_epochs=1, - ) - - resume_from_checkpoint = handle_previous_models(output, last_model_path, best_model_path) - - early_stopping_params = hyper_params['early_stopping'] - check_and_log_hp(['metric', 'mode', 'patience'], hyper_params['early_stopping']) - early_stopping = EarlyStopping( - early_stopping_params['metric'], - mode=early_stopping_params['mode'], - patience=early_stopping_params['patience'], - verbose=use_progress_bar) - - logger = pl.loggers.TensorBoardLogger( - save_dir=output, - default_hp_metric=False, - version=0, # Necessary to resume tensorboard logging - ) - - trainer = pl.Trainer( - callbacks=[early_stopping, best_checkpoint_callback, last_checkpoint_callback], - max_epochs=hyper_params['max_epoch'], - resume_from_checkpoint=resume_from_checkpoint, - gpus=gpus, - logger=logger, - ) - - trainer.fit(model, datamodule=datamodule) - - # Log the best result and associated hyper parameters - best_dev_result = float(early_stopping.best_score.cpu().numpy()) - logger.log_hyperparams(hyper_params, metrics={'best_dev_metric': best_dev_result}) - - return best_dev_result - - -def handle_previous_models(output, last_model_path, best_model_path): - """Moves the previous models in a new timestamp folder.""" - last_models = glob.glob(last_model_path + os.sep + '*') - - if len(last_models) >= 1: - resume_from_checkpoint = sorted(last_models)[-1] - logger.info(f'models found - resuming from {resume_from_checkpoint}') - else: - logger.info('no model found - starting training from scratch') - resume_from_checkpoint = None - return resume_from_checkpoint