diff --git a/.github/workflows/tests.yml b/.github/workflows/ci.yml
similarity index 89%
rename from .github/workflows/tests.yml
rename to .github/workflows/ci.yml
index 04ce39b..85110e6 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/ci.yml
@@ -5,18 +5,20 @@ on:
   push:
     branches:
       - master
+      - develop
   pull_request:
     branches:
       - master
+      - develop
 jobs:
   build:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: python-3.7
+    - name: python-3.8
       uses: actions/setup-python@v2
       with:
-        python-version: 3.7
+        python-version: 3.8
     - name: install-dependencies
       run: |
        python -m pip install --upgrade pip
diff --git a/README.md b/README.md
index a537539..2674d52 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,6 @@ A cookiecutter is a generic project template that will instantiate a new project
 * Pytorch/Tensorflow
 * Travis CI
-* Codecov
 * Sphinx (documentation)
 * MLFlow (experiment management)
 * Orion (hyperparameter optimization)
diff --git a/cookiecutter.json b/cookiecutter.json
index 332a1f5..3e6909c 100644
--- a/cookiecutter.json
+++ b/cookiecutter.json
@@ -5,7 +5,7 @@
   "project_name": "Wonderful Project",
   "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}",
   "project_short_description": "{{ cookiecutter.project_name }} is wonderful!",
-  "python_version": "3.7",
+  "python_version": "3.8",
   "dl_framework": ["pytorch", "tensorflow_cpu", "tensorflow_gpu"],
   "pypi_username": "{{ cookiecutter.github_username }}",
   "version": "0.0.1",
diff --git a/tests/end2end_pytorch/run.sh b/tests/end2end_pytorch/run.sh
index e8383f7..7b432a7 100755
--- a/tests/end2end_pytorch/run.sh
+++ b/tests/end2end_pytorch/run.sh
@@ -25,7 +25,7 @@ pip freeze
 sh config/hooks/pre-commit
 
 # run tests
-pytest .
+pytest --cov=wonderful_project
 
 # run the example
 cd examples/local
diff --git a/tests/end2end_tensorflow/run.sh b/tests/end2end_tensorflow/run.sh
index ae5fc5e..41c17ab 100755
--- a/tests/end2end_tensorflow/run.sh
+++ b/tests/end2end_tensorflow/run.sh
@@ -14,8 +14,6 @@ git add -A
 git commit -m "initial commit"
 pip install -e . --quiet
 pip install flake8 pytest --quiet
-# necessary cause tf dependencies are sometimes not updated
-pip install -U setuptools numpy six --quiet
 
 # print all dependencies
 pip freeze
@@ -24,7 +22,7 @@ pip freeze
 sh config/hooks/pre-commit
 
 # run tests
-pytest .
+pytest --cov=wonderful_project
 
 # run the examples
 cd examples/local
diff --git a/{{cookiecutter.project_slug}}/.azure_pipeline.yml b/{{cookiecutter.project_slug}}/.azure_pipeline.yml
new file mode 100644
index 0000000..f1191b1
--- /dev/null
+++ b/{{cookiecutter.project_slug}}/.azure_pipeline.yml
@@ -0,0 +1,27 @@
+jobs:
+- job:
+  pool:
+    vmImage: 'ubuntu-16.04'
+  strategy:
+    matrix:
+      Python:
+        python.version: '{{ cookiecutter.python_version }}'
+
+  steps:
+  - task: UsePythonVersion@0
+    displayName: 'Use Python $(python.version)'
+    inputs:
+      versionSpec: '$(python.version)'
+
+  - script: pip install -e .
+    displayName: 'Install dependencies'
+
+  - script: sh config/hooks/pre-commit
+    displayName: 'Running commit hook'
+
+  - script: pytest --cov={{cookiecutter.project_slug}}
+    displayName: 'Run pytest and display test coverage'
+
+  - script: sh run.sh
+    workingDirectory: examples/local
+    displayName: 'Run single toy experiment'
diff --git a/{{cookiecutter.project_slug}}/.github/workflows/tests.yml b/{{cookiecutter.project_slug}}/.github/workflows/tests.yml
new file mode 100644
index 0000000..7f65188
--- /dev/null
+++ b/{{cookiecutter.project_slug}}/.github/workflows/tests.yml
@@ -0,0 +1,39 @@
+name: unit-tests
+on:
+  # Trigger the workflow on push or pull request,
+  # but only for the master/develop branches
+  push:
+    branches:
+      - master
+      - develop
+  pull_request:
+    branches:
+      - master
+      - develop
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: python-{{ cookiecutter.python_version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: {{ cookiecutter.python_version }}
+    - name: install-dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        pip install -e .
+    - name: flake8
+      run: |
+        config/hooks/pre-commit
+    - name: print env
+      run: |
+        env
+    - name: pytest_and_coverage
+      run: |
+        pytest --cov={{cookiecutter.project_slug}}
+    - name: end2end-toyexp
+      run: |
+        cd examples/local
+        sh run.sh
diff --git a/{{cookiecutter.project_slug}}/.travis.yml b/{{cookiecutter.project_slug}}/.travis.yml
index d9936a2..8f22eb4 100644
--- a/{{cookiecutter.project_slug}}/.travis.yml
+++ b/{{cookiecutter.project_slug}}/.travis.yml
@@ -1,6 +1,6 @@
 language: python
 python:
-  - "3.7"
+  - "{{ cookiecutter.python_version }}"
 cache: pip
 install:
   # Reducing verbosity is needed because of Travis' limit on log length
@@ -9,4 +9,4 @@ script:
   # run flake8 with exactly the same options as in the commit hook:
   - config/hooks/pre-commit
   # run tests and compute the coverage
-  - pytest
+  - pytest --cov={{cookiecutter.project_slug}}
diff --git a/{{cookiecutter.project_slug}}/README.md b/{{cookiecutter.project_slug}}/README.md
index f83a13a..4180275 100644
--- a/{{cookiecutter.project_slug}}/README.md
+++ b/{{cookiecutter.project_slug}}/README.md
@@ -50,15 +50,40 @@ link your local git to the remote project, which should look like this:
     git remote add origin git@github.com:{{ cookiecutter.github_username }}/{{ cookiecutter.project_slug }}.git
     git push -u origin master
 
-### Add Travis
-A travis configuration file (`.travis.yml`) is already in your repository (so, no need to
-create it). This will run `flake8` and run the tests under `tests`.
+### Setup Continuous Integration
+
+Continuous integration will run the following:
+- Unit tests under `tests`.
+- End-to-end test under `examples/local`.
+- `flake8` to check the code syntax.
+- Checks on documentation presence and format (using `sphinx`).
+
+We support the following Continuous Integration providers.
+Check the instructions below for more details.
+
+#### GitHub Actions
+
+GitHub Actions are already configured in `.github/workflows/tests.yml`.
+GitHub Actions are enabled by default on GitHub, so, when
+pushing or opening pull requests to `master` or `develop`, they will
+run automatically.
+
+#### Travis
+
+Travis is already configured in `.travis.yml`.
 
 To enable it server-side, just go to https://travis-ci.com/account/repositories and click
 ` Manage repositories on GitHub`.
 Give the permission to run on the git repository you just created.
 
 Note, the link for public project may be https://travis-ci.org/account/repositories .
 
+#### Azure
+
+Azure Continuous Integration is already configured in `.azure_pipeline.yml`.
+
+To enable it server-side, go to Azure and select `.azure_pipeline.yml` as the
+configuration file for Continuous Integration.
+
 ## Running the code
 
 ### Run the tests
diff --git a/{{cookiecutter.project_slug}}/docs/usage/quickstart.md b/{{cookiecutter.project_slug}}/docs/usage/quickstart.md
index 5b5a0b6..1b78411 100644
--- a/{{cookiecutter.project_slug}}/docs/usage/quickstart.md
+++ b/{{cookiecutter.project_slug}}/docs/usage/quickstart.md
@@ -14,12 +14,12 @@ Everybody loves Schrodinger's equation, why not put it everywhere?
 You can also add math directly in your docstrings! For an example, click at the docstrings here:
 
 ```eval_rst
-:py:meth:`{{cookiecutter.project_slug}}.models.model_loader.load_loss`
+:py:meth:`{{cookiecutter.project_slug}}.models.optim.load_loss`
 ```
 
 You can even reference them directly anywhere for convenience, because clicking is for the lazy:
 
 ```eval_rst
-.. autoclass:: {{cookiecutter.project_slug}}.models.model_loader.load_loss
+.. autoclass:: {{cookiecutter.project_slug}}.models.optim.load_loss
    :show-inheritance:
    :noindex:
 ```
diff --git a/{{cookiecutter.project_slug}}/examples/local/run.sh b/{{cookiecutter.project_slug}}/examples/local/run.sh
index c210988..5d9e811 100644
--- a/{{cookiecutter.project_slug}}/examples/local/run.sh
+++ b/{{cookiecutter.project_slug}}/examples/local/run.sh
@@ -1 +1 @@
-main --data ../data --output output --config config.yaml --disable-progressbar
+main --data ../data --output output --config config.yaml --start-from-scratch
diff --git a/{{cookiecutter.project_slug}}/setup.py b/{{cookiecutter.project_slug}}/setup.py
index f97fe73..8afcb72 100644
--- a/{{cookiecutter.project_slug}}/setup.py
+++ b/{{cookiecutter.project_slug}}/setup.py
@@ -7,15 +7,22 @@
     packages=find_packages(include=['{{ cookiecutter.project_slug }}', '{{ cookiecutter.project_slug }}.*']),
     python_requires='>={{ cookiecutter.python_version }}',
     install_requires=[
+        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
+        'numpy==1.19.2',
+        'scipy==1.4.1',
+        'setuptools>=41.0.0',
+        'six>=1.15.0',
+        {%- endif %}
         'flake8',
         'flake8-docstrings',
         'gitpython',
         'tqdm',
         'jupyter',
-        'mlflow==1.10.0',
-        'orion>=0.1.8',
+        'mlflow==1.15.0',
+        'orion>=0.1.14',
         'pyyaml>=5.3',
         'pytest>=4.6',
+        'pytest-cov',
         'sphinx',
         'sphinx-autoapi',
         'sphinx-rtd-theme',
@@ -23,19 +30,13 @@
         'sphinxcontrib-katex',
         'recommonmark',
         {%- if cookiecutter.dl_framework == 'pytorch' %}
-        'torch'],
+        'torch==1.8.1', 'pytorch_lightning==1.2.7'],
         {%- endif %}
         {%- if cookiecutter.dl_framework == 'tensorflow_cpu' %}
-        'tensorflow==2.2.0',
+        'tensorflow==2.4.0'],
         {%- endif %}
         {%- if cookiecutter.dl_framework == 'tensorflow_gpu' %}
-        'tensorflow-gpu==2.2.0'
-        {%- endif %}
-        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-        'scipy==1.4.1',
-        'setuptools>=41.0.0',
-        'six>=1.12.0',
-        'numpy>=1.19.4'],
+        'tensorflow-gpu==2.4.0'],
         {%- endif %}
     entry_points={
         'console_scripts': [
diff --git a/{{cookiecutter.project_slug}}/tests/test_train.py b/{{cookiecutter.project_slug}}/tests/test_train.py
deleted file mode 100644
index 4bd035c..0000000
--- a/{{cookiecutter.project_slug}}/tests/test_train.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from {{cookiecutter.project_slug}}.train import write_stats, load_stats
-
-
-def test_write_and_load_stats__simple(tmp_path):
-    best_eval_score = 1.0
-    epoch = 1
-    remaining_patience = 2
-    mlflow_run_id = 'NO_MLFLOW'
-    write_stats(tmp_path, best_eval_score, epoch, remaining_patience)
-    result = load_stats(tmp_path)
-    assert (best_eval_score, epoch, remaining_patience, mlflow_run_id) == result
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py
index 42916c9..3241fd3 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/data/data_loader.py
@@ -1,18 +1,23 @@
 import os
+import typing
 
-import numpy
+import numpy as np
 {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
 import tensorflow as tf
 
 BUFFER_SIZE = 100
 {%- endif %}
 {%- if cookiecutter.dl_framework == 'pytorch' %}
+import pytorch_lightning as pl
 from torch.utils.data import Dataset, DataLoader
 {%- endif %}
 
 
 # __TODO__ change the dataloader to suit your needs...
-def get_data(data_folder, prefix):  # pragma: no cover
+def get_data(
+    data_folder: typing.AnyStr,
+    prefix: typing.AnyStr
+) -> typing.Tuple[np.ndarray, np.ndarray]:  # pragma: no cover
     """Function to load data into memory.
 
     Args:
@@ -27,12 +32,12 @@ def get_data(data_folder, prefix):  # pragma: no cover
     with open(os.path.join(data_folder, '{}.input'.format(prefix))) as in_stream:
         for line in in_stream:
             inputs.append([float(x) for x in line.split()])
-    in_data = numpy.array(inputs, dtype=numpy.float32)
+    in_data = np.array(inputs, dtype=np.float32)
     targets = []
     with open(os.path.join(data_folder, '{}.target'.format(prefix))) as in_stream:
         for line in in_stream:
             targets.append(float(line))
-    tar_data = numpy.array(targets, dtype=numpy.float32)
+    tar_data = np.array(targets, dtype=np.float32)
     return in_data, tar_data
 
 {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
@@ -65,7 +70,11 @@ def load_data(data_dir, hyper_params):  # pragma: no cover
 class MyDataset(Dataset):  # pragma: no cover
     """Dataset class for iterating over the data."""
 
-    def __init__(self, input_data, target_data):
+    def __init__(
+        self,
+        input_data: np.ndarray,
+        target_data: np.ndarray,
+    ):
         """Initialize MyDataset.
 
         Args:
@@ -79,7 +88,10 @@ def __init__(self, input_data, target_data):
         """Return the number of data items in MyDataset."""
         return len(self.input_data)
 
-    def __getitem__(self, index):
+    def __getitem__(
+        self,
+        index: int,
+    ):
         """__getitem__.
 
         Args:
@@ -90,6 +102,48 @@ def __getitem__(self, index):
         return input_example, target_example
 
 
+class MyDataModule(pl.LightningDataModule):  # pragma: no cover
+    """Data module class that prepares dataset parsers and instantiates data loaders."""
+
+    def __init__(
+        self,
+        data_dir: typing.AnyStr,
+        hyper_params: typing.Dict[typing.AnyStr, typing.Any],
+    ):
+        """Validates the hyperparameter config dictionary and sets up internal attributes."""
+        super().__init__()
+        self.data_dir = data_dir
+        self.batch_size = hyper_params['batch_size']
+        self.train_data_parser, self.dev_data_parser = None, None
+
+    def prepare_data(self):
+        """Downloads/extracts/unpacks the data if needed (we don't)."""
+        pass
+
+    def setup(self, stage=None):
+        """Parses and splits all samples across the train/valid/test parsers."""
+        # here, we will actually assign train/val datasets for use in dataloaders
+        if stage == 'fit' or stage is None:
+            train_input, train_target = get_data(self.data_dir, 'train')
+            self.train_data_parser = MyDataset(train_input, train_target)
+            dev_input, dev_target = get_data(self.data_dir, 'dev')
+            self.dev_data_parser = MyDataset(dev_input, dev_target)
+        if stage == 'test' or stage is None:
+            raise NotImplementedError  # __TODO__: add code to instantiate the test data parser here
+
+    def train_dataloader(self) -> DataLoader:
+        """Creates the training dataloader using the training data parser."""
+        return DataLoader(self.train_data_parser, batch_size=self.batch_size, shuffle=True)
+
+    def val_dataloader(self):
+        """Creates the validation dataloader using the validation data parser."""
+        return DataLoader(self.dev_data_parser, batch_size=self.batch_size, shuffle=False)
+
+    def test_dataloader(self):
+        """Creates the testing dataloader using the testing data parser."""
+        raise NotImplementedError  # __TODO__: add code to instantiate the test data loader here
+
+
 def load_data(data_dir, hyper_params):  # pragma: no cover
     """Prepare the data into datasets.
 
@@ -97,18 +151,9 @@ def load_data(data_dir, hyper_params):  # pragma: no cover
         data_dir (str): path to the folder containing the data
         hyper_params (dict): hyper parameters from the config file
 
-    Retruns:
-        train_dataset (obj): iterable training dataset object
-        dev_dataset (obj): iterable validation dataset object
-
-
+    Returns:
+        datamodule (obj): the data module used to prepare/instantiate data loaders.
""" - # __TODO__ load the data - train_input, train_target = get_data(data_dir, 'train') - train_data = MyDataset(train_input, train_target) - dev_input, dev_target = get_data(data_dir, 'dev') - dev_data = MyDataset(dev_input, dev_target) - train_loader = DataLoader(train_data, batch_size=hyper_params['batch_size'], shuffle=True) - dev_loader = DataLoader(dev_data, batch_size=hyper_params['batch_size'], shuffle=False) - return train_loader, dev_loader + # __TODO__ if you have different data modules, add whatever code is needed to select them here + return MyDataModule(data_dir, hyper_params) {%- endif %} diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py index 0beb8f0..9f22023 100755 --- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py +++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py @@ -10,12 +10,24 @@ import yaml from yaml import load +{%- if cookiecutter.dl_framework == 'pytorch' %} +from pytorch_lightning.loggers import MLFlowLogger +{%- endif %} from {{cookiecutter.project_slug}}.data.data_loader import load_data -from {{cookiecutter.project_slug}}.train import train, load_stats, STAT_FILE_NAME +{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %} +from {{cookiecutter.project_slug}}.train import train, load_stats +{%- endif %} +from {{cookiecutter.project_slug}}.train import STAT_FILE_NAME +{%- if cookiecutter.dl_framework == 'pytorch' %} +from {{cookiecutter.project_slug}}.train import load_mlflow +from {{cookiecutter.project_slug}}.train import train +{%- endif %} from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp from {{cookiecutter.project_slug}}.models.model_loader import load_model -from {{cookiecutter.project_slug}}.models.model_loader import load_optimizer -from {{cookiecutter.project_slug}}.models.model_loader import load_loss +{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %} +from {{cookiecutter.project_slug}}.models.optim import load_optimizer +from {{cookiecutter.project_slug}}.models.optim import load_loss +{%- endif %} from {{cookiecutter.project_slug}}.utils.file_utils import rsync_folder from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter, log_exp_details from {{cookiecutter.project_slug}}.utils.reproducibility_utils import set_seed @@ -47,6 +59,11 @@ def main(): help='will disable the progressbar while going over the mini-batch') parser.add_argument('--start-from-scratch', action='store_true', help='will not load any existing saved model - even if present') +{%- if cookiecutter.dl_framework == 'pytorch' %} + parser.add_argument('--gpus', default=None, + help='list of GPUs to use. If not specified, runs on CPU.' + 'Example of GPU usage: 1 means run on GPU 1, 0 on GPU 0.') +{%- endif %} parser.add_argument('--debug', action='store_true') args = parser.parse_args() @@ -86,12 +103,33 @@ def main(): hyper_params = {} # to be done as soon as possible otherwise mlflow will not log with the proper exp. 
-
     if orion.client.cli.IS_ORION_ON:
         exp_name = os.getenv('ORION_EXPERIMENT_NAME', 'orion_exp')
+    {%- if cookiecutter.dl_framework == 'pytorch' %}
+        tags = {'mlflow.runName': os.getenv('ORION_TRIAL_ID')}
+    {%- endif %}
     else:
         exp_name = hyper_params.get('exp_name', 'exp')
+    {%- if cookiecutter.dl_framework == 'pytorch' %}
+        tags = {}
+    {%- endif %}
     mlflow.set_experiment(exp_name)
+    {%- if cookiecutter.dl_framework == 'pytorch' %}
+    save_dir = os.getenv('MLFLOW_TRACKING_URI', './mlruns')
+    mlf_logger = MLFlowLogger(
+        experiment_name=exp_name,
+        tags=tags,
+        save_dir=save_dir
+    )
+
+    if os.path.exists(os.path.join(args.output, STAT_FILE_NAME)):
+        mlf_logger._run_id = load_mlflow(args.output)
+
+    mlflow.start_run(run_id=mlf_logger.run_id)
+    run(args, data_dir, output_dir, hyper_params, mlf_logger)
+    mlflow.end_run()
+    {%- endif %}
+    {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
     if os.path.exists(os.path.join(args.output, STAT_FILE_NAME)):
         _, _, _, mlflow_run_id = load_stats(args.output)
@@ -100,19 +138,30 @@
         mlflow.start_run()
     run(args, data_dir, output_dir, hyper_params)
     mlflow.end_run()
+    {%- endif %}
 
     if args.tmp_folder is not None:
         rsync_folder(output_dir + os.path.sep, args.output)
 
+{%- if cookiecutter.dl_framework == 'pytorch' %}
+
+
+def run(args, data_dir, output_dir, hyper_params, mlf_logger):
+{%- endif %}
+{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
 
 
 def run(args, data_dir, output_dir, hyper_params):
+{%- endif %}
     """Setup and run the dataloaders, training loops, etc.
 
     Args:
-        args (list): arguments passed from the cli
+        args (object): arguments passed from the cli
         data_dir (str): path to input folder
         output_dir (str): path to output folder
         hyper_params (dict): hyper parameters from the config file
+{%- if cookiecutter.dl_framework == 'pytorch' %}
+        mlf_logger (obj): MLFlow logger callback.
+{%- endif %}
     """
     # __TODO__ change the hparam that are used from the training algorithm
     # (and NOT the model - these will be specified in the model itself)
@@ -126,14 +175,25 @@
     log_exp_details(os.path.realpath(__file__), args)
 
+{%- if cookiecutter.dl_framework == 'pytorch' %}
+    datamodule = load_data(data_dir, hyper_params)
+    model = load_model(hyper_params)
+
+    train(model=model, datamodule=datamodule, output=output_dir, hyper_params=hyper_params,
+          use_progress_bar=not args.disable_progressbar, start_from_scratch=args.start_from_scratch,
+          mlf_logger=mlf_logger, gpus=args.gpus)
+{%- endif %}
+{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
     train_loader, dev_loader = load_data(data_dir, hyper_params)
     model = load_model(hyper_params)
     optimizer = load_optimizer(hyper_params, model)
     loss_fun = load_loss(hyper_params)
 
-    train(model, optimizer, loss_fun, train_loader, dev_loader, hyper_params['patience'],
-          output_dir, max_epoch=hyper_params['max_epoch'],
-          use_progress_bar=not args.disable_progressbar, start_from_scratch=args.start_from_scratch)
+    train(model=model, optimizer=optimizer, loss_fun=loss_fun, train_loader=train_loader,
+          dev_loader=dev_loader, output=output_dir, hyper_params=hyper_params,
+          use_progress_bar=not args.disable_progressbar,
+          start_from_scratch=args.start_from_scratch)
+{%- endif %}
 
 
 if __name__ == '__main__':
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py
index 8320579..2711334 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py
@@ -1,13 +1,5 @@
 import logging
 
-{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-import tensorflow as tf
-{%- endif %}
-{%- if cookiecutter.dl_framework == 'pytorch' %}
-import torch
-from torch import optim
-{%- endif %}
-
 from {{cookiecutter.project_slug}}.models.my_model import MyModel
 
 logger = logging.getLogger(__name__)
@@ -33,61 +25,4 @@ def load_model(hyper_params):  # pragma: no cover
     model = model_class(hyper_params)
     logger.info('model info:\n' + str(model) + '\n')
-    {%- if cookiecutter.dl_framework == 'pytorch' %}
-    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-    logger.info('using device {}'.format(device))
-    if torch.cuda.is_available():
-        logger.info(torch.cuda.get_device_name(0))
-    {%- endif %}
-
     return model
-
-
-def load_optimizer(hyper_params, model):  # pragma: no cover
-    """Instantiate the optimizer.
-
-    Args:
-        hyper_params (dict): hyper parameters from the config file
-        model (obj): A neural network model object.
-
-    Returns:
-        optimizer (obj): The optimizer for the given model
-    """
-    optimizer_name = hyper_params['optimizer']
-    # __TODO__ fix optimizer list
-    if optimizer_name == 'adam':
-        optimizer = {%- if cookiecutter.dl_framework == 'pytorch' %} optim.Adam(model.parameters()){%- else %} tf.keras.optimizers.Adam(){%- endif %}
-    elif optimizer_name == 'sgd':
-        optimizer = {%- if cookiecutter.dl_framework == 'pytorch' %} optim.SGD(model.parameters()){%- else %} tf.keras.optimizers.SGD(){%- endif %}
-    else:
-        raise ValueError('optimizer {} not supported'.format(optimizer_name))
-    return optimizer
-
-
-def load_loss(hyper_params):  # pragma: no cover
-    r"""Instantiate the loss.
-
-    You can add some math directly in your docstrings, however don't forget the `r`
-    to indicate it is being treated as restructured text. For example, an L1-loss can be
-    defined as:
-
-    .. math::
-        \text{loss}(x, y) = \frac{1}{n} \sum_{i} z_{i}
-
-    Args:
-        hyper_params (dict): hyper parameters from the config file
-
-    Returns:
-        loss (obj): The loss for the given model
-    """
-    loss_name = hyper_params['loss']
-    if loss_name == 'L1':
-        {%- if cookiecutter.dl_framework == 'pytorch' %}
-        loss = torch.nn.L1Loss(reduction='sum')
-        {%- endif %}
-        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-        loss = tf.keras.losses.MeanAbsoluteError()
-        {%- endif %}
-    else:
-        raise ValueError('loss {} not supported'.format(loss_name))
-    return loss
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py
index 6e4880f..4c1fb42 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/my_model.py
@@ -1,24 +1,77 @@
 import logging
+import typing
 
 {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
 import tensorflow as tf
 {%- endif %}
 {%- if cookiecutter.dl_framework == 'pytorch' %}
-import torch.nn as nn
+import torch
+import pytorch_lightning as pl
+
+from {{cookiecutter.project_slug}}.models.optim import load_loss, load_optimizer
 {%- endif %}
 
 from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp
 
 logger = logging.getLogger(__name__)
 
+{%- if cookiecutter.dl_framework == 'pytorch' %}
+
+class BaseModel(pl.LightningModule):
+    """Base class for Pytorch Lightning model - useful to reuse the same *_step methods."""
 
-class MyModel({%- if cookiecutter.dl_framework == 'pytorch' %}nn.Module{%- else %}tf.keras.Model{%- endif %}):  # pragma: no cover
+    def configure_optimizers(self):
+        """Returns the combination of optimizer(s) and learning rate scheduler(s) to train with.
+
+        Here, we read all the optimization-related hyperparameters from the config dictionary and
+        create the required optimizer/scheduler combo.
+
+        This function will be called automatically by the pytorch lightning trainer implementation.
+        See https://pytorch-lightning.readthedocs.io/en/latest/common/optimizers.html for more info
+        on the expected returned elements.
+ """ + # we use the generic loading function from the `model_loader` module, but it could be made + # a direct part of the model (useful if we want layer-dynamic optimization) + return load_optimizer(self.hparams, self) + + def _generic_step( + self, + batch: typing.Any, + batch_idx: int, + ) -> typing.Any: + """Runs the prediction + evaluation step for training/validation/testing.""" + input_data, targets = batch + preds = self(input_data) # calls the forward pass of the model + loss = self.loss_fn(preds, targets) + return loss + + def training_step(self, batch, batch_idx): + """Runs a prediction step for training, returning the loss.""" + loss = self._generic_step(batch, batch_idx) + self.log("train_loss", loss) + self.log("epoch", self.current_epoch) + self.log("step", self.global_step) + return loss # this function is required, as the loss returned here is used for backprop + + def validation_step(self, batch, batch_idx): + """Runs a prediction step for validation, logging the loss.""" + loss = self._generic_step(batch, batch_idx) + self.log("val_loss", loss) + + def test_step(self, batch, batch_idx): + """Runs a prediction step for testing, logging the loss.""" + loss = self._generic_step(batch, batch_idx) + self.log("test_loss", loss) +{%- endif %} + + +class MyModel({%- if cookiecutter.dl_framework == 'pytorch' %}BaseModel{%- else %}tf.keras.Model{%- endif %}): # pragma: no cover """Simple Model Class. Inherits from the given framework's model class. This is a simple MLP model. """ - def __init__(self, hyper_params): + def __init__(self, hyper_params: typing.Dict[typing.AnyStr, typing.Any]): """__init__. Args: @@ -27,8 +80,6 @@ def __init__(self, hyper_params): super(MyModel, self).__init__() check_and_log_hp(['size'], hyper_params) - self.hyper_params = hyper_params - {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %} self.hyper_params = hyper_params self.dense1 = tf.keras.layers.Dense(hyper_params['size']) @@ -45,8 +96,10 @@ def call(self, inputs): return hidden2 {%- endif %} {%- if cookiecutter.dl_framework == 'pytorch' %} - self.linear1 = nn.Linear(5, hyper_params['size']) - self.linear2 = nn.Linear(hyper_params['size'], 1) + self.save_hyperparameters(hyper_params) # they will become available via model.hparams + self.linear1 = torch.nn.Linear(5, hyper_params['size']) + self.linear2 = torch.nn.Linear(hyper_params['size'], 1) + self.loss_fn = load_loss(hyper_params) # 'load_loss' could be part of the model itself... def forward(self, data): """Forward method of the model. @@ -58,7 +111,7 @@ def forward(self, data): tensor: the output of the model computation. """ - hidden = nn.functional.relu(self.linear1(data)) + hidden = torch.nn.functional.relu(self.linear1(data)) result = self.linear2(hidden) return result.squeeze() {%- endif %} diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py new file mode 100644 index 0000000..2d9644a --- /dev/null +++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/optim.py @@ -0,0 +1,66 @@ +import logging + +{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %} +import tensorflow as tf +{%- endif %} +{%- if cookiecutter.dl_framework == 'pytorch' %} +import torch +from torch import optim +{%- endif %} + + +logger = logging.getLogger(__name__) + + +def load_optimizer(hyper_params, model): # pragma: no cover + """Instantiate the optimizer. 
+
+    Args:
+        hyper_params (dict): hyper parameters from the config file
+        model (obj): A neural network model object.
+
+    Returns:
+        optimizer (obj): The optimizer for the given model
+    """
+    optimizer_name = hyper_params['optimizer']
+    # __TODO__ fix optimizer list
+    if optimizer_name == 'adam':
+        optimizer = {%- if cookiecutter.dl_framework == 'pytorch' %} optim.Adam(model.parameters())
+        {%- else %} tf.keras.optimizers.Adam()
+        {%- endif %}
+    elif optimizer_name == 'sgd':
+        optimizer = {%- if cookiecutter.dl_framework == 'pytorch' %} optim.SGD(model.parameters())
+        {%- else %} tf.keras.optimizers.SGD()
+        {%- endif %}
+    else:
+        raise ValueError('optimizer {} not supported'.format(optimizer_name))
+    return optimizer
+
+
+def load_loss(hyper_params):  # pragma: no cover
+    r"""Instantiate the loss.
+
+    You can add some math directly in your docstrings; however, don't forget the `r`
+    to indicate it is being treated as reStructuredText. For example, an L1-loss can be
+    defined as:
+
+    .. math::
+        \text{loss}(x, y) = \frac{1}{n} \sum_{i} z_{i}
+
+    Args:
+        hyper_params (dict): hyper parameters from the config file
+
+    Returns:
+        loss (obj): The loss for the given model
+    """
+    loss_name = hyper_params['loss']
+    if loss_name == 'L1':
+        {%- if cookiecutter.dl_framework == 'pytorch' %}
+        loss = torch.nn.L1Loss(reduction='sum')
+        {%- endif %}
+        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
+        loss = tf.keras.losses.MeanAbsoluteError()
+        {%- endif %}
+    else:
+        raise ValueError('loss {} not supported'.format(loss_name))
+    return loss
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py
index bc71af5..9589190 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/train.py
@@ -1,22 +1,32 @@
+{%- if cookiecutter.dl_framework == 'pytorch' %}
+import datetime
+import glob
+{%- endif %}
 import logging
 import os
+{%- if cookiecutter.dl_framework == 'pytorch' %}
+import shutil
+{%- endif %}
 
 import mlflow
 import orion
 import yaml
 {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
+import mlflow.tensorflow as mt
 import tensorflow as tf
+from tensorflow.keras.callbacks import EarlyStopping
+from pathlib import Path
 {%- endif %}
-import time
 {%- if cookiecutter.dl_framework == 'pytorch' %}
-import torch
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
 {%- endif %}
-import tqdm
-from mlflow import log_metric
 from orion.client import report_results
 from yaml import dump
 from yaml import load
 
+from {{cookiecutter.project_slug}}.utils.hp_utils import check_and_log_hp
+
 logger = logging.getLogger(__name__)
 
 BEST_MODEL_NAME = 'best_model'
@@ -24,55 +34,37 @@
 STAT_FILE_NAME = 'stats.yaml'
 
 
-def reload_model(output, model_name, model, optimizer,
-                 start_from_scratch=False):  # pragma: no cover
-    """Reload a model.
+def train(**kwargs):  # pragma: no cover
+    """Training loop wrapper. Used to catch exception if Orion is being used."""
+    try:
+        best_dev_metric = train_impl(**kwargs)
+    except RuntimeError as err:
+        if orion.client.cli.IS_ORION_ON and 'CUDA out of memory' in str(err):
+            logger.error(err)
+            logger.error('model was out of memory - assigning a bad score to tell Orion to avoid '
+                         'too big model')
+            best_dev_metric = -999
+        else:
+            raise err
 
-    Can be useful for model checkpointing, hyper-parameter optimization, etc.
+    report_results([dict(
+        name='dev_metric',
+        type='objective',
+        # note the minus - cause orion is always trying to minimize (cit. from the guide)
+        value=-float(best_dev_metric))])
+{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
 
-    Args:
-        output (str): Output directory.
-        model_name (str): Name of the saved model.
-        model (obj): A model object.
-        optimizer (obj): Optimizer used during training.
-        start_from_scratch (bool): starts training from scratch even if a saved moel is present.
-    """
-    saved_model = os.path.join(output, model_name)
-    if start_from_scratch and os.path.exists(saved_model):
-        logger.info('saved model file "{}" already exists - but NOT loading it '
-                    '(cause --start_from_scratch)'.format(output))
-        return
-    if os.path.exists(saved_model):
-        logger.info('saved model file "{}" already exists - loading it'.format(output))
-        {%- if cookiecutter.dl_framework == 'pytorch' %}
-        model.load_state_dict(torch.load(saved_model))
-        {%- endif %}
-        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-        ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
-        ckpt_manager_best_model = tf.train.CheckpointManager(ckpt, saved_model, max_to_keep=1)
-        status = ckpt.restore(ckpt_manager_best_model.latest_checkpoint)
-        # NOTE: not using assert_consumed because it fails (see
-        # https://github.com/tensorflow/tensorflow/issues/33150) given that some variables
-        # are in the saved_model but not in the model. This seems more a bug with tensorflow.
-        # You can use assert_existing_objects_matched that checks only the variables in the
-        # model.
-        # In any case, we use expect_partial here because otherwise the restoring would complain
-        # when restoring the multitask model for prediction (given that - in that case - we only
-        # load the model part related to the main task).
-        # status.assert_consumed()
-        status.assert_existing_objects_matched()
-        # status.expect_partial()
-        {%- endif %}
-        stats = load_stats(output)
-        logger.info('model status: {}'.format(stats))
-        return stats
-    if os.path.exists(output):
-        logger.info('saved model file not found')
-        return
+def load_stats(output):
+    """Load the latest statistics.
 
-    logger.info('output folder not found')
-    os.makedirs(output)
+    Args:
+        output (str): Output directory
+    """
+    with open(os.path.join(output, STAT_FILE_NAME), 'r') as stream:
+        stats = load(stream, Loader=yaml.FullLoader)
+    return stats['best_dev_metric'], stats['epoch'], stats['remaining_patience'], \
+        stats['mlflow_run_id']
 
 
 def write_stats(output, best_eval_score, epoch, remaining_patience):
@@ -93,54 +85,35 @@
         dump(to_store, stream)
 
 
-def load_stats(output):
-    """Load the latest statistics.
-
-    Args:
-        output (str): Output directory
-    """
-    with open(os.path.join(output, STAT_FILE_NAME), 'r') as stream:
-        stats = load(stream, Loader=yaml.FullLoader)
-    return stats['best_dev_metric'], stats['epoch'], stats['remaining_patience'], \
-        stats['mlflow_run_id']
-
+def reload_model(output, model_name, start_from_scratch=False):  # pragma: no cover
+    """Reload a model.
 
-def train(model, optimizer, loss_fun, train_loader, dev_loader, patience, output,
-          max_epoch, use_progress_bar=True, start_from_scratch=False):  # pragma: no cover
-    """Training loop wrapper. Used to catch exception (and to handle them) if Orion is being used.
 
     Can be useful for model checkpointing, hyper-parameter optimization, etc.
 
     Args:
-        model (obj): The neural network model object.
-        optimizer (obj): Optimizer used during training.
-        loss_fun (obj): Loss function that will be optimized.
-        train_loader (obj): Dataloader for the training set.
-        dev_loader (obj): Dataloader for the validation set.
-        patience (int): max number of epochs without improving on `best_eval_score`.
-            After this point, the train ends.
         output (str): Output directory.
-        max_epoch (int): Max number of epochs to train for.
-        use_progress_bar (bool): Use tqdm progress bar (can be disabled when logging).
-        start_from_scratch (bool): Start training from scratch (ignore checkpoints)
+        model_name (str): Model name to reload.
+        start_from_scratch (bool): starts training from scratch even if a saved model is present.
     """
-    try:
-        best_dev_metric = train_impl(
-            model, optimizer, loss_fun, train_loader, dev_loader, patience, output,
-            max_epoch, use_progress_bar, start_from_scratch)
-    except RuntimeError as err:
-        if orion.client.cli.IS_ORION_ON and 'CUDA out of memory' in str(err):
-            logger.error(err)
-            logger.error('model was out of memory - assigning a bad score to tell Orion to avoid'
-                         'too big model')
-            best_dev_metric = -999
-        else:
-            raise err
+    saved_model_folder = os.path.join(output, model_name)
+    if start_from_scratch and os.path.exists(saved_model_folder):
+        logger.info('saved model file "{}" already exists - but NOT loading it '
+                    '(cause --start_from_scratch)'.format(output))
+        restored = None
+    elif os.path.exists(saved_model_folder):
+        logger.info('loading model from {}'.format(saved_model_folder))
+        model = tf.keras.models.load_model(
+            os.path.join(output, LAST_MODEL_NAME)
+        )
 
-    report_results([dict(
-        name='dev_metric',
-        type='objective',
-        # note the minus - cause orion is always trying to minimize (cit. from the guide)
-        value=-float(best_dev_metric))])
-{%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
+        stats = load_stats(output)
+        logger.info('model status: {}'.format(stats))
+
+        restored = model, stats
+    else:
+        logger.info('no model found to restore.')
+        restored = None
+    return restored
 
 
 def init_model(model, train_loader):  # pragma: no cover
@@ -153,11 +126,10 @@
     model_input, model_target = next(iter(train_loader))
     _ = model(model_input)
     model.summary(print_fn=logger.info)
-{%- endif %}
 
 
-def train_impl(model, optimizer, loss_fun, train_loader, dev_loader, patience, output,
-               max_epoch, use_progress_bar=True, start_from_scratch=False):  # pragma: no cover
+def train_impl(model, optimizer, loss_fun, train_loader, dev_loader, output, hyper_params,
+               use_progress_bar=True, start_from_scratch=False):  # pragma: no cover
     """Main training loop implementation.
 
     Args:
@@ -166,154 +138,203 @@
         loss_fun (obj): Loss function that will be optimized.
         train_loader (obj): Dataloader for the training set.
         dev_loader (obj): Dataloader for the validation set.
-        patience (int): max number of epochs without improving on `best_eval_score`.
-            After this point, the train ends.
         output (str): Output directory.
-        max_epoch (int): Max number of epochs to train for.
+        hyper_params (dict): Dict containing hyper-parameters.
         use_progress_bar (bool): Use tqdm progress bar (can be disabled when logging).
         start_from_scratch (bool): Start training from scratch (ignore checkpoints)
     """
-    if use_progress_bar:
-        pb = tqdm.tqdm
-    else:
-        def pb(x, total):
-            return x
-    {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
+    check_and_log_hp(['max_epoch', 'patience'], hyper_params)
 
-    init_model(model, train_loader)
-    ckpt_last = tf.train.Checkpoint(model=model, optimizer=optimizer)
-    ckpt_manager_last_model = tf.train.CheckpointManager(
-        ckpt_last, os.path.join(output, LAST_MODEL_NAME), max_to_keep=1)
-    ckpt_best = tf.train.Checkpoint(model=model, optimizer=optimizer)
-    ckpt_manager_best_model = tf.train.CheckpointManager(
-        ckpt_best, os.path.join(output, BEST_MODEL_NAME), max_to_keep=1)
-    {%- endif %}
-
-    stats = reload_model(output, LAST_MODEL_NAME, model, optimizer, start_from_scratch)
-    if stats is None:
+    restored = reload_model(output, LAST_MODEL_NAME, start_from_scratch)
+
+    if restored is None:
         best_dev_metric = None
-        remaining_patience = patience
+        remaining_patience = hyper_params['patience']
         start_epoch = 0
+
+        model.compile(
+            optimizer=optimizer,
+            loss=loss_fun,
+            metrics=[],
+        )
     else:
+        restored_model, stats = restored
         best_dev_metric, start_epoch, remaining_patience, _ = stats
+        model = restored_model
+
+    init_model(model, train_loader)
+
+    es = EarlyStoppingWithModelSave(
+        monitor='val_loss', min_delta=0, patience=hyper_params['patience'],
+        verbose=use_progress_bar, mode='min', restore_best_weights=False, baseline=best_dev_metric,
+        output=output, remaining_patience=remaining_patience
+    )
+
+    # set tensorflow/keras logging
+    mt.autolog(every_n_iter=1)
+
+    history = model.fit(x=train_loader, validation_data=dev_loader, callbacks=[es],
+                        epochs=hyper_params['max_epoch'], verbose=use_progress_bar,
+                        initial_epoch=start_epoch)
+
+    best_val_loss = min(history.history['val_loss'])
+    return best_val_loss
+
+
+class EarlyStoppingWithModelSave(EarlyStopping):
+    """Keras callback that extends the early stopping.
+
+    Adds the functionality to save the models in the new TF format.
+    (both best and last model)
+    """
 
-    if remaining_patience <= 0:
-        logger.warning(
-            'remaining patience is zero - not training (and returning best dev score {})'.format(
-                best_dev_metric))
-        return best_dev_metric
-    if start_epoch >= max_epoch:
-        logger.warning(
-            'start epoch {} > max epoch {} - not training (and returning best dev score '
-            '{})'.format(start_epoch, max_epoch, best_dev_metric))
-        return best_dev_metric
-    {%- if cookiecutter.dl_framework == 'pytorch' %}
-    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-    model.to(device)
-    {%- endif %}
-
-    for epoch in range(start_epoch, max_epoch):
-
-        start = time.time()
-        # train
-        train_cumulative_loss = 0.0
-        examples = 0
-        {%- if cookiecutter.dl_framework == 'pytorch' %}
-        model.train()
-        train_steps = len(train_loader)
-        {%- endif %}
-        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-        train_steps = sum(1 for _ in train_loader)
-        {%- endif %}
-        for i, data in pb(enumerate(train_loader, 0), total=train_steps):
-            model_input, model_target = data
-            # forward + backward + optimize
-            {%- if cookiecutter.dl_framework == 'pytorch' %}
-            optimizer.zero_grad()
-            outputs = model(model_input.to(device))
-            model_target = torch.squeeze(model_target.to(device))
-            loss = loss_fun(outputs, model_target)
-            loss.backward()
-            optimizer.step()
-
-            train_cumulative_loss += loss.item()
-            {%- endif %}
-            {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-            with tf.GradientTape() as tape:
-                outputs = model(model_input)
-                loss = loss_fun(model_target, outputs)
-            gradients = tape.gradient(loss, model.trainable_variables)
-            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
-
-            train_cumulative_loss += float(loss.numpy())
-            {%- endif %}
-            examples += model_target.shape[0]
-
-        train_end = time.time()
-        avg_train_loss = train_cumulative_loss / examples
-
-        # dev
-        {%- if cookiecutter.dl_framework == 'pytorch' %}
-        model.eval()
-        dev_steps = len(dev_loader)
-        {%- endif %}
-        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-        dev_steps = sum(1 for _ in dev_loader)
-        {%- endif %}
-        dev_cumulative_loss = 0.0
-        examples = 0
-        for i, data in pb(enumerate(dev_loader, 0), total=dev_steps):
-            model_input, model_target = data
-            {%- if cookiecutter.dl_framework == 'pytorch' %}
-            with torch.no_grad():
-                outputs = model(model_input.to(device))
-                model_target = torch.squeeze(model_target.to(device))
-                loss = loss_fun(outputs, model_target)
-                dev_cumulative_loss += loss.item()
-            {%- endif %}
-            {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-            outputs = model(model_input)
-            loss = loss_fun(model_target, outputs)
-            dev_cumulative_loss += float(loss.numpy())
-            {%- endif %}
-            examples += model_target.shape[0]
-
-        avg_dev_loss = dev_cumulative_loss / examples
-        log_metric("dev_loss", avg_dev_loss, step=epoch)
-        log_metric("train_loss", avg_train_loss, step=epoch)
-
-        dev_end = time.time()
-        {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-        ckpt_manager_last_model.save()
-        {%- endif %}
-        {%- if cookiecutter.dl_framework == 'pytorch' %}
-        torch.save(model.state_dict(), os.path.join(output, LAST_MODEL_NAME))
-        {%- endif %}
-
-        if best_dev_metric is None or avg_dev_loss < best_dev_metric:
-            best_dev_metric = avg_dev_loss
-            remaining_patience = patience
-            {%- if cookiecutter.dl_framework in ['tensorflow_cpu', 'tensorflow_gpu'] %}
-            ckpt_manager_best_model.save()
-            {%- endif %}
-            {%- if cookiecutter.dl_framework == 'pytorch' %}
-            torch.save(model.state_dict(), os.path.join(output, BEST_MODEL_NAME))
-            {%- endif %}
+    def __init__(self, output, remaining_patience, **kwargs):
+        """Main constructor - initializes the parent.
+
+        output (str): path to folder where to store the models.
+        remaining_patience (int): patience left when starting early stopping.
+            (in general it's equal to patience - but it may be less if train is resumed)
+        """
+        super(EarlyStoppingWithModelSave, self).__init__(**kwargs)
+        self.output = output
+        self.wait = self.patience - remaining_patience
+
+    def on_train_begin(self, logs=None):
+        """See parent class doc."""
+        super(EarlyStoppingWithModelSave, self).on_train_begin(logs)
+        Path(self.output).mkdir(parents=True, exist_ok=True)
+
+    # copy-pasted in order to modify what happens when we improve (see comment below)
+    def on_epoch_end(self, epoch, logs=None):
+        """See parent class doc."""
+        current = self.get_monitor_value(logs)
+        if current is None:
+            return
+        if self.monitor_op(current - self.min_delta, self.best):
+            self.best = current
+            self.wait = 0
+            if self.restore_best_weights:
+                self.best_weights = self.model.get_weights()
+
+            self.model.save(os.path.join(self.output, BEST_MODEL_NAME))
         else:
-            remaining_patience -= 1
-
-        logger.info(
-            'done #epoch {:3} => loss {:5.3f} - dev loss {:3.2f} (will try for {} more epoch) - '
-            'train min. {:4.2f} / dev min. {:4.2f}'.format(
-                epoch, avg_train_loss, avg_dev_loss, remaining_patience, (train_end - start) / 60,
-                (dev_end - train_end) / 60))
-
-        write_stats(output, best_dev_metric, epoch + 1, remaining_patience)
-        log_metric("best_dev_metric", best_dev_metric)
-
-        if remaining_patience <= 0:
-            logger.info('done! best dev metric is {}'.format(best_dev_metric))
-            break
-    logger.info('training completed (epoch done {} - max epoch {})'.format(epoch + 1, max_epoch))
-    logger.info('Finished Training')
-    return best_dev_metric
+            self.wait += 1
+            if self.wait >= self.patience:
+                self.stopped_epoch = epoch
+                self.model.stop_training = True
+                if self.restore_best_weights:
+                    if self.verbose > 0:
+                        print('Restoring model weights from the end of the best epoch.')
+                    self.model.set_weights(self.best_weights)
+
+        self.model.save(os.path.join(self.output, LAST_MODEL_NAME))
+        write_stats(self.output, self.best, epoch, self.patience - self.wait)
+{%- endif %}
+{%- if cookiecutter.dl_framework == 'pytorch' %}
+
+
+def load_mlflow(output):
+    """Load the mlflow run id.
+
+    Args:
+        output (str): Output directory
+    """
+    with open(os.path.join(output, STAT_FILE_NAME), 'r') as stream:
+        stats = load(stream, Loader=yaml.FullLoader)
+    return stats['mlflow_run_id']
+
+
+def write_mlflow(output):
+    """Write the mlflow info to resume the training plotting.
+
+    Args:
+        output (str): Output directory
+    """
+    mlflow_run = mlflow.active_run()
+    mlflow_run_id = mlflow_run.info.run_id if mlflow_run is not None else 'NO_MLFLOW'
+    to_store = {'mlflow_run_id': mlflow_run_id}
+    with open(os.path.join(output, STAT_FILE_NAME), 'w') as stream:
+        dump(to_store, stream)
+
+
+def train_impl(model, datamodule, output, hyper_params,
+               use_progress_bar, start_from_scratch, mlf_logger, gpus):  # pragma: no cover
+    """Main training loop implementation.
+
+    Args:
+        model (obj): The neural network model object.
+        datamodule (obj): lightning data module that will instantiate data loaders.
+        output (str): Output directory.
+        hyper_params (dict): Dict containing hyper-parameters.
+        use_progress_bar (bool): Use tqdm progress bar (can be disabled when logging).
+        start_from_scratch (bool): Start training from scratch (ignore checkpoints)
+        mlf_logger (obj): MLFlow logger callback.
+        gpus: number of GPUs to use.
+    """
+    check_and_log_hp(['max_epoch', 'patience'], hyper_params)
+    write_mlflow(output)
+
+    best_model_path = os.path.join(output, BEST_MODEL_NAME)
+    best_checkpoint_callback = ModelCheckpoint(
+        dirpath=best_model_path,
+        filename='model',
+        save_top_k=1,
+        verbose=use_progress_bar,
+        monitor="val_loss",
+        mode="auto",
+        period=1,
+    )
+
+    last_model_path = os.path.join(output, LAST_MODEL_NAME)
+    last_checkpoint_callback = ModelCheckpoint(
+        dirpath=last_model_path,
+        filename='model',
+        verbose=use_progress_bar,
+        period=1,
+    )
+
+    resume_from_checkpoint = handle_previous_models(output, last_model_path, best_model_path,
+                                                    start_from_scratch)
+
+    early_stopping = EarlyStopping("val_loss", mode="auto", patience=hyper_params['patience'],
+                                   verbose=use_progress_bar)
+    trainer = pl.Trainer(
+        callbacks=[early_stopping, best_checkpoint_callback, last_checkpoint_callback],
+        checkpoint_callback=True,
+        logger=mlf_logger,
+        max_epochs=hyper_params['max_epoch'],
+        resume_from_checkpoint=resume_from_checkpoint,
+        gpus=gpus
+    )
+
+    trainer.fit(model, datamodule=datamodule)
+    best_dev_result = float(early_stopping.best_score.cpu().numpy())
+    return best_dev_result
+
+
+def handle_previous_models(output, last_model_path, best_model_path, start_from_scratch):
+    """Moves the previous models in a new timestamp folder."""
+    last_models = glob.glob(last_model_path + os.sep + '*')
+    best_models = glob.glob(best_model_path + os.sep + '*')
+
+    if len(last_models + best_models) > 0:
+        now = datetime.datetime.now()
+        timestamp = now.strftime('%Y%m%d_%H%M%S')
+        new_folder = output + os.path.sep + timestamp
+        os.mkdir(new_folder)
+        shutil.move(last_model_path, new_folder)
+        shutil.move(best_model_path, new_folder)
+        logger.info(f'old models found - moving them to {new_folder}')
+        # need to change the last model pointer to the new location
+        last_models = glob.glob(new_folder + os.path.sep + LAST_MODEL_NAME + os.sep + '*')
+
+    if start_from_scratch:
+        logger.info('will not load any pre-existent checkpoint (because of "--start-from-scratch")')
+        resume_from_checkpoint = None
+    elif len(last_models) >= 1:
+        resume_from_checkpoint = sorted(last_models)[-1]
+        logger.info(f'models found - resuming from {resume_from_checkpoint}')
+    else:
+        logger.info('no model found - starting training from scratch')
+        resume_from_checkpoint = None
+    return resume_from_checkpoint
+{%- endif %}
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py
index 442a248..492315b 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py
@@ -31,12 +31,13 @@ def check_hp(names, hps, allow_extra=True):
             missing.add(name)
     extra = hps.keys() - names
+    msgs = []
     if len(missing) > 0:
-        logger.error('please add the missing hyper-parameters: {}'.format(missing))
+        msgs.append(f'please add the missing hyper-parameters: {missing}')
     if len(extra) > 0 and not allow_extra:
-        logger.error('please remove the extra hyper-parameters: {}'.format(extra))
-    if len(missing) > 0 or (len(extra) > 0 and not allow_extra):
-        raise ValueError('fix according to the error message above')
+        msgs.append(f'please remove the extra hyper-parameters: {extra}')
+    if len(msgs) > 0:
+        raise ValueError('\n'.join(msgs))
 
 
 def log_hp(names, hps):  # pragma: no cover