From 1e6bccd11b00cfe0e3f3ba95245b756a9cd2cf00 Mon Sep 17 00:00:00 2001
From: mirkobronzi <m.bronzi@gmail.com>
Date: Tue, 21 Jul 2020 12:21:05 -0400
Subject: [PATCH 1/3] add code to log more experiment info

---
 .../{{cookiecutter.project_slug}}/main.py     |  4 +-
 .../utils/logging_utils.py                    | 39 +++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py
index 4ab02d7..0b0a038 100755
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py
@@ -15,7 +15,7 @@
 from {{cookiecutter.project_slug}}.models.model_loader import load_model
 from {{cookiecutter.project_slug}}.models.model_loader import load_optimizer
 from {{cookiecutter.project_slug}}.models.model_loader import load_loss
-from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter
+from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter, log_exp_details
 
 logger = logging.getLogger(__name__)
 
@@ -83,6 +83,8 @@ def run(args, hyper_params):
         args (list): arguments passed from the cli
         hyper_params (dict): hyper parameters from the config file
     """
+    log_exp_details(os.path.realpath(__file__), args)
+
     if not os.path.exists(args.output):
         os.makedirs(args.output)
 
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
index 5f72e17..6d2e3e0 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
@@ -1,3 +1,14 @@
+import logging
+import mlflow
+import os
+import socket
+
+from git import Repo
+from mlflow.utils.mlflow_tags import MLFLOW_RUN_NOTE
+
+logger = logging.getLogger(__name__)
+
+
 class LoggerWriter:  # pragma: no cover
     """LoggerWriter.
 
@@ -25,3 +36,31 @@ def write(self, message):
     def flush(self):
         """flush."""
         pass
+
+
+def get_git_hash(script_location):
+    """Find the git hash for the running repository.
+
+    :param script_location: (str) path to the script inside the git repos we want to find.
+    :return: (str) the git hash for the repository of the provided script.
+    """
+    if not script_location.endswith('.py'):
+        raise ValueError('script_location should point to a python script')
+    repo_folder = os.path.dirname(script_location)
+    repo = Repo(repo_folder, search_parent_directories=True)
+    commit_hash = repo.head.commit
+    return commit_hash
+
+
+def log_exp_details(script_location, args):
+    """Will log the experiment details to both screen logger and mlflow.
+
+    :param script_location: (str) path to the script inside the git repos we want to find.
+    :param args: the argparser object.
+    """
+    git_hash = get_git_hash(script_location)
+    hostname = socket.gethostname()
+    message = "\nhostname: {}\ncode git hash: {}\ndata folder: {}".format(
+        hostname, git_hash, args.data)
+    logger.info(message)
+    mlflow.set_tag(key=MLFLOW_RUN_NOTE, value=message)

From da1550fe0f2f0a81aef78a3a6256a7258fa2100b Mon Sep 17 00:00:00 2001
From: mirkobronzi <m.bronzi@gmail.com>
Date: Tue, 21 Jul 2020 13:19:43 -0400
Subject: [PATCH 2/3] better organized logging

---
 {{cookiecutter.project_slug}}/setup.py        |  1 +
 .../{{cookiecutter.project_slug}}/main.py     |  1 +
 .../models/model_loader.py                    |  2 +-
 .../utils/hp_utils.py                         |  1 +
 .../utils/logging_utils.py                    | 19 +++++++++++--------
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/{{cookiecutter.project_slug}}/setup.py b/{{cookiecutter.project_slug}}/setup.py
index ccd8d9f..00a7176 100644
--- a/{{cookiecutter.project_slug}}/setup.py
+++ b/{{cookiecutter.project_slug}}/setup.py
@@ -9,6 +9,7 @@
     install_requires=[
         'flake8',
         'flake8-docstrings',
+        'gitpython',
         'tqdm',
         'mlflow',
         'orion>=0.1.8',
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py
index 0b0a038..23723dc 100755
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/main.py
@@ -90,6 +90,7 @@ def run(args, hyper_params):
 
     # __TODO__ change the hparam that are used from the training algorithm
     # (and NOT the model - these will be specified in the model itself)
+    logger.info('List of hyper-parameters:')
     check_and_log_hp(
         ['batch_size', 'optimizer', 'patience', 'architecture', 'max_epoch',
          'exp_name'],
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py
index 5135173..8320579 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py
@@ -31,7 +31,7 @@ def load_model(hyper_params):  # pragma: no cover
     logger.info('selected architecture: {}'.format(architecture))
 
     model = model_class(hyper_params)
-    logger.info(model)
+    logger.info('model info:\n' + str(model) + '\n')
 
     {%- if cookiecutter.dl_framework == 'pytorch' %}
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py
index ebf3296..442a248 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/hp_utils.py
@@ -49,3 +49,4 @@ def log_hp(names, hps):  # pragma: no cover
     for name in sorted(names):
         log_param(name, hps[name])
         logger.info('\thp "{}" => "{}"'.format(name, hps[name]))
+    logger.info('\n')
diff --git a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
index 6d2e3e0..3a8318d 100644
--- a/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
+++ b/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
@@ -3,7 +3,7 @@
 import os
 import socket
 
-from git import Repo
+from git import InvalidGitRepositoryError, Repo
 from mlflow.utils.mlflow_tags import MLFLOW_RUN_NOTE
 
 logger = logging.getLogger(__name__)
@@ -38,7 +38,7 @@ def flush(self):
         pass
 
 
-def get_git_hash(script_location):
+def get_git_hash(script_location):  # pragma: no cover
     """Find the git hash for the running repository.
 
     :param script_location: (str) path to the script inside the git repos we want to find.
@@ -47,12 +47,15 @@ def get_git_hash(script_location):
     if not script_location.endswith('.py'):
         raise ValueError('script_location should point to a python script')
     repo_folder = os.path.dirname(script_location)
-    repo = Repo(repo_folder, search_parent_directories=True)
-    commit_hash = repo.head.commit
+    try:
+        repo = Repo(repo_folder, search_parent_directories=True)
+        commit_hash = repo.head.commit.hexsha  # str SHA, not a Commit object
+    except (InvalidGitRepositoryError, ValueError):
+        commit_hash = 'git repository not found'
     return commit_hash
 
 
-def log_exp_details(script_location, args):
+def log_exp_details(script_location, args):  # pragma: no cover
     """Will log the experiment details to both screen logger and mlflow.
 
     :param script_location: (str) path to the script inside the git repos we want to find.
@@ -60,7 +63,7 @@ def log_exp_details(script_location, args):
     """
     git_hash = get_git_hash(script_location)
     hostname = socket.gethostname()
-    message = "\nhostname: {}\ncode git hash: {}\ndata folder: {}".format(
-        hostname, git_hash, args.data)
-    logger.info(message)
+    message = "\nhostname: {}\ngit code hash: {}\ndata folder: {}\ndata folder (abs): {}".format(
+        hostname, git_hash, args.data, os.path.abspath(args.data))
+    logger.info('Experiment info:' + message + '\n')
     mlflow.set_tag(key=MLFLOW_RUN_NOTE, value=message)

From 73f2c1f19d573864f7a0d8cc00ef2cd6be6f9a51 Mon Sep 17 00:00:00 2001
From: mirkobronzi <m.bronzi@gmail.com>
Date: Tue, 21 Jul 2020 13:30:42 -0400
Subject: [PATCH 3/3] name SLURM log files by job name and job id

---
 {{cookiecutter.project_slug}}/examples/slurm_cc/run.sh        | 3 +--
 {{cookiecutter.project_slug}}/examples/slurm_cc/to_submit.sh  | 4 ++--
 {{cookiecutter.project_slug}}/examples/slurm_mila/run.sh      | 3 +--
 .../examples/slurm_mila/to_submit.sh                          | 4 ++--
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/{{cookiecutter.project_slug}}/examples/slurm_cc/run.sh b/{{cookiecutter.project_slug}}/examples/slurm_cc/run.sh
index 2347a76..9370362 100644
--- a/{{cookiecutter.project_slug}}/examples/slurm_cc/run.sh
+++ b/{{cookiecutter.project_slug}}/examples/slurm_cc/run.sh
@@ -1,3 +1,2 @@
-rm -fr logs
-mkdir logs
+mkdir -p logs
 sbatch to_submit.sh
diff --git a/{{cookiecutter.project_slug}}/examples/slurm_cc/to_submit.sh b/{{cookiecutter.project_slug}}/examples/slurm_cc/to_submit.sh
index ef95585..bb32622 100644
--- a/{{cookiecutter.project_slug}}/examples/slurm_cc/to_submit.sh
+++ b/{{cookiecutter.project_slug}}/examples/slurm_cc/to_submit.sh
@@ -5,8 +5,8 @@
 #SBATCH --mem=5G
 #SBATCH --time=0:05:00
 #SBATCH --job-name={{ cookiecutter.project_slug }}
-#SBATCH --output=logs/out_%a.log
-#SBATCH --error=logs/err_%a.log
+#SBATCH --output=logs/%x__%j.out
+#SBATCH --error=logs/%x__%j.err
 # remove one # if you prefer receiving emails
 ##SBATCH --mail-type=all
 ##SBATCH --mail-user={{ cookiecutter.email }}
diff --git a/{{cookiecutter.project_slug}}/examples/slurm_mila/run.sh b/{{cookiecutter.project_slug}}/examples/slurm_mila/run.sh
index 2347a76..9370362 100644
--- a/{{cookiecutter.project_slug}}/examples/slurm_mila/run.sh
+++ b/{{cookiecutter.project_slug}}/examples/slurm_mila/run.sh
@@ -1,3 +1,2 @@
-rm -fr logs
-mkdir logs
+mkdir -p logs
 sbatch to_submit.sh
diff --git a/{{cookiecutter.project_slug}}/examples/slurm_mila/to_submit.sh b/{{cookiecutter.project_slug}}/examples/slurm_mila/to_submit.sh
index 9c5c5e5..464cd60 100644
--- a/{{cookiecutter.project_slug}}/examples/slurm_mila/to_submit.sh
+++ b/{{cookiecutter.project_slug}}/examples/slurm_mila/to_submit.sh
@@ -5,8 +5,8 @@
 #SBATCH --mem=5G
 #SBATCH --time=0:05:00
 #SBATCH --job-name={{ cookiecutter.project_slug }}
-#SBATCH --output=logs/out_%a.log
-#SBATCH --error=logs/err_%a.log
+#SBATCH --output=logs/%x__%j.out
+#SBATCH --error=logs/%x__%j.err
 # to attach a tag to your run (e.g., used to track the GPU time)
 # uncomment the following line and add replace `my_tag` with the proper tag:
 ##SBATCH --wckey=my_tag