Commit 52ed67a

Merge pull request #31 from mirkobronzi/loggin_exp_info

Loggin exp info

jerpint authored Jul 21, 2020
2 parents 2a6aa57 + 73f2c1f

Showing 9 changed files with 55 additions and 10 deletions.
3 changes: 1 addition & 2 deletions {{cookiecutter.project_slug}}/examples/slurm_cc/run.sh
@@ -1,3 +1,2 @@
-rm -fr logs
-mkdir logs
+mkdir -p logs
 sbatch to_submit.sh
4 changes: 2 additions & 2 deletions {{cookiecutter.project_slug}}/examples/slurm_cc/to_submit.sh
@@ -5,8 +5,8 @@
 #SBATCH --mem=5G
 #SBATCH --time=0:05:00
 #SBATCH --job-name={{ cookiecutter.project_slug }}
-#SBATCH --output=logs/out_%a.log
-#SBATCH --error=logs/err_%a.log
+#SBATCH --output=logs/%x__%j.out
+#SBATCH --error=logs/%x__%j.err
 # remove one # if you prefer receiving emails
 ##SBATCH --mail-type=all
 ##SBATCH --mail-user={{ cookiecutter.email }}
3 changes: 1 addition & 2 deletions {{cookiecutter.project_slug}}/examples/slurm_mila/run.sh
@@ -1,3 +1,2 @@
-rm -fr logs
-mkdir logs
+mkdir -p logs
 sbatch to_submit.sh
4 changes: 2 additions & 2 deletions {{cookiecutter.project_slug}}/examples/slurm_mila/to_submit.sh
@@ -5,8 +5,8 @@
 #SBATCH --mem=5G
 #SBATCH --time=0:05:00
 #SBATCH --job-name={{ cookiecutter.project_slug }}
-#SBATCH --output=logs/out_%a.log
-#SBATCH --error=logs/err_%a.log
+#SBATCH --output=logs/%x__%j.out
+#SBATCH --error=logs/%x__%j.err
 # to attach a tag to your run (e.g., used to track the GPU time)
 # uncomment the following line and add replace `my_tag` with the proper tag:
 ##SBATCH --wckey=my_tag
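In both `run.sh` scripts, `mkdir -p logs` replaces the `rm -fr logs` / `mkdir logs` pair, so previous logs are no longer deleted on every submission. In both `to_submit.sh` scripts, the log file names move from the array-index pattern `%a` to `%x__%j`, where `%x` expands to the Slurm job name and `%j` to the job ID, giving every job its own `.out`/`.err` pair.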
1 change: 1 addition & 0 deletions {{cookiecutter.project_slug}}/setup.py
@@ -9,6 +9,7 @@
     install_requires=[
         'flake8',
         'flake8-docstrings',
+        'gitpython',
         'tqdm',
         'mlflow',
         'orion>=0.1.8',
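`gitpython` is added because `logging_utils.py` (below) now imports `Repo` and `InvalidGitRepositoryError` from the `git` package to look up the commit hash of the running code.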
@@ -15,7 +15,7 @@
 from {{cookiecutter.project_slug}}.models.model_loader import load_model
 from {{cookiecutter.project_slug}}.models.model_loader import load_optimizer
 from {{cookiecutter.project_slug}}.models.model_loader import load_loss
-from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter
+from {{cookiecutter.project_slug}}.utils.logging_utils import LoggerWriter, log_exp_details

 logger = logging.getLogger(__name__)

@@ -83,11 +83,14 @@ def run(args, hyper_params):
         args (list): arguments passed from the cli
         hyper_params (dict): hyper parameters from the config file
     """
+    log_exp_details(os.path.realpath(__file__), args)
+
     if not os.path.exists(args.output):
         os.makedirs(args.output)

     # __TODO__ change the hparam that are used from the training algorithm
     # (and NOT the model - these will be specified in the model itself)
+    logger.info('List of hyper-parameters:')
     check_and_log_hp(
         ['batch_size', 'optimizer', 'patience', 'architecture', 'max_epoch',
          'exp_name'],
2 changes: 1 addition & 1 deletion {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/models/model_loader.py
@@ -31,7 +31,7 @@ def load_model(hyper_params): # pragma: no cover
     logger.info('selected architecture: {}'.format(architecture))

     model = model_class(hyper_params)
-    logger.info(model)
+    logger.info('model info:\n' + str(model) + '\n')

 {%- if cookiecutter.dl_framework == 'pytorch' %}
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
@@ -49,3 +49,4 @@ def log_hp(names, hps): # pragma: no cover
     for name in sorted(names):
         log_param(name, hps[name])
         logger.info('\thp "{}" => "{}"'.format(name, hps[name]))
+    logger.info('\n')
42 changes: 42 additions & 0 deletions {{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/utils/logging_utils.py
@@ -1,3 +1,14 @@
+import logging
+import mlflow
+import os
+import socket
+
+from git import InvalidGitRepositoryError, Repo
+from mlflow.utils.mlflow_tags import MLFLOW_RUN_NOTE
+
+logger = logging.getLogger(__name__)
+
+
 class LoggerWriter: # pragma: no cover
     """LoggerWriter.
@@ -25,3 +36,34 @@ def write(self, message):
     def flush(self):
         """flush."""
         pass
+
+
+def get_git_hash(script_location): # pragma: no cover
+    """Find the git hash for the running repository.
+
+    :param script_location: (str) path to the script inside the git repos we want to find.
+    :return: (str) the git hash for the repository of the provided script.
+    """
+    if not script_location.endswith('.py'):
+        raise ValueError('script_location should point to a python script')
+    repo_folder = os.path.dirname(script_location)
+    try:
+        repo = Repo(repo_folder, search_parent_directories=True)
+        commit_hash = repo.head.commit
+    except (InvalidGitRepositoryError, ValueError):
+        commit_hash = 'git repository not found'
+    return commit_hash
+
+
+def log_exp_details(script_location, args): # pragma: no cover
+    """Will log the experiment details to both screen logger and mlflow.
+
+    :param script_location: (str) path to the script inside the git repos we want to find.
+    :param args: the argparser object.
+    """
+    git_hash = get_git_hash(script_location)
+    hostname = socket.gethostname()
+    message = "\nhostname: {}\ngit code hash: {}\ndata folder: {}\ndata folder (abs): {}".format(
+        hostname, git_hash, args.data, os.path.abspath(args.data))
+    logger.info('Experiment info:' + message + '\n')
+    mlflow.set_tag(key=MLFLOW_RUN_NOTE, value=message)
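A minimal usage sketch of the two new helpers, assuming a rendered project whose package is importable as `myproject` (a stand-in for `{{cookiecutter.project_slug}}`) and an `args` namespace with a `data` attribute, mirroring the call added to `run()` above; the explicit `mlflow.start_run()` is only there so that `set_tag` has an active run to annotate:

```python
# Hypothetical sketch: `myproject` and the `--data` flag stand in for the
# rendered cookiecutter project and its real CLI arguments.
import argparse
import os

import mlflow

from myproject.utils.logging_utils import get_git_hash, log_exp_details

parser = argparse.ArgumentParser()
parser.add_argument('--data', default='./data', help='path to the data folder')
args = parser.parse_args()

# Commit object of the repository containing this script, or the string
# 'git repository not found' when the script is not inside a git checkout.
print(get_git_hash(os.path.realpath(__file__)))

with mlflow.start_run():
    # Logs hostname, git hash and data folder to the screen logger and
    # attaches the same message to the run as its MLflow run note.
    log_exp_details(os.path.realpath(__file__), args)
```

In the template itself the call sits at the top of `run()`, so every training run records the host, commit and data folder it was launched with.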