Skip to content

Commit

Permalink
Brings pre-commit hooks and lints (#15)
Browse files Browse the repository at this point in the history
* Updates table printing to save a table for ehrlich

* Brings pre-commit hooks

* Runs isort

* Makes sure ruff checks well

* Installs and runs pre-commit hooks

* Adds dev dependencies

* Updates the preprocessing env action to not run on drafts

* Adds extra formatting and lint to GitHub action

* Adds requirements dev

* Updates dev requirements in project

* Removes isort usage

* Adds CPU torch installation by default in all tox envs
  • Loading branch information
miguelgondu authored Nov 6, 2024
1 parent 998d6c8 commit ff218d9
Show file tree
Hide file tree
Showing 75 changed files with 351 additions and 324 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/hdbo-base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
python -m pip install tox
- name: Check linting
run: |
tox -e formatting-py310
tox -e formatting-and-linting-py310
- name: Check tests with tox on base env
run: |
tox -e hdbo-base-py310
10 changes: 9 additions & 1 deletion .github/workflows/preprocessing-env-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@ name: Data preprocessing env builds (conda, python 3.10)

on:
push:
branches:
- master
pull_request:
types: [opened, synchronize, reopened, ready_for_review, closed]
branches:
- master
schedule:
- cron: "0 0 * * 0"
- cron: '0 0 * * 0'

jobs:
build-linux:
runs-on: ubuntu-latest
timeout-minutes: 8
if: github.event.pull_request.draft == false
strategy:
max-parallel: 5

Expand Down
23 changes: 23 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
exclude: '.*\.pdb$'
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 24.1.1
hooks:
- id: black
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.5
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: hdbo_benchmark
name: hdbo_benchmark
channels:
- defaults
dependencies:
Expand Down
2 changes: 1 addition & 1 deletion envs/environment.bounce.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: hdbo_bounce
name: hdbo_bounce
channels:
- defaults
dependencies:
Expand Down
13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ dependencies = [
]

[project.optional-dependencies]
dev = [
"mypy",
"pandas-stubs",
"types-Pillow",
"ruff",
"black",
"isort",
"pre-commit",
"tox",
]
bounce = [
"poli-baselines[bounce]@git+https://github.com/MachineLearningLifeScience/poli-baselines.git"
]
Expand Down Expand Up @@ -52,6 +62,9 @@ warn_return_any = true
warn_unused_configs = true
ignore_missing_imports = true

[tool.isort]
profile = "black"

[tool.pytest.ini_options]
markers = [
"hdbo_base: base tests for HDBO",
Expand Down
8 changes: 8 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
mypy
pandas-stubs
types-Pillow
ruff
black
isort
pre-commit
tox
5 changes: 1 addition & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,4 @@ poli-core @ git+https://github.com/MachineLearningLifeScience/poli.git
poli-baselines @ git+https://github.com/MachineLearningLifeScience/poli-baselines.git@main
wandb
lightning
fair-esm
mypy
pandas-stubs
types-Pillow
fair-esm
20 changes: 9 additions & 11 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,24 @@

import click
import numpy as np

from poli.core.util.seeding import seed_python_numpy_and_torch
from poli.core.exceptions import BudgetExhaustedException
from poli.core.util.seeding import seed_python_numpy_and_torch

from hdbo_benchmark.utils.experiments.load_solvers import (
load_solver_from_problem,
SOLVER_NAMES,
CONTINUOUS_SPACE_SOLVERS,
)
from hdbo_benchmark.utils.experiments.load_problems import load_problem
from hdbo_benchmark.utils.experiments.load_generative_models import (
load_generative_model_and_bounds,
)
from hdbo_benchmark.utils.experiments.verify_status_pre_experiment import (
verify_repos_are_clean,
from hdbo_benchmark.utils.experiments.load_problems import load_problem
from hdbo_benchmark.utils.experiments.load_solvers import (
CONTINUOUS_SPACE_SOLVERS,
SOLVER_NAMES,
load_solver_from_problem,
)
from hdbo_benchmark.utils.experiments.problem_transformations import (
transform_problem_from_discrete_to_continuous,
)

from hdbo_benchmark.utils.experiments.verify_status_pre_experiment import (
verify_repos_are_clean,
)
from hdbo_benchmark.utils.logging.idempotence_of_experiments import (
experiment_has_already_run,
)
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ include_package_data = True
where=src

[options.package_data]
* = *.sht, *.yml, *.jar, *.pt, *.json, *.pdb, *.npz
* = *.sht, *.yml, *.jar, *.pt, *.json, *.pdb, *.npz
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
molecules in the ZINC dataset to SELFIES.
"""

from typing import List
import pickle
from pathlib import Path
from typing import List

import selfies as sf

Expand Down Expand Up @@ -44,7 +44,7 @@ def translate_smiles_to_selfies(
selfies_strings.append(sf.encoder(smile))
except sf.EncoderError:
if strict:
raise ValueError(f"Failed to encode SMILES to SELFIES.")
raise ValueError("Failed to encode SMILES to SELFIES.")
else:
selfies_strings.append(None)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""Computes the alphabet by counting the tokens in the dataset."""

from __future__ import annotations

import json
from collections import defaultdict
from pathlib import Path
import json

import pandas as pd
import matplotlib.pyplot as plt

import pandas as pd
import selfies as sf # type: ignore

if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
experiments/assets/data/small_molecules/processed/
"""

from pathlib import Path
import json
from pathlib import Path

import numpy as np
import pandas as pd

import selfies as sf # type: ignore

if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
sequence length, and the length of the alphabet.
"""

from pathlib import Path
import json
from pathlib import Path

import pandas as pd

Expand Down
18 changes: 7 additions & 11 deletions src/hdbo_benchmark/experiments/benchmark_on_ehrlich/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,25 @@
from uuid import uuid4

import click

import torch
import numpy as np

import poli
import poli_baselines
import torch
from poli.core.exceptions import BudgetExhaustedException
from poli.core.util.seeding import seed_numpy, seed_python
from poli.objective_repository import EhrlichHoloBlackBox
from poli.core.exceptions import BudgetExhaustedException

import poli_baselines

import hdbo_benchmark
from hdbo_benchmark.utils.constants import DEVICE, ROOT_DIR
from hdbo_benchmark.utils.experiments.load_solvers import (
load_solver_class,
SOLVER_NAMES,
load_solver_class,
)
from hdbo_benchmark.utils.constants import ROOT_DIR, DEVICE
from hdbo_benchmark.utils.logging.uncommited_changes import has_uncommitted_changes

from hdbo_benchmark.utils.logging.wandb_observer import initialize_observer
from hdbo_benchmark.utils.logging.idempotence_of_experiments import (
experiment_has_already_run,
)
from hdbo_benchmark.utils.logging.uncommited_changes import has_uncommitted_changes
from hdbo_benchmark.utils.logging.wandb_observer import initialize_observer

torch.set_default_dtype(torch.float32)

Expand Down
35 changes: 16 additions & 19 deletions src/hdbo_benchmark/experiments/benchmark_on_foldx/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,45 @@
Running the benchmark on FoldX stability.
"""

import json

# mypy: disable-error-code="import-untyped"
from typing import Callable
from uuid import uuid4
import json

import click

import pandas as pd
import torch
import numpy as np

import pandas as pd
import poli
from poli.repository import FoldXStabilityProblemFactory
from poli.core.util.seeding import seed_numpy, seed_python
import poli_baselines
import torch
from poli.core.abstract_black_box import AbstractBlackBox
from poli.core.exceptions import BudgetExhaustedException
from poli.core.problem import Problem

import poli_baselines
from poli.core.util.seeding import seed_numpy, seed_python
from poli.repository import FoldXStabilityProblemFactory

import hdbo_benchmark
from hdbo_benchmark.generative_models.ae_for_esm import LitAutoEncoder
from hdbo_benchmark.utils.constants import DEVICE, ROOT_DIR
from hdbo_benchmark.utils.experiments.load_generative_models import (
load_generative_model_and_bounds,
)
from hdbo_benchmark.utils.experiments.load_solvers import (
load_solver_class,
SOLVER_NAMES,
DISCRETE_SPACE_SOLVERS,
SOLVER_NAMES,
SOLVERS_THAT_DONT_ALLOW_CUSTOM_INPUTS,
)
from hdbo_benchmark.utils.experiments.load_generative_models import (
load_generative_model_and_bounds,
load_solver_class,
)
from hdbo_benchmark.utils.experiments.normalization import (
from_unit_cube_to_range,
from_range_to_unit_cube,
from_unit_cube_to_range,
)
from hdbo_benchmark.utils.constants import ROOT_DIR, DEVICE
from hdbo_benchmark.utils.logging.uncommited_changes import has_uncommitted_changes

from hdbo_benchmark.utils.logging.wandb_observer import initialize_observer
from hdbo_benchmark.utils.logging.idempotence_of_experiments import (
experiment_has_already_run,
)
from hdbo_benchmark.utils.logging.uncommited_changes import has_uncommitted_changes
from hdbo_benchmark.utils.logging.wandb_observer import initialize_observer

torch.set_default_dtype(torch.float32)

Expand Down
28 changes: 12 additions & 16 deletions src/hdbo_benchmark/experiments/benchmark_on_pmo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,31 @@
from uuid import uuid4

import click

import torch
import numpy as np
from selfies import split_selfies

import poli
from poli.core.util.seeding import seed_numpy, seed_python
import poli_baselines
import torch
from poli.core.abstract_black_box import AbstractBlackBox
from poli.core.exceptions import BudgetExhaustedException

import poli_baselines
from poli.core.util.seeding import seed_numpy, seed_python
from selfies import split_selfies

import hdbo_benchmark
from hdbo_benchmark.generative_models.vae_factory import VAEFactory, VAESelfies, VAE
from hdbo_benchmark.utils.experiments.load_solvers import (
load_solver_class,
SOLVER_NAMES,
)
from hdbo_benchmark.generative_models.vae_factory import VAE, VAEFactory, VAESelfies
from hdbo_benchmark.utils.constants import DEVICE, ROOT_DIR
from hdbo_benchmark.utils.experiments.load_metadata_for_vaes import (
load_alphabet_for_pmo,
load_sequence_length_for_pmo,
)
from hdbo_benchmark.utils.constants import ROOT_DIR, DEVICE
from hdbo_benchmark.utils.logging.uncommited_changes import has_uncommitted_changes

from hdbo_benchmark.utils.logging.wandb_observer import initialize_observer
from hdbo_benchmark.utils.experiments.load_solvers import (
SOLVER_NAMES,
load_solver_class,
)
from hdbo_benchmark.utils.logging.idempotence_of_experiments import (
experiment_has_already_run,
)
from hdbo_benchmark.utils.logging.uncommited_changes import has_uncommitted_changes
from hdbo_benchmark.utils.logging.wandb_observer import initialize_observer

torch.set_default_dtype(torch.float32)

Expand Down
Loading

0 comments on commit ff218d9

Please sign in to comment.