From e9dca7dc4197fe5a99633832c7dca7d53d7e54f8 Mon Sep 17 00:00:00 2001
From: EnricoTrizio
Date: Mon, 16 Dec 2024 12:11:13 +0100
Subject: [PATCH] Removed already ported files

---
 mlcolvar/graph/README.md                  |  15 -
 mlcolvar/graph/__init__.py                |  11 -
 mlcolvar/graph/cvs/__init__.py            |   4 -
 mlcolvar/graph/cvs/committor/__init__.py  |   3 -
 mlcolvar/graph/cvs/committor/committor.py | 176 -----------
 mlcolvar/graph/cvs/cv.py                  | 347 ----------------------
 mlcolvar/graph/utils/__init__.py          |   4 -
 mlcolvar/graph/utils/progress.py          |  66 ----
 8 files changed, 626 deletions(-)
 delete mode 100644 mlcolvar/graph/README.md
 delete mode 100644 mlcolvar/graph/__init__.py
 delete mode 100644 mlcolvar/graph/cvs/__init__.py
 delete mode 100644 mlcolvar/graph/cvs/committor/__init__.py
 delete mode 100644 mlcolvar/graph/cvs/committor/committor.py
 delete mode 100644 mlcolvar/graph/cvs/cv.py
 delete mode 100644 mlcolvar/graph/utils/__init__.py
 delete mode 100644 mlcolvar/graph/utils/progress.py

diff --git a/mlcolvar/graph/README.md b/mlcolvar/graph/README.md
deleted file mode 100644
index 4d40c7e0..00000000
--- a/mlcolvar/graph/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# The Graph Neural Network (GNN) Module in MLCOLVAR.
-
-
-
-## INTRODUCTION
-
-WIP
-
-## DEPENDENCIES
-
-- `pytorch_geometric` >= 2.5
-- `matscipy`
-- `mdtraj`
diff --git a/mlcolvar/graph/__init__.py b/mlcolvar/graph/__init__.py
deleted file mode 100644
index 1e9f9239..00000000
--- a/mlcolvar/graph/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-"""The graph neural network module."""
-
-from . import utils
-from . import core
-from . import data
-from . import cvs
-from . import explain
-
-utils.torch_tools.set_default_dtype('float32')
-# torch_scatter will cause compilation problems.
-__import__('torch_geometric').typing.WITH_TORCH_SCATTER = False
diff --git a/mlcolvar/graph/cvs/__init__.py b/mlcolvar/graph/cvs/__init__.py
deleted file mode 100644
index 7833d8aa..00000000
--- a/mlcolvar/graph/cvs/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .cv import GraphBaseCV
-from .supervised import GraphDeepTDA
-from .timelagged import GraphDeepTICA
-from .committor import GraphCommittor
diff --git a/mlcolvar/graph/cvs/committor/__init__.py b/mlcolvar/graph/cvs/committor/__init__.py
deleted file mode 100644
index 3af5089f..00000000
--- a/mlcolvar/graph/cvs/committor/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from . import utils
-from .utils import get_dataset_kolmogorov_bias, compute_committor_weights
-from .committor import GraphCommittor
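For reviewers, a minimal sketch of how the removed committor CV was constructed; the signature matches the deleted `committor.py` below, while the system-specific values (cutoff, atomic numbers and masses) are placeholders, not tested settings:

```python
from mlcolvar.graph.cvs import GraphCommittor

# Hypothetical system: the values below are illustrative placeholders.
cv = GraphCommittor(
    cutoff=0.8,                    # must match the cutoff used to build the graphs
    atomic_numbers=[1, 6, 7, 8],   # atomic-number mapping of the dataset
    atomic_masses=[1.008, 12.011, 14.007, 15.999],
    model_name='GVPModel',
    extra_loss_options={'alpha': 1.0, 'gamma': 100.0, 'delta_f': 0.0},
)
```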
diff --git a/mlcolvar/graph/cvs/committor/committor.py b/mlcolvar/graph/cvs/committor/committor.py
deleted file mode 100644
index 7478c326..00000000
--- a/mlcolvar/graph/cvs/committor/committor.py
+++ /dev/null
@@ -1,176 +0,0 @@
-
-import torch
-import torch_geometric as tg
-from typing import Dict, Any, List
-
-from mlcolvar.core.nn.utils import Custom_Sigmoid
-
-from mlcolvar.graph.cvs import GraphBaseCV
-from mlcolvar.graph.cvs.cv import test_get_data
-from mlcolvar.graph.cvs.committor.utils import GraphCommittorLoss
-from mlcolvar.graph.utils import torch_tools
-
-"""
-Data-driven learning of the committor function, based on Graph Neural
-Networks (GNNs).
-"""
-
-__all__ = ['GraphCommittor']
-
-
-class GraphCommittor(GraphBaseCV):
-    """
-    Data-driven learning of the committor function, based on GNNs.
-
-    The committor function q is expressed as the output of a neural network
-    optimized with a self-consistent approach based on Kolmogorov's
-    variational principle for the committor and on the imposition of its
-    boundary conditions.
-
-    Parameters
-    ----------
-    cutoff: float
-        Cutoff radius of the basis functions. Should be the same as the
-        cutoff radius used to build the graphs.
-    atomic_numbers: List[int]
-        The atomic numbers mapping, e.g. the `atomic_numbers` attribute of a
-        `mlcolvar.graph.data.GraphDataSet` instance.
-    atomic_masses: List[float]
-        List of masses of all the atoms we are using.
-    model_name: str
-        Name of the GNN model.
-    model_options: Dict[Any, Any]
-        Model options.
-    extra_loss_options: Dict[Any, Any]
-        Extra loss function options.
-    optimizer_options: Dict[Any, Any]
-        Optimizer options.
-
-    References
-    ----------
-    .. [*] P. Kang, E. Trizio, and M. Parrinello, "Computing the committor
-        using the committor to study the transition state ensemble",
-        Nat. Comput. Sci., 2024, DOI: 10.1038/s43588-024-00645-0
-
-    See also
-    --------
-    mlcolvar.cvs.committor.Committor
-        The feedforward-NN-based ML committor module.
-    mlcolvar.graph.cvs.committor.utils.GraphCommittorLoss
-        Kolmogorov's variational optimization of the committor and imposition
-        of its boundary conditions.
-    mlcolvar.graph.cvs.committor.utils.compute_committor_weights
-        Utils to compute the appropriate weights for the training set.
-    """
-
-    def __init__(
-        self,
-        cutoff: float,
-        atomic_numbers: List[int],
-        atomic_masses: List[float],
-        model_name: str = 'GVPModel',
-        model_options: Dict[Any, Any] = {},
-        extra_loss_options: Dict[Any, Any] = {
-            'alpha': 1.0, 'gamma': 100.0, 'delta_f': 0.0, 'sigmoid_p': 3.0
-        },
-        optimizer_options: Dict[Any, Any] = {},
-        **kwargs,
-    ) -> None:
-        if model_options.pop('n_out', None) is not None:
-            raise RuntimeError(
-                'The `n_out` key of parameter `model_options` is set '
-                'internally and must not be given!'
-            )
-        model_options['n_out'] = 1
-        if optimizer_options != {}:
-            kwargs['optimizer_options'] = optimizer_options
-
-        super().__init__(
-            2, cutoff, atomic_numbers, model_name, model_options, **kwargs
-        )
-
-        atomic_masses = torch.tensor(
-            atomic_masses, dtype=torch.get_default_dtype()
-        )
-        self.register_buffer('atomic_masses', atomic_masses)
-        self.register_buffer('is_committor', torch.tensor(1, dtype=int))
-
-        self.sigmoid = Custom_Sigmoid(extra_loss_options.get('sigmoid_p', 3.0))
-
-        self.loss_fn = GraphCommittorLoss(
-            atomic_masses,
-            alpha=float(extra_loss_options.get('alpha', 1.0)),
-            gamma=float(extra_loss_options.get('gamma', 100.0)),
-            delta_f=float(extra_loss_options.get('delta_f', 0.0)),
-        )
-
-    def forward_nn(
-        self,
-        data: Dict[str, torch.Tensor],
-        token: bool = False
-    ) -> torch.Tensor:
-        """
-        The forward pass for the NN.
-
-        Parameters
-        ----------
-        data: Dict[str, torch.Tensor]
-            The data dict. Usually comes from the `to_dict` method of a
-            `torch_geometric.data.Batch` object.
-        token: bool
-            Reserved for future use.
-        """
-        data['positions'].requires_grad_(True)
-        data['node_attrs'].requires_grad_(True)
-
-        return self._model(data)
-
-    def forward(
-        self,
-        data: Dict[str, torch.Tensor],
-        token: bool = False
-    ) -> torch.Tensor:
-        """
-        The forward pass.
-
-        Parameters
-        ----------
-        data: Dict[str, torch.Tensor]
-            The data dict. Usually comes from the `to_dict` method of a
-            `torch_geometric.data.Batch` object.
-        token: bool
-            Reserved for future use.
-        """
-        z = self.forward_nn(data)
-        q = self.sigmoid(z)
-
-        return torch.hstack([z, q])
-
-    def training_step(
-        self, train_batch: tg.data.Batch, *args, **kwargs
-    ) -> torch.Tensor:
-        """
-        Compute and return the training loss and record metrics.
-
-        Parameters
-        ----------
-        train_batch: torch_geometric.data.Batch
-            The data batch.
-        """
-        torch.set_grad_enabled(True)
-
-        batch_dict = train_batch.to_dict()
-        z = self.forward_nn(batch_dict)
-        q = self.sigmoid(z)
-
-        loss, loss_var, loss_bound_A, loss_bound_B = self.loss_fn(
-            batch_dict, q
-        )
-        loss_z_diff = (z.max().abs() - z.min().abs()).pow(2)
-        loss = loss + loss_z_diff
-
-        name = 'train' if self.training else 'valid'
-        self.log(f'{name}_loss', loss, on_epoch=True)
-        self.log(f'{name}_loss_variational', loss_var, on_epoch=True)
-        self.log(f'{name}_loss_boundary_A', loss_bound_A, on_epoch=True)
-        self.log(f'{name}_loss_boundary_B', loss_bound_B, on_epoch=True)
-        self.log(f'{name}_loss_z_diff', loss_z_diff, on_epoch=True)
-        return loss
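Note that the deleted `forward` returns `torch.hstack([z, q])`, so downstream code splits the two columns. A sketch of the convention, assuming `cv` as above and a `torch_geometric` batch such as the one built by `test_get_data` in the deleted `cv.py` below:

```python
# batch: torch_geometric.data.Batch (e.g. from test_get_data() in cv.py below).
out = cv(batch.to_dict())  # shape: (n_graphs, 2)
z = out[:, 0]              # raw, unbounded network output
q = out[:, 1]              # committor estimate, squashed into (0, 1) by Custom_Sigmoid
```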
- """ - torch.set_grad_enabled(True) - - batch_dict = train_batch.to_dict() - z = self.forward_nn(batch_dict) - q = self.sigmoid(z) - - loss, loss_var, loss_bound_A, loss_bound_B = self.loss_fn( - batch_dict, q - ) - loss_z_diff = (z.max().abs() - z.min().abs()).pow(2) - loss = loss + loss_z_diff - - name = 'train' if self.training else 'valid' - self.log(f'{name}_loss', loss, on_epoch=True) - self.log(f'{name}_loss_variational', loss_var, on_epoch=True) - self.log(f'{name}_loss_boundary_A', loss_bound_A, on_epoch=True) - self.log(f'{name}_loss_boundary_B', loss_bound_B, on_epoch=True) - self.log(f'{name}_loss_z_diff', loss_z_diff, on_epoch=True) - return loss diff --git a/mlcolvar/graph/cvs/cv.py b/mlcolvar/graph/cvs/cv.py deleted file mode 100644 index beeef02a..00000000 --- a/mlcolvar/graph/cvs/cv.py +++ /dev/null @@ -1,347 +0,0 @@ -import torch -import lightning -import numpy as np -import torch_geometric as tg -from typing import Dict, Any, List, Union, Tuple - -from mlcolvar.graph import data as gdata -from mlcolvar.graph.core.nn import models - -""" -Base collective variable class for Graph Neural Networks. -""" - -__all__ = ['GraphBaseCV'] - - -class GraphBaseCV(lightning.LightningModule): - """ - Base collective variable class for Graph Neural Networks. - - Parameters - ---------- - n_cvs: int - Number of components of the CV. - cutoff: float - Cutoff radius of the basis functions. Should be the same as the cutoff - radius used to build the graphs. - atomic_numbers: List[int] - The atomic numbers mapping, e.g. the `atomic_numbers` attribute of a - `mlcolvar.graph.data.GraphDataSet` instance. - model_name: str - Name of the GNN model. - model_options: Dict[Any, Any] - Model options. - optimizer_options: Dict[Any, Any] - Optimizer options. - """ - - def __init__( - self, - n_cvs: int, - cutoff: float, - atomic_numbers: List[int], - model_name: str, - model_options: Dict[Any, Any] = {}, - optimizer_options: Dict[Any, Any] = { - 'optimizer': {'lr': 1E-3, 'weight_decay': 1E-4}, - 'lr_scheduler': { - 'scheduler': torch.optim.lr_scheduler.ExponentialLR, - 'gamma': 0.9997 - } - }, - *args, - **kwargs, - ) -> None: - """ - Base CV class options. - """ - super().__init__(*args, **kwargs) - - self.register_buffer( - 'n_cvs', torch.tensor(n_cvs, dtype=torch.int64) - ) - self.register_buffer( - 'cutoff', torch.tensor(cutoff, dtype=torch.get_default_dtype()) - ) - self.register_buffer( - 'atomic_numbers', torch.tensor(atomic_numbers, dtype=torch.int64) - ) - - for key in ['cutoff', 'atomic_numbers']: - model_options.pop(key, None) - # For DeepTICA - n_out = model_options.pop('n_out', n_cvs) - - if not hasattr(models, model_name): - raise RuntimeError(f'Unknown model: {model_name}') - self._model = eval(f'models.{model_name}')( - n_out=n_out, - cutoff=cutoff, - atomic_numbers=atomic_numbers, - **model_options - ) - - self._optimizer_name = 'Adam' - self.optimizer_kwargs = {} - self.lr_scheduler_kwargs = {} - self._parse_optimizer(optimizer_options) - - self.save_hyperparameters(ignore=['n_cvs', 'cutoff', 'atomic_numbers']) - - def __setattr__(self, key, value) -> None: - # PyTorch overrides __setattr__ to raise a TypeError when you try to - # assignan attribute that is a Module to avoid substituting the model's - # component by mistake. This means we can't simply assign to loss_fn a - # lambda function after it's been assigned a Module, but we need to - # delete the Module first. 
-
-    @property
-    def example_input_array(self) -> Dict[str, torch.Tensor]:
-        """
-        Example data.
-        """
-        numbers = self._model.atomic_numbers.cpu().numpy().tolist()
-        positions = np.random.randn(2, len(numbers), 3)
-        cell = np.identity(3, dtype=float) * 0.2
-        graph_labels = np.array([[[0]], [[1]]])
-        node_labels = np.array([[0]] * len(numbers))
-        z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers)
-
-        config = [
-            gdata.atomic.Configuration(
-                atomic_numbers=numbers,
-                positions=positions[i],
-                cell=cell,
-                pbc=[True] * 3,
-                node_labels=node_labels,
-                graph_labels=graph_labels[i],
-            ) for i in range(2)
-        ]
-        dataset = gdata.create_dataset_from_configurations(
-            config, z_table, 0.1, show_progress=False
-        )
-
-        loader = gdata.GraphDataModule(
-            dataset,
-            lengths=(1.0,),
-            batch_size=10,
-            shuffle=False,
-        )
-        loader.setup()
-
-        return next(iter(loader.train_dataloader()))
-
-
-def test_get_data() -> tg.data.Batch:
-    # TODO: This is not a real test, but a helper function for other tests.
-    # Maybe we should change its name.
-
-    numbers = [8, 1, 1]
-    positions = np.array(
-        [
-            [[0.0, 0.0, 0.0], [0.07, 0.07, 0.0], [0.07, -0.07, 0.0]],
-            [[0.0, 0.0, 0.0], [-0.07, 0.07, 0.0], [0.07, 0.07, 0.0]],
-            [[0.0, 0.0, 0.0], [0.07, -0.07, 0.0], [0.07, 0.07, 0.0]],
-            [[0.0, 0.0, 0.0], [0.0, -0.07, 0.07], [0.0, 0.07, 0.07]],
-            [[0.0, 0.0, 0.0], [0.07, 0.0, 0.07], [-0.07, 0.0, 0.07]],
-            [[0.1, 0.0, 1.1], [0.17, 0.07, 1.1], [0.17, -0.07, 1.1]],
-        ],
-        dtype=np.float64
-    )
-    cell = np.identity(3, dtype=float) * 0.2
-    graph_labels = np.array([[[0]], [[1]]] * 3)
-    node_labels = np.array([[0], [1], [1]])
-    z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers)
-
-    config = [
-        gdata.atomic.Configuration(
-            atomic_numbers=numbers,
-            positions=positions[i],
-            cell=cell,
-            pbc=[True] * 3,
-            node_labels=node_labels,
-            graph_labels=graph_labels[i],
-        ) for i in range(0, 6)
-    ]
-    dataset = gdata.create_dataset_from_configurations(
-        config, z_table, 0.1, show_progress=False
-    )
-
-    loader = gdata.GraphDataModule(
-        dataset,
-        lengths=(1.0,),
-        batch_size=10,
-        shuffle=False,
-    )
-    loader.setup()
-
-    return next(iter(loader.train_dataloader()))
- """ - numbers = self._model.atomic_numbers.cpu().numpy().tolist() - positions = np.random.randn(2, len(numbers), 3) - cell = np.identity(3, dtype=float) * 0.2 - graph_labels = np.array([[[0]], [[1]]]) - node_labels = np.array([[0]] * len(numbers)) - z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers) - - config = [ - gdata.atomic.Configuration( - atomic_numbers=numbers, - positions=positions[i], - cell=cell, - pbc=[True] * 3, - node_labels=node_labels, - graph_labels=graph_labels[i], - ) for i in range(2) - ] - dataset = gdata.create_dataset_from_configurations( - config, z_table, 0.1, show_progress=False - ) - - loader = gdata.GraphDataModule( - dataset, - lengths=(1.0,), - batch_size=10, - shuffle=False, - ) - loader.setup() - - return next(iter(loader.train_dataloader())) - - -def test_get_data() -> tg.data.Batch: - # TODO: This is not a real test, but a helper function for other tests. - # Maybe should change its name. - - numbers = [8, 1, 1] - positions = np.array( - [ - [[0.0, 0.0, 0.0], [0.07, 0.07, 0.0], [0.07, -0.07, 0.0]], - [[0.0, 0.0, 0.0], [-0.07, 0.07, 0.0], [0.07, 0.07, 0.0]], - [[0.0, 0.0, 0.0], [0.07, -0.07, 0.0], [0.07, 0.07, 0.0]], - [[0.0, 0.0, 0.0], [0.0, -0.07, 0.07], [0.0, 0.07, 0.07]], - [[0.0, 0.0, 0.0], [0.07, 0.0, 0.07], [-0.07, 0.0, 0.07]], - [[0.1, 0.0, 1.1], [0.17, 0.07, 1.1], [0.17, -0.07, 1.1]], - ], - dtype=np.float64 - ) - cell = np.identity(3, dtype=float) * 0.2 - graph_labels = np.array([[[0]], [[1]]] * 3) - node_labels = np.array([[0], [1], [1]]) - z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers) - - config = [ - gdata.atomic.Configuration( - atomic_numbers=numbers, - positions=positions[i], - cell=cell, - pbc=[True] * 3, - node_labels=node_labels, - graph_labels=graph_labels[i], - ) for i in range(0, 6) - ] - dataset = gdata.create_dataset_from_configurations( - config, z_table, 0.1, show_progress=False - ) - - loader = gdata.GraphDataModule( - dataset, - lengths=(1.0,), - batch_size=10, - shuffle=False, - ) - loader.setup() - - return next(iter(loader.train_dataloader())) - - -def test_base_cv() -> None: - dtype = torch.get_default_dtype() - torch.set_default_dtype(torch.float64) - - cv = GraphBaseCV(2, 0.1, [1, 2, 3], 'GVPModel') - - assert cv.n_cvs == 2 - assert (cv.cutoff - 0.1) < 1E-12 - assert (cv._model.atomic_numbers == torch.tensor([1, 2, 3])).all() - - assert cv.optimizer_name == 'Adam' - objects = cv.configure_optimizers() - assert isinstance(objects[0][0], torch.optim.Adam) - assert isinstance(objects[1][0], torch.optim.lr_scheduler.ExponentialLR) - assert objects[0][0].param_groups[0]['weight_decay'] == 1E-4 - assert objects[0][0].param_groups[0]['lr'] == 1E-3 - assert objects[1][0].gamma == 0.9997 - - cv.optimizer_name = 'SGD' - cv.optimizer_kwargs = {'lr': 2E-3, 'weight_decay': 1E-4} - objects = cv.configure_optimizers() - assert isinstance(objects[0][0], torch.optim.SGD) - assert objects[0][0].param_groups[0]['weight_decay'] == 1E-4 - assert objects[0][0].param_groups[0]['lr'] == 2E-3 - - cv.lr_scheduler_kwargs = { - 'scheduler': torch.optim.lr_scheduler.StepLR, - 'gamma': 0.999, - 'step_size': 1 - } - objects = cv.configure_optimizers() - assert isinstance(objects[0][0], torch.optim.SGD) - assert isinstance(objects[1][0], torch.optim.lr_scheduler.StepLR) - assert objects[0][0].param_groups[0]['weight_decay'] == 1E-4 - assert objects[0][0].param_groups[0]['lr'] == 2E-3 - assert objects[1][0].gamma == 0.999 - - cv = GraphBaseCV( - 2, - 0.1, - [1, 2, 3], - 'GVPModel', - optimizer_options={ - 'optimizer': {'lr': 2E-3, 
diff --git a/mlcolvar/graph/utils/__init__.py b/mlcolvar/graph/utils/__init__.py
deleted file mode 100644
index c6fe1591..00000000
--- a/mlcolvar/graph/utils/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from . import io
-from . import progress
-from . import torch_tools
-from . import timelagged
diff --git a/mlcolvar/graph/utils/progress.py b/mlcolvar/graph/utils/progress.py
deleted file mode 100644
index f71dd249..00000000
--- a/mlcolvar/graph/utils/progress.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import sys
-import time
-import typing
-
-"""
-A simple progress bar.
-"""
-
-__all__ = ['pbar']
-
-
-def pbar(
-    item: typing.List[int],
-    prefix: str = '',
-    size: int = 25,
-    frequency: float = 0.05,
-    use_unicode: bool = True,
-    file: typing.TextIO = sys.stdout
-):
-    """
-    A simple progress bar. Taken from Stack Overflow:
-    https://stackoverflow.com/questions/3160699
-
-    Parameters
-    ----------
-    item : List[int]
-        The iterable to loop over.
-    prefix : str
-        Prefix of the bar.
-    size : int
-        Size of the bar.
-    frequency : float
-        Flush frequency of the bar.
-    use_unicode : bool
-        Whether to use Unicode characters to draw the bar.
-    file : TextIO
-        The output file.
-    """
-    if use_unicode:
-        c_1 = ''
-        c_2 = '█'
-        c_3 = '━'
-        c_4 = ''
-    else:
-        c_1 = '|'
-        c_2 = '|'
-        c_3 = '-'
-        c_4 = '|'
-    count = len(item)
-    start = time.time()
-    interval = max(int(count * frequency), 1)
-
-    def show(j) -> None:
-        x = int(size * j / count)
-        remaining = ((time.time() - start) / j) * (count - j)
-        mins, sec = divmod(remaining, 60)
-        time_string = f'{int(mins):02}:{sec:04.1f}'
-        output = f' {prefix} {c_1}{c_2 * (x - 1) + c_4}{c_3 * (size - x)} ' + \
-                 f'{j}/{count} Est. {time_string}'
-        print('\x1b[1A\x1b[2K' + output, file=file, flush=True)
-
-    for i, it in enumerate(item):
-        yield it
-        if (i % interval) == 0 or i in (0, count - 1):
-            show(i + 1)
-    print(flush=True, file=file)
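For reference, the removed `pbar` is a generator that yields the items of the wrapped sequence while redrawing the bar. A minimal usage sketch (`time.sleep` stands in for real work):

```python
import time

from mlcolvar.graph.utils.progress import pbar

for _ in pbar(list(range(100)), prefix='epoch', size=25):
    time.sleep(0.01)  # stands in for real work
```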