From e9dca7dc4197fe5a99633832c7dca7d53d7e54f8 Mon Sep 17 00:00:00 2001
From: EnricoTrizio
Date: Mon, 16 Dec 2024 12:11:13 +0100
Subject: [PATCH] Removed already ported files

---
 mlcolvar/graph/README.md                  |  15 -
 mlcolvar/graph/__init__.py                |  11 -
 mlcolvar/graph/cvs/__init__.py            |   4 -
 mlcolvar/graph/cvs/committor/__init__.py  |   3 -
 mlcolvar/graph/cvs/committor/committor.py | 176 -----------
 mlcolvar/graph/cvs/cv.py                  | 347 ----------------------
 mlcolvar/graph/utils/__init__.py          |   4 -
 mlcolvar/graph/utils/progress.py          |  66 ----
 8 files changed, 626 deletions(-)
 delete mode 100644 mlcolvar/graph/README.md
 delete mode 100644 mlcolvar/graph/__init__.py
 delete mode 100644 mlcolvar/graph/cvs/__init__.py
 delete mode 100644 mlcolvar/graph/cvs/committor/__init__.py
 delete mode 100644 mlcolvar/graph/cvs/committor/committor.py
 delete mode 100644 mlcolvar/graph/cvs/cv.py
 delete mode 100644 mlcolvar/graph/utils/__init__.py
 delete mode 100644 mlcolvar/graph/utils/progress.py

diff --git a/mlcolvar/graph/README.md b/mlcolvar/graph/README.md
deleted file mode 100644
index 4d40c7e0..00000000
--- a/mlcolvar/graph/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# The Graph Neural Network (GNN) Module in MLCOLVAR.
-
-
-
-## INTRODUCTION
-
-WIP
-
-## DEPENDENCIES
-
-- `pytorch_geometric` >= 2.5
-- `matscipy`
-- `mdtraj`
diff --git a/mlcolvar/graph/__init__.py b/mlcolvar/graph/__init__.py
deleted file mode 100644
index 1e9f9239..00000000
--- a/mlcolvar/graph/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-"""The graph neural network module."""
-
-from . import utils
-from . import core
-from . import data
-from . import cvs
-from . import explain
-
-utils.torch_tools.set_default_dtype('float32')
-# torch_scatter will cause compilation problems.
-__import__('torch_geometric').typing.WITH_TORCH_SCATTER = False
diff --git a/mlcolvar/graph/cvs/__init__.py b/mlcolvar/graph/cvs/__init__.py
deleted file mode 100644
index 7833d8aa..00000000
--- a/mlcolvar/graph/cvs/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .cv import GraphBaseCV
-from .supervised import GraphDeepTDA
-from .timelagged import GraphDeepTICA
-from .committor import GraphCommittor
diff --git a/mlcolvar/graph/cvs/committor/__init__.py b/mlcolvar/graph/cvs/committor/__init__.py
deleted file mode 100644
index 3af5089f..00000000
--- a/mlcolvar/graph/cvs/committor/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from . import utils
-from .utils import get_dataset_kolmogorov_bias, compute_committor_weights
-from .committor import GraphCommittor
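For reviewers, a minimal sketch of how the removed committor CV was constructed; the signature matches the deleted `committor.py` below, while the system-specific values (cutoff, atomic numbers and masses) are placeholders, not tested settings:

```python
from mlcolvar.graph.cvs import GraphCommittor

# Hypothetical system: the values below are illustrative placeholders.
cv = GraphCommittor(
    cutoff=0.8,                    # must match the cutoff used to build the graphs
    atomic_numbers=[1, 6, 7, 8],   # atomic-number mapping of the dataset
    atomic_masses=[1.008, 12.011, 14.007, 15.999],
    model_name='GVPModel',
    extra_loss_options={'alpha': 1.0, 'gamma': 100.0, 'delta_f': 0.0},
)
```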
diff --git a/mlcolvar/graph/cvs/committor/committor.py b/mlcolvar/graph/cvs/committor/committor.py
deleted file mode 100644
index 7478c326..00000000
--- a/mlcolvar/graph/cvs/committor/committor.py
+++ /dev/null
@@ -1,176 +0,0 @@
-
-import torch
-import torch_geometric as tg
-from typing import Dict, Any, List
-
-from mlcolvar.core.nn.utils import Custom_Sigmoid
-
-from mlcolvar.graph.cvs import GraphBaseCV
-from mlcolvar.graph.cvs.cv import test_get_data
-from mlcolvar.graph.cvs.committor.utils import GraphCommittorLoss
-from mlcolvar.graph.utils import torch_tools
-
-"""
-Data-driven learning of the committor function, based on Graph Neural
-Networks (GNNs).
-"""
-
-__all__ = ['GraphCommittor']
-
-
-class GraphCommittor(GraphBaseCV):
-    """
-    Data-driven learning of the committor function, based on GNNs.
-
-    The committor function q is expressed as the output of a neural network
-    optimized with a self-consistent approach based on Kolmogorov's
-    variational principle for the committor and on the imposition of its
-    boundary conditions.
-
-    Parameters
-    ----------
-    cutoff: float
-        Cutoff radius of the basis functions. Should be the same as the
-        cutoff radius used to build the graphs.
-    atomic_numbers: List[int]
-        The atomic numbers mapping, e.g. the `atomic_numbers` attribute of a
-        `mlcolvar.graph.data.GraphDataSet` instance.
-    atomic_masses: List[float]
-        List of masses of all the atoms we are using.
-    model_name: str
-        Name of the GNN model.
-    model_options: Dict[Any, Any]
-        Model options.
-    extra_loss_options: Dict[Any, Any]
-        Extra loss function options.
-    optimizer_options: Dict[Any, Any]
-        Optimizer options.
-
-    References
-    ----------
-    .. [*] P. Kang, E. Trizio, and M. Parrinello, "Computing the committor
-        using the committor to study the transition state ensemble",
-        Nat. Comput. Sci., 2024, DOI: 10.1038/s43588-024-00645-0
-
-    See also
-    --------
-    mlcolvar.cvs.committor.Committor
-        The feedforward-NN-based ML committor module.
-    mlcolvar.graph.cvs.committor.utils.GraphCommittorLoss
-        Kolmogorov's variational optimization of the committor and imposition
-        of its boundary conditions.
-    mlcolvar.graph.cvs.committor.utils.compute_committor_weights
-        Utils to compute the appropriate weights for the training set.
-    """
-
-    def __init__(
-        self,
-        cutoff: float,
-        atomic_numbers: List[int],
-        atomic_masses: List[float],
-        model_name: str = 'GVPModel',
-        model_options: Dict[Any, Any] = {},
-        extra_loss_options: Dict[Any, Any] = {
-            'alpha': 1.0, 'gamma': 100.0, 'delta_f': 0.0, 'sigmoid_p': 3.0
-        },
-        optimizer_options: Dict[Any, Any] = {},
-        **kwargs,
-    ) -> None:
-        if model_options.pop('n_out', None) is not None:
-            raise RuntimeError(
-                'The `n_out` key of parameter `model_options` is set '
-                'internally and must not be given!'
-            )
-        model_options['n_out'] = 1
-        if optimizer_options != {}:
-            kwargs['optimizer_options'] = optimizer_options
-
-        super().__init__(
-            2, cutoff, atomic_numbers, model_name, model_options, **kwargs
-        )
-
-        atomic_masses = torch.tensor(
-            atomic_masses, dtype=torch.get_default_dtype()
-        )
-        self.register_buffer('atomic_masses', atomic_masses)
-        self.register_buffer('is_committor', torch.tensor(1, dtype=int))
-
-        self.sigmoid = Custom_Sigmoid(extra_loss_options.get('sigmoid_p', 3.0))
-
-        self.loss_fn = GraphCommittorLoss(
-            atomic_masses,
-            alpha=float(extra_loss_options.get('alpha', 1.0)),
-            gamma=float(extra_loss_options.get('gamma', 100.0)),
-            delta_f=float(extra_loss_options.get('delta_f', 0.0)),
-        )
-
-    def forward_nn(
-        self,
-        data: Dict[str, torch.Tensor],
-        token: bool = False
-    ) -> torch.Tensor:
-        """
-        The forward pass for the NN.
-
-        Parameters
-        ----------
-        data: Dict[str, torch.Tensor]
-            The data dict. Usually comes from the `to_dict` method of a
-            `torch_geometric.data.Batch` object.
-        token: bool
-            Reserved for future use.
-        """
-        data['positions'].requires_grad_(True)
-        data['node_attrs'].requires_grad_(True)
-
-        return self._model(data)
-
-    def forward(
-        self,
-        data: Dict[str, torch.Tensor],
-        token: bool = False
-    ) -> torch.Tensor:
-        """
-        The forward pass.
-
-        Parameters
-        ----------
-        data: Dict[str, torch.Tensor]
-            The data dict. Usually comes from the `to_dict` method of a
-            `torch_geometric.data.Batch` object.
-        token: bool
-            Reserved for future use.
-        """
-        z = self.forward_nn(data)
-        q = self.sigmoid(z)
-
-        return torch.hstack([z, q])
-
-    def training_step(
-        self, train_batch: tg.data.Batch, *args, **kwargs
-    ) -> torch.Tensor:
-        """
-        Compute and return the training loss and record metrics.
-
-        Parameters
-        ----------
-        train_batch: torch_geometric.data.Batch
-            The data batch.
-        """
-        torch.set_grad_enabled(True)
-
-        batch_dict = train_batch.to_dict()
-        z = self.forward_nn(batch_dict)
-        q = self.sigmoid(z)
-
-        loss, loss_var, loss_bound_A, loss_bound_B = self.loss_fn(
-            batch_dict, q
-        )
-        loss_z_diff = (z.max().abs() - z.min().abs()).pow(2)
-        loss = loss + loss_z_diff
-
-        name = 'train' if self.training else 'valid'
-        self.log(f'{name}_loss', loss, on_epoch=True)
-        self.log(f'{name}_loss_variational', loss_var, on_epoch=True)
-        self.log(f'{name}_loss_boundary_A', loss_bound_A, on_epoch=True)
-        self.log(f'{name}_loss_boundary_B', loss_bound_B, on_epoch=True)
-        self.log(f'{name}_loss_z_diff', loss_z_diff, on_epoch=True)
-        return loss
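Note that the deleted `forward` returns `torch.hstack([z, q])`, so downstream code splits the two columns. A sketch of the convention, assuming `cv` as above and a `torch_geometric` batch such as the one built by `test_get_data` in the deleted `cv.py` below:

```python
# batch: torch_geometric.data.Batch (e.g. from test_get_data() in cv.py below).
out = cv(batch.to_dict())  # shape: (n_graphs, 2)
z = out[:, 0]              # raw, unbounded network output
q = out[:, 1]              # committor estimate, squashed into (0, 1) by Custom_Sigmoid
```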
- """ - torch.set_grad_enabled(True) - - batch_dict = train_batch.to_dict() - z = self.forward_nn(batch_dict) - q = self.sigmoid(z) - - loss, loss_var, loss_bound_A, loss_bound_B = self.loss_fn( - batch_dict, q - ) - loss_z_diff = (z.max().abs() - z.min().abs()).pow(2) - loss = loss + loss_z_diff - - name = 'train' if self.training else 'valid' - self.log(f'{name}_loss', loss, on_epoch=True) - self.log(f'{name}_loss_variational', loss_var, on_epoch=True) - self.log(f'{name}_loss_boundary_A', loss_bound_A, on_epoch=True) - self.log(f'{name}_loss_boundary_B', loss_bound_B, on_epoch=True) - self.log(f'{name}_loss_z_diff', loss_z_diff, on_epoch=True) - return loss diff --git a/mlcolvar/graph/cvs/cv.py b/mlcolvar/graph/cvs/cv.py deleted file mode 100644 index beeef02a..00000000 --- a/mlcolvar/graph/cvs/cv.py +++ /dev/null @@ -1,347 +0,0 @@ -import torch -import lightning -import numpy as np -import torch_geometric as tg -from typing import Dict, Any, List, Union, Tuple - -from mlcolvar.graph import data as gdata -from mlcolvar.graph.core.nn import models - -""" -Base collective variable class for Graph Neural Networks. -""" - -__all__ = ['GraphBaseCV'] - - -class GraphBaseCV(lightning.LightningModule): - """ - Base collective variable class for Graph Neural Networks. - - Parameters - ---------- - n_cvs: int - Number of components of the CV. - cutoff: float - Cutoff radius of the basis functions. Should be the same as the cutoff - radius used to build the graphs. - atomic_numbers: List[int] - The atomic numbers mapping, e.g. the `atomic_numbers` attribute of a - `mlcolvar.graph.data.GraphDataSet` instance. - model_name: str - Name of the GNN model. - model_options: Dict[Any, Any] - Model options. - optimizer_options: Dict[Any, Any] - Optimizer options. - """ - - def __init__( - self, - n_cvs: int, - cutoff: float, - atomic_numbers: List[int], - model_name: str, - model_options: Dict[Any, Any] = {}, - optimizer_options: Dict[Any, Any] = { - 'optimizer': {'lr': 1E-3, 'weight_decay': 1E-4}, - 'lr_scheduler': { - 'scheduler': torch.optim.lr_scheduler.ExponentialLR, - 'gamma': 0.9997 - } - }, - *args, - **kwargs, - ) -> None: - """ - Base CV class options. - """ - super().__init__(*args, **kwargs) - - self.register_buffer( - 'n_cvs', torch.tensor(n_cvs, dtype=torch.int64) - ) - self.register_buffer( - 'cutoff', torch.tensor(cutoff, dtype=torch.get_default_dtype()) - ) - self.register_buffer( - 'atomic_numbers', torch.tensor(atomic_numbers, dtype=torch.int64) - ) - - for key in ['cutoff', 'atomic_numbers']: - model_options.pop(key, None) - # For DeepTICA - n_out = model_options.pop('n_out', n_cvs) - - if not hasattr(models, model_name): - raise RuntimeError(f'Unknown model: {model_name}') - self._model = eval(f'models.{model_name}')( - n_out=n_out, - cutoff=cutoff, - atomic_numbers=atomic_numbers, - **model_options - ) - - self._optimizer_name = 'Adam' - self.optimizer_kwargs = {} - self.lr_scheduler_kwargs = {} - self._parse_optimizer(optimizer_options) - - self.save_hyperparameters(ignore=['n_cvs', 'cutoff', 'atomic_numbers']) - - def __setattr__(self, key, value) -> None: - # PyTorch overrides __setattr__ to raise a TypeError when you try to - # assignan attribute that is a Module to avoid substituting the model's - # component by mistake. This means we can't simply assign to loss_fn a - # lambda function after it's been assigned a Module, but we need to - # delete the Module first. 
-
-    @property
-    def example_input_array(self) -> Dict[str, torch.Tensor]:
-        """
-        Example data.
-        """
-        numbers = self._model.atomic_numbers.cpu().numpy().tolist()
-        positions = np.random.randn(2, len(numbers), 3)
-        cell = np.identity(3, dtype=float) * 0.2
-        graph_labels = np.array([[[0]], [[1]]])
-        node_labels = np.array([[0]] * len(numbers))
-        z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers)
-
-        config = [
-            gdata.atomic.Configuration(
-                atomic_numbers=numbers,
-                positions=positions[i],
-                cell=cell,
-                pbc=[True] * 3,
-                node_labels=node_labels,
-                graph_labels=graph_labels[i],
-            ) for i in range(2)
-        ]
-        dataset = gdata.create_dataset_from_configurations(
-            config, z_table, 0.1, show_progress=False
-        )
-
-        loader = gdata.GraphDataModule(
-            dataset,
-            lengths=(1.0,),
-            batch_size=10,
-            shuffle=False,
-        )
-        loader.setup()
-
-        return next(iter(loader.train_dataloader()))
-
-
-def test_get_data() -> tg.data.Batch:
-    # TODO: This is not a real test, but a helper function for other tests.
-    # Maybe we should change its name.
-
-    numbers = [8, 1, 1]
-    positions = np.array(
-        [
-            [[0.0, 0.0, 0.0], [0.07, 0.07, 0.0], [0.07, -0.07, 0.0]],
-            [[0.0, 0.0, 0.0], [-0.07, 0.07, 0.0], [0.07, 0.07, 0.0]],
-            [[0.0, 0.0, 0.0], [0.07, -0.07, 0.0], [0.07, 0.07, 0.0]],
-            [[0.0, 0.0, 0.0], [0.0, -0.07, 0.07], [0.0, 0.07, 0.07]],
-            [[0.0, 0.0, 0.0], [0.07, 0.0, 0.07], [-0.07, 0.0, 0.07]],
-            [[0.1, 0.0, 1.1], [0.17, 0.07, 1.1], [0.17, -0.07, 1.1]],
-        ],
-        dtype=np.float64
-    )
-    cell = np.identity(3, dtype=float) * 0.2
-    graph_labels = np.array([[[0]], [[1]]] * 3)
-    node_labels = np.array([[0], [1], [1]])
-    z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers)
-
-    config = [
-        gdata.atomic.Configuration(
-            atomic_numbers=numbers,
-            positions=positions[i],
-            cell=cell,
-            pbc=[True] * 3,
-            node_labels=node_labels,
-            graph_labels=graph_labels[i],
-        ) for i in range(0, 6)
-    ]
-    dataset = gdata.create_dataset_from_configurations(
-        config, z_table, 0.1, show_progress=False
-    )
-
-    loader = gdata.GraphDataModule(
-        dataset,
-        lengths=(1.0,),
-        batch_size=10,
-        shuffle=False,
-    )
-    loader.setup()
-
-    return next(iter(loader.train_dataloader()))
- """ - numbers = self._model.atomic_numbers.cpu().numpy().tolist() - positions = np.random.randn(2, len(numbers), 3) - cell = np.identity(3, dtype=float) * 0.2 - graph_labels = np.array([[[0]], [[1]]]) - node_labels = np.array([[0]] * len(numbers)) - z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers) - - config = [ - gdata.atomic.Configuration( - atomic_numbers=numbers, - positions=positions[i], - cell=cell, - pbc=[True] * 3, - node_labels=node_labels, - graph_labels=graph_labels[i], - ) for i in range(2) - ] - dataset = gdata.create_dataset_from_configurations( - config, z_table, 0.1, show_progress=False - ) - - loader = gdata.GraphDataModule( - dataset, - lengths=(1.0,), - batch_size=10, - shuffle=False, - ) - loader.setup() - - return next(iter(loader.train_dataloader())) - - -def test_get_data() -> tg.data.Batch: - # TODO: This is not a real test, but a helper function for other tests. - # Maybe should change its name. - - numbers = [8, 1, 1] - positions = np.array( - [ - [[0.0, 0.0, 0.0], [0.07, 0.07, 0.0], [0.07, -0.07, 0.0]], - [[0.0, 0.0, 0.0], [-0.07, 0.07, 0.0], [0.07, 0.07, 0.0]], - [[0.0, 0.0, 0.0], [0.07, -0.07, 0.0], [0.07, 0.07, 0.0]], - [[0.0, 0.0, 0.0], [0.0, -0.07, 0.07], [0.0, 0.07, 0.07]], - [[0.0, 0.0, 0.0], [0.07, 0.0, 0.07], [-0.07, 0.0, 0.07]], - [[0.1, 0.0, 1.1], [0.17, 0.07, 1.1], [0.17, -0.07, 1.1]], - ], - dtype=np.float64 - ) - cell = np.identity(3, dtype=float) * 0.2 - graph_labels = np.array([[[0]], [[1]]] * 3) - node_labels = np.array([[0], [1], [1]]) - z_table = gdata.atomic.AtomicNumberTable.from_zs(numbers) - - config = [ - gdata.atomic.Configuration( - atomic_numbers=numbers, - positions=positions[i], - cell=cell, - pbc=[True] * 3, - node_labels=node_labels, - graph_labels=graph_labels[i], - ) for i in range(0, 6) - ] - dataset = gdata.create_dataset_from_configurations( - config, z_table, 0.1, show_progress=False - ) - - loader = gdata.GraphDataModule( - dataset, - lengths=(1.0,), - batch_size=10, - shuffle=False, - ) - loader.setup() - - return next(iter(loader.train_dataloader())) - - -def test_base_cv() -> None: - dtype = torch.get_default_dtype() - torch.set_default_dtype(torch.float64) - - cv = GraphBaseCV(2, 0.1, [1, 2, 3], 'GVPModel') - - assert cv.n_cvs == 2 - assert (cv.cutoff - 0.1) < 1E-12 - assert (cv._model.atomic_numbers == torch.tensor([1, 2, 3])).all() - - assert cv.optimizer_name == 'Adam' - objects = cv.configure_optimizers() - assert isinstance(objects[0][0], torch.optim.Adam) - assert isinstance(objects[1][0], torch.optim.lr_scheduler.ExponentialLR) - assert objects[0][0].param_groups[0]['weight_decay'] == 1E-4 - assert objects[0][0].param_groups[0]['lr'] == 1E-3 - assert objects[1][0].gamma == 0.9997 - - cv.optimizer_name = 'SGD' - cv.optimizer_kwargs = {'lr': 2E-3, 'weight_decay': 1E-4} - objects = cv.configure_optimizers() - assert isinstance(objects[0][0], torch.optim.SGD) - assert objects[0][0].param_groups[0]['weight_decay'] == 1E-4 - assert objects[0][0].param_groups[0]['lr'] == 2E-3 - - cv.lr_scheduler_kwargs = { - 'scheduler': torch.optim.lr_scheduler.StepLR, - 'gamma': 0.999, - 'step_size': 1 - } - objects = cv.configure_optimizers() - assert isinstance(objects[0][0], torch.optim.SGD) - assert isinstance(objects[1][0], torch.optim.lr_scheduler.StepLR) - assert objects[0][0].param_groups[0]['weight_decay'] == 1E-4 - assert objects[0][0].param_groups[0]['lr'] == 2E-3 - assert objects[1][0].gamma == 0.999 - - cv = GraphBaseCV( - 2, - 0.1, - [1, 2, 3], - 'GVPModel', - optimizer_options={ - 'optimizer': {'lr': 2E-3, 
diff --git a/mlcolvar/graph/utils/__init__.py b/mlcolvar/graph/utils/__init__.py
deleted file mode 100644
index c6fe1591..00000000
--- a/mlcolvar/graph/utils/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from . import io
-from . import progress
-from . import torch_tools
-from . import timelagged
diff --git a/mlcolvar/graph/utils/progress.py b/mlcolvar/graph/utils/progress.py
deleted file mode 100644
index f71dd249..00000000
--- a/mlcolvar/graph/utils/progress.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import sys
-import time
-import typing
-
-"""
-A simple progress bar.
-"""
-
-__all__ = ['pbar']
-
-
-def pbar(
-    item: typing.List[int],
-    prefix: str = '',
-    size: int = 25,
-    frequency: float = 0.05,
-    use_unicode: bool = True,
-    file: typing.TextIO = sys.stdout
-):
-    """
-    A simple progress bar. Taken from Stack Overflow:
-    https://stackoverflow.com/questions/3160699
-
-    Parameters
-    ----------
-    item : List[int]
-        The iterable to loop over.
-    prefix : str
-        Prefix of the bar.
-    size : int
-        Size of the bar.
-    frequency : float
-        Flush frequency of the bar.
-    use_unicode : bool
-        Whether to use Unicode characters to draw the bar.
-    file : TextIO
-        The output file.
-    """
-    if use_unicode:
-        c_1 = ''
-        c_2 = '█'
-        c_3 = '━'
-        c_4 = ''
-    else:
-        c_1 = '|'
-        c_2 = '|'
-        c_3 = '-'
-        c_4 = '|'
-    count = len(item)
-    start = time.time()
-    interval = max(int(count * frequency), 1)
-
-    def show(j) -> None:
-        x = int(size * j / count)
-        remaining = ((time.time() - start) / j) * (count - j)
-        mins, sec = divmod(remaining, 60)
-        time_string = f'{int(mins):02}:{sec:04.1f}'
-        output = f' {prefix} {c_1}{c_2 * (x - 1) + c_4}{c_3 * (size - x)} ' + \
-                 f'{j}/{count} Est. {time_string}'
-        print('\x1b[1A\x1b[2K' + output, file=file, flush=True)
-
-    for i, it in enumerate(item):
-        yield it
-        if (i % interval) == 0 or i in (0, count - 1):
-            show(i + 1)
-    print(flush=True, file=file)
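For reference, the removed `pbar` is a generator that yields the items of the wrapped sequence while redrawing the bar. A minimal usage sketch (`time.sleep` stands in for real work):

```python
import time

from mlcolvar.graph.utils.progress import pbar

for _ in pbar(list(range(100)), prefix='epoch', size=25):
    time.sleep(0.01)  # stands in for real work
```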