Merge branch 'devel' into 3742

deepmodeling · May 24, 2024 · e3f426d · e3f426d
2 parents e0c59c2 + 7b16911
commit e3f426d
Show file tree

Hide file tree

Showing 63 changed files with 1,168 additions and 245 deletions.
diff --git a/CITATIONS.bib b/CITATIONS.bib
@@ -110,19 +110,22 @@ @article{Wang_NuclFusion_2022_v62_p126013
   doi          = {10.1088/1741-4326/ac888b},
 }
 
-@misc{Zhang_2022_DPA1,
-  annote       = {attention-based descriptor},
+@article{Zhang_NpjComputMater_2024_v10_p94,
+  annote       = {DPA-1, attention-based descriptor},
   author       = {
-    Zhang, Duo and Bi, Hangrui and Dai, Fu-Zhi and Jiang, Wanrun and Zhang,
-    Linfeng and Wang, Han
+    Duo Zhang and Hangrui Bi and Fu-Zhi Dai and Wanrun Jiang and Xinzijian Liu
+    and Linfeng Zhang and Han Wang
   },
   title        = {
-    {DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular
-    Simulation}
+    {Pretraining of attention-based deep learning potential model for molecular
+    simulation}
   },
-  publisher    = {arXiv},
-  year         = 2022,
-  doi          = {10.48550/arXiv.2208.08236},
+  journal      = {Npj Comput. Mater},
+  year         = 2024,
+  volume       = 10,
+  issue        = 1,
+  pages        = 94,
+  doi          = {10.1038/s41524-024-01278-7},
 }
 
 @misc{Zhang_2023_DPA2,

diff --git a/deepmd/common.py b/deepmd/common.py
@@ -16,7 +16,6 @@
     Any,
     Dict,
     List,
-    Optional,
     Set,
     TypeVar,
     Union,
@@ -39,8 +38,6 @@
 )
 
 __all__ = [
-    "data_requirement",
-    "add_data_requirement",
     "select_idx_map",
     "make_default_mesh",
     "j_must_have",
@@ -78,64 +75,6 @@
     )
 
 
-# TODO: refactor data_requirement to make it not a global variable
-# this is not a good way to do things. This is some global variable to which
-# anyone can write and there is no good way to keep track of the changes
-data_requirement = {}
-
-
-def add_data_requirement(
-    key: str,
-    ndof: int,
-    atomic: bool = False,
-    must: bool = False,
-    high_prec: bool = False,
-    type_sel: Optional[bool] = None,
-    repeat: int = 1,
-    default: float = 0.0,
-    dtype: Optional[np.dtype] = None,
-    output_natoms_for_type_sel: bool = False,
-):
-    """Specify data requirements for training.
-
-    Parameters
-    ----------
-    key : str
-        type of data stored in corresponding `*.npy` file e.g. `forces` or `energy`
-    ndof : int
-        number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces
-        have `atomic=True` and `ndof=3`
-    atomic : bool, optional
-        specifies whwther the `ndof` keyworrd applies to per atom quantity or not,
-        by default False
-    must : bool, optional
-        specifi if the `*.npy` data file must exist, by default False
-    high_prec : bool, optional
-        if true load data to `np.float64` else `np.float32`, by default False
-    type_sel : bool, optional
-        select only certain type of atoms, by default None
-    repeat : int, optional
-        if specify repaeat data `repeat` times, by default 1
-    default : float, optional, default=0.
-        default value of data
-    dtype : np.dtype, optional
-        the dtype of data, overwrites `high_prec` if provided
-    output_natoms_for_type_sel : bool, optional
-        if True and type_sel is True, the atomic dimension will be natoms instead of nsel
-    """
-    data_requirement[key] = {
-        "ndof": ndof,
-        "atomic": atomic,
-        "must": must,
-        "high_prec": high_prec,
-        "type_sel": type_sel,
-        "repeat": repeat,
-        "default": default,
-        "dtype": dtype,
-        "output_natoms_for_type_sel": output_natoms_for_type_sel,
-    }
-
-
 def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray:
     """Build map of indices for element supplied element types from all atoms list.
 

diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py
@@ -9,6 +9,9 @@
 
 import numpy as np
 
+from deepmd.dpmodel.common import (
+    NativeOP,
+)
 from deepmd.dpmodel.output_def import (
     FittingOutputDef,
     OutputVariableDef,
@@ -25,7 +28,7 @@
 BaseAtomicModel_ = make_base_atomic_model(np.ndarray)
 
 
-class BaseAtomicModel(BaseAtomicModel_):
+class BaseAtomicModel(BaseAtomicModel_, NativeOP):
     def __init__(
         self,
         type_map: List[str],
@@ -183,6 +186,24 @@ def forward_common_atomic(
 
         return ret_dict
 
+    def call(
+        self,
+        extended_coord: np.ndarray,
+        extended_atype: np.ndarray,
+        nlist: np.ndarray,
+        mapping: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Evaluate the atomic model; thin wrapper over `forward_common_atomic`.
+
+        Every argument is passed through unchanged: the first three
+        positionally, the optional ones by keyword.
+
+        NOTE(review): presumably this is the entry point expected by the
+        `NativeOP` base class this class now inherits from -- confirm against
+        `deepmd.dpmodel.common.NativeOP`.
+
+        Parameters
+        ----------
+        extended_coord : np.ndarray
+            Coordinates forwarded to `forward_common_atomic`.
+        extended_atype : np.ndarray
+            Atom types forwarded to `forward_common_atomic`.
+        nlist : np.ndarray
+            Neighbor list forwarded to `forward_common_atomic`.
+        mapping : np.ndarray, optional
+            Forwarded as `mapping`, by default None.
+        fparam : np.ndarray, optional
+            Forwarded as `fparam`, by default None.
+        aparam : np.ndarray, optional
+            Forwarded as `aparam`, by default None.
+
+        Returns
+        -------
+        Dict[str, np.ndarray]
+            The dictionary returned by `forward_common_atomic`.
+        """
+        return self.forward_common_atomic(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+            fparam=fparam,
+            aparam=aparam,
+        )
+
     def serialize(self) -> dict:
         return {
             "type_map": self.type_map,

diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py
@@ -236,6 +236,8 @@ def call_lower(
             model_predict = self.output_type_cast(model_predict, input_prec)
             return model_predict
 
+        forward_lower = call_lower
+
         def input_type_cast(
             self,
             coord: np.ndarray,
@@ -473,4 +475,8 @@ def atomic_output_def(self) -> FittingOutputDef:
             """Get the output def of the atomic model."""
             return self.atomic_model.atomic_output_def()
 
+        def get_ntypes(self) -> int:
+            """Get the number of types, i.e. the length of `get_type_map()`."""
+            return len(self.get_type_map())
+
     return CM
diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py
@@ -9,10 +9,42 @@
 import numpy as np
 
 from .region import (
+    normalize_coord,
     to_face_distance,
 )
 
 
+def extend_input_and_build_neighbor_list(
+    coord,
+    atype,
+    rcut: float,
+    sel: List[int],
+    mixed_types: bool = False,
+    box: Optional[np.ndarray] = None,
+):
+    """Extend the input with ghost atoms and build its neighbor list.
+
+    Chains three helpers: `normalize_coord` (only when `box` is given),
+    `extend_coord_with_ghosts` and `build_neighbor_list`.
+
+    Parameters
+    ----------
+    coord
+        Atomic coordinates. Must be reshapeable to (nframes, nloc, 3) when
+        `box` is given; otherwise passed on as is.
+    atype
+        Atom types; its first two dimensions are read as (nframes, nloc).
+    rcut : float
+        Cut-off radius handed to `extend_coord_with_ghosts` and
+        `build_neighbor_list`.
+    sel : List[int]
+        Neighbor selection handed to `build_neighbor_list`.
+    mixed_types : bool, optional
+        The neighbor list is built with `distinguish_types=(not mixed_types)`,
+        by default False.
+    box : np.ndarray, optional
+        Simulation cell, reshaped to (nframes, 3, 3) before normalization.
+        If None the coordinates are not normalized, by default None.
+
+    Returns
+    -------
+    extended_coord
+        Extended coordinates reshaped to (nframes, -1, 3).
+    extended_atype
+        Extended atom types as returned by `extend_coord_with_ghosts`.
+    mapping
+        Mapping as returned by `extend_coord_with_ghosts`; presumably maps
+        extended atom indices to local ones -- TODO confirm.
+    nlist
+        Neighbor list as returned by `build_neighbor_list`.
+    """
+    nframes, nloc = atype.shape[:2]
+    if box is not None:
+        # presumably wraps the coordinates back into the cell -- see
+        # `normalize_coord` in `.region` to confirm
+        coord_normalized = normalize_coord(
+            coord.reshape(nframes, nloc, 3),
+            box.reshape(nframes, 3, 3),
+        )
+    else:
+        # no cell given: the coordinates are used unchanged
+        coord_normalized = coord
+    extended_coord, extended_atype, mapping = extend_coord_with_ghosts(
+        coord_normalized, atype, box, rcut
+    )
+    nlist = build_neighbor_list(
+        extended_coord,
+        extended_atype,
+        nloc,
+        rcut,
+        sel,
+        distinguish_types=(not mixed_types),
+    )
+    # return the coordinates with an explicit trailing xyz axis
+    extended_coord = extended_coord.reshape(nframes, -1, 3)
+    return extended_coord, extended_atype, mapping, nlist
+
+
 ## translated from torch implemantation by chatgpt
 def build_neighbor_list(
     coord: np.ndarray,

diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py
@@ -256,6 +256,26 @@ def forward_common_atomic(
 
         return ret_dict
 
+    def forward(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        comm_dict: Optional[Dict[str, torch.Tensor]] = None,
+    ) -> Dict[str, torch.Tensor]:
+        """Evaluate the atomic model; thin wrapper over `forward_common_atomic`.
+
+        Every argument is passed through unchanged: the first three
+        positionally, the optional ones by keyword.
+
+        Parameters
+        ----------
+        extended_coord : torch.Tensor
+            Coordinates forwarded to `forward_common_atomic`.
+        extended_atype : torch.Tensor
+            Atom types forwarded to `forward_common_atomic`.
+        nlist : torch.Tensor
+            Neighbor list forwarded to `forward_common_atomic`.
+        mapping : torch.Tensor, optional
+            Forwarded as `mapping`, by default None.
+        fparam : torch.Tensor, optional
+            Forwarded as `fparam`, by default None.
+        aparam : torch.Tensor, optional
+            Forwarded as `aparam`, by default None.
+        comm_dict : Dict[str, torch.Tensor], optional
+            Forwarded as `comm_dict`, by default None.
+
+        Returns
+        -------
+        Dict[str, torch.Tensor]
+            The dictionary returned by `forward_common_atomic`.
+        """
+        return self.forward_common_atomic(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+            fparam=fparam,
+            aparam=aparam,
+            comm_dict=comm_dict,
+        )
+
     def serialize(self) -> dict:
         return {
             "type_map": self.type_map,

diff --git a/deepmd/tf/common.py b/deepmd/tf/common.py
@@ -23,8 +23,6 @@
 from deepmd.common import (
     VALID_ACTIVATION,
     VALID_PRECISION,
-    add_data_requirement,
-    data_requirement,
     expand_sys_str,
     get_np_precision,
     j_loader,
@@ -47,8 +45,6 @@
 
 __all__ = [
     # from deepmd.common
-    "data_requirement",
-    "add_data_requirement",
     "select_idx_map",
     "make_default_mesh",
     "j_must_have",
@@ -291,8 +287,6 @@ def wrapper(self, *args, **kwargs):
 def clear_session():
     """Reset all state generated by DeePMD-kit."""
     tf.reset_default_graph()
-    # TODO: remove this line when data_requirement is not a global variable
-    data_requirement.clear()
     _TF_VERSION = Version(TF_VERSION)
     if _TF_VERSION < Version("2.4.0"):
         tf.train.experimental.disable_mixed_precision_graph_rewrite()

diff --git a/deepmd/tf/descriptor/descriptor.py b/deepmd/tf/descriptor/descriptor.py
@@ -23,6 +23,9 @@
 from deepmd.tf.utils import (
     PluginVariant,
 )
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
 from deepmd.utils.plugin import (
     make_plugin_registry,
 )
@@ -512,3 +515,8 @@ def serialize(self, suffix: str = "") -> dict:
             Name suffix to identify this descriptor
         """
         raise NotImplementedError(f"Not implemented in class {self.__name__}")
+
+    @property
+    def input_requirement(self) -> List[DataRequirementItem]:
+        """Return data requirements needed for the model input.
+
+        The base implementation requires nothing: it returns a new empty list
+        on every access, so callers may append to the result.
+        """
+        return []
diff --git a/deepmd/tf/descriptor/se_a_ebd.py b/deepmd/tf/descriptor/se_a_ebd.py
@@ -6,9 +6,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    add_data_requirement,
-)
 from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
@@ -18,6 +15,9 @@
     embedding_net,
     one_layer,
 )
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
 
 from .descriptor import (
     Descriptor,
@@ -110,8 +110,6 @@ def __init__(
         self.type_nlayer = type_nlayer
         self.type_one_side = type_one_side
         self.numb_aparam = numb_aparam
-        if self.numb_aparam > 0:
-            add_data_requirement("aparam", 3, atomic=True, must=True, high_prec=False)
 
     def build(
         self,
@@ -600,3 +598,15 @@ def _ebd_filter(
         result = tf.reshape(result, [-1, outputs_size_2 * outputs_size])
 
         return result, qmat
+
+    @property
+    def input_requirement(self) -> List[DataRequirementItem]:
+        """Return data requirements needed for the model input.
+
+        Starts from the parent class's requirements and, when
+        `self.numb_aparam > 0`, appends a mandatory atomic `aparam` item.
+        """
+        data_requirement = super().input_requirement
+        if self.numb_aparam > 0:
+            # NOTE(review): ndof is hard-coded to 3 rather than derived from
+            # `self.numb_aparam`; this matches the `add_data_requirement`
+            # call removed from `__init__` -- confirm it is intended.
+            data_requirement.append(
+                DataRequirementItem(
+                    "aparam", 3, atomic=True, must=True, high_prec=False
+                )
+            )
+        return data_requirement
diff --git a/deepmd/tf/descriptor/se_a_ef.py b/deepmd/tf/descriptor/se_a_ef.py
@@ -7,9 +7,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    add_data_requirement,
-)
 from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
@@ -20,6 +17,9 @@
 from deepmd.tf.utils.sess import (
     run_sess,
 )
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
 
 from .descriptor import (
     Descriptor,
@@ -361,8 +361,6 @@ def __init__(
         self.dstd = None
         self.davg = None
 
-        add_data_requirement("efield", 3, atomic=True, must=True, high_prec=False)
-
         self.place_holders = {}
         avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype(
             GLOBAL_NP_FLOAT_PRECISION
@@ -586,3 +584,12 @@ def _compute_dstats_sys_smth(
             sysr2.append(sumr2)
             sysa2.append(suma2)
         return sysr, sysr2, sysa, sysa2, sysn
+
+    @property
+    def input_requirement(self) -> List[DataRequirementItem]:
+        """Return data requirements needed for the model input.
+
+        Starts from the parent class's requirements and always appends a
+        mandatory atomic `efield` item with 3 degrees of freedom.
+        """
+        data_requirement = super().input_requirement
+        data_requirement.append(
+            DataRequirementItem("efield", 3, atomic=True, must=True, high_prec=False)
+        )
+        return data_requirement
diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py
@@ -195,6 +195,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal
         train_data = get_data(
             jdata["training"]["training_data"], rcut, ipt_type_map, modifier
         )
+        train_data.add_data_requirements(model.data_requirements)
         train_data.print_summary("training")
         if jdata["training"].get("validation_data", None) is not None:
             valid_data = get_data(
@@ -203,6 +204,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal
                 train_data.type_map,
                 modifier,
             )
+            valid_data.add_data_requirements(model.data_requirements)
             valid_data.print_summary("validation")
     else:
         if modifier is not None: