Feat: add se_atten_v2 to PyTorch and DP #3840

Merged Jun 6, 2024 · 46 commits · Changes from 8 commits

Commits (46)
2e8396e  Add se_attn_v2 in pt (Chengqian-Zhang, May 30, 2024)
87c3561  Add se_attn_v2 in dp (Chengqian-Zhang, May 30, 2024)
9b238ad  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], May 30, 2024)
95696ee  Delete redirect to dpa1 (Chengqian-Zhang, May 30, 2024)
e55a891  Merge branch '3831' of github.com:Chengqian-Zhang/deepmd-kit into 3831 (Chengqian-Zhang, May 30, 2024)
08bf55a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], May 30, 2024)
999adc8  Change se_attn_v2 impl in dp and pt (Chengqian-Zhang, May 30, 2024)
0e27dc8  Fix conflict (Chengqian-Zhang, May 30, 2024)
c08f61d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], May 30, 2024)
59269f2  Add UT (Chengqian-Zhang, May 30, 2024)
30cc6af  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], May 30, 2024)
46783e3  Fix bug (Chengqian-Zhang, May 30, 2024)
fdc67f8  Merge branch 'devel' into 3831 (Chengqian-Zhang, May 31, 2024)
c46fb27  Merge branch 'devel' into 3831 (Chengqian-Zhang, Jun 3, 2024)
9ae4dc7  Add UT for consistency (Chengqian-Zhang, Jun 3, 2024)
7e4826e  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 3, 2024)
d6a6e9b  change sentence (Chengqian-Zhang, Jun 3, 2024)
9680443  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 3, 2024)
d4696a1  Delete tf UT (Chengqian-Zhang, Jun 3, 2024)
d5c7ace  Solve conflict (Chengqian-Zhang, Jun 3, 2024)
67ad350  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 3, 2024)
8494c44  Change Doc of se_atten_v2 (Chengqian-Zhang, Jun 3, 2024)
0218a01  Merge branch '3831' of github.com:Chengqian-Zhang/deepmd-kit into 3831 (Chengqian-Zhang, Jun 3, 2024)
2e990cd  Delete tf se_atten_v2 (Chengqian-Zhang, Jun 3, 2024)
890df1d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 3, 2024)
98d6daf  change attn-->atten (Chengqian-Zhang, Jun 3, 2024)
3d66922  Merge branch '3831' of github.com:Chengqian-Zhang/deepmd-kit into 3831 (Chengqian-Zhang, Jun 3, 2024)
a14fed2  Add serialize in se_attn_v2 tf (Chengqian-Zhang, Jun 3, 2024)
c642823  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 3, 2024)
10c8909  fix pre-commit (Chengqian-Zhang, Jun 3, 2024)
6b9a93e  Merge branch '3831' of github.com:Chengqian-Zhang/deepmd-kit into 3831 (Chengqian-Zhang, Jun 3, 2024)
6177522  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 3, 2024)
6813f7f  Solve conversation (Chengqian-Zhang, Jun 4, 2024)
45f9755  Merge branch '3831' of github.com:Chengqian-Zhang/deepmd-kit into 3831 (Chengqian-Zhang, Jun 4, 2024)
d2fd8a2  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 4, 2024)
9d4d0e3  Solve alert (Chengqian-Zhang, Jun 4, 2024)
d25993f  Merge branch '3831' of github.com:Chengqian-Zhang/deepmd-kit into 3831 (Chengqian-Zhang, Jun 4, 2024)
001d21c  Solve alert (Chengqian-Zhang, Jun 4, 2024)
8ee1125  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 4, 2024)
b0380f9  Update deepmd/dpmodel/descriptor/se_atten_v2.py (Chengqian-Zhang, Jun 4, 2024)
6f2473f  Update deepmd/pt/model/descriptor/se_atten_v2.py (Chengqian-Zhang, Jun 4, 2024)
ef94c04  Update deepmd/pt/model/descriptor/se_atten_v2.py (Chengqian-Zhang, Jun 4, 2024)
b39fcce  Update deepmd/utils/argcheck.py (Chengqian-Zhang, Jun 4, 2024)
c982d0e  Merge branch 'devel' into 3831 (Chengqian-Zhang, Jun 5, 2024)
c3892f4  Delete doc_stripped_type_embedding (Chengqian-Zhang, Jun 5, 2024)
38fc569  Merge branch '3831' of github.com:Chengqian-Zhang/deepmd-kit into 3831 (Chengqian-Zhang, Jun 5, 2024)
2 changes: 2 additions & 0 deletions deepmd/dpmodel/descriptor/__init__.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from .dpa1 import (
    DescrptDPA1,
    DescrptSeAttenV2,
)
from .dpa2 import (
    DescrptDPA2,
@@ -22,6 +23,7 @@
    "DescrptSeA",
    "DescrptSeR",
    "DescrptDPA1",
    "DescrptSeAttenV2",
    "DescrptDPA2",
    "DescrptHybrid",
    "make_base_descriptor",
74 changes: 74 additions & 0 deletions deepmd/dpmodel/descriptor/dpa1.py
@@ -871,6 +871,80 @@ def call(
        )


@BaseDescriptor.register("se_atten_v2")
class DescrptSeAttenV2(DescrptDPA1):
    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        sel: Union[List[int], int],
        ntypes: int,
        neuron: List[int] = [25, 50, 100],
        axis_neuron: int = 8,
        tebd_dim: int = 8,
        resnet_dt: bool = False,
        trainable: bool = True,
        type_one_side: bool = False,
        attn: int = 128,
        attn_layer: int = 2,
        attn_dotr: bool = True,
        attn_mask: bool = False,
        exclude_types: List[Tuple[int, int]] = [],
        env_protection: float = 0.0,
        set_davg_zero: bool = False,
        activation_function: str = "tanh",
        precision: str = DEFAULT_PRECISION,
        scaling_factor=1.0,
        normalize: bool = True,
        temperature: Optional[float] = None,
        trainable_ln: bool = True,
        ln_eps: Optional[float] = 1e-5,
        concat_output_tebd: bool = True,
        spin: Optional[Any] = None,
        stripped_type_embedding: Optional[bool] = None,
        use_econf_tebd: bool = False,
        type_map: Optional[List[str]] = None,
        # consistent with argcheck, not used though
        seed: Optional[int] = None,
    ) -> None:
        DescrptDPA1.__init__(
            self,
            rcut,
            rcut_smth,
            sel,
            ntypes,
            neuron=neuron,
            axis_neuron=axis_neuron,
            tebd_dim=tebd_dim,
            tebd_input_mode="strip",
            resnet_dt=resnet_dt,
            trainable=trainable,
            type_one_side=type_one_side,
            attn=attn,
            attn_layer=attn_layer,
            attn_dotr=attn_dotr,
            attn_mask=attn_mask,
            exclude_types=exclude_types,
            env_protection=env_protection,
            set_davg_zero=set_davg_zero,
            activation_function=activation_function,
            precision=precision,
            scaling_factor=scaling_factor,
            normalize=normalize,
            temperature=temperature,
            trainable_ln=trainable_ln,
            ln_eps=ln_eps,
            smooth_type_embedding=True,
            concat_output_tebd=concat_output_tebd,
            spin=spin,
            stripped_type_embedding=stripped_type_embedding,
            use_econf_tebd=use_econf_tebd,
            type_map=type_map,
            # consistent with argcheck, not used though
            seed=seed,
        )


class NeighborGatedAttention(NativeOP):
    def __init__(
        self,
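
Taken together with the registration decorator, the new class amounts to DPA1 with two options pinned. A minimal sketch of the equivalence (a hypothetical two-type system; the parameter values are illustrative, not taken from this PR):

from deepmd.dpmodel.descriptor import (
    DescrptDPA1,
    DescrptSeAttenV2,
)

# se_atten_v2 descriptor for a hypothetical two-type system
v2 = DescrptSeAttenV2(6.0, 0.5, [25, 25], 2)

# should behave like DPA1 with the two options the subclass pins:
# stripped type embedding plus smooth type embedding
ref = DescrptDPA1(
    6.0,
    0.5,
    [25, 25],
    2,
    tebd_input_mode="strip",
    smooth_type_embedding=True,
)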
4 changes: 4 additions & 0 deletions deepmd/pt/model/descriptor/__init__.py
@@ -10,6 +10,9 @@
    DescrptBlockSeAtten,
    DescrptDPA1,
)
from .se_atten_v2 import (
    DescrptSeAttenV2,
)
from .dpa2 import (
    DescrptDPA2,
)
@@ -39,6 +42,7 @@
    "make_default_type_embedding",
    "DescrptBlockSeA",
    "DescrptBlockSeAtten",
    "DescrptSeAttenV2",
    "DescrptSeA",
    "DescrptSeR",
    "DescrptDPA1",
153 changes: 153 additions & 0 deletions deepmd/pt/model/descriptor/se_atten_v2.py
@@ -0,0 +1,153 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from typing import (
    List,
    Optional,
    Tuple,
    Union,
)

from .base_descriptor import (
    BaseDescriptor,
)
from deepmd.pt.model.descriptor.dpa1 import (
    DescrptDPA1,
)


@BaseDescriptor.register("se_atten_v2")
class DescrptSeAttenV2(DescrptDPA1):
    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        sel: Union[List[int], int],
        ntypes: int,
        neuron: list = [25, 50, 100],
        axis_neuron: int = 16,
        tebd_dim: int = 8,
        set_davg_zero: bool = True,
        attn: int = 128,
        attn_layer: int = 2,
        attn_dotr: bool = True,
        attn_mask: bool = False,
        activation_function: str = "tanh",
        precision: str = "float64",
        resnet_dt: bool = False,
        exclude_types: List[Tuple[int, int]] = [],
        env_protection: float = 0.0,
        scaling_factor: float = 1.0,
        normalize=True,
        temperature=None,
        concat_output_tebd: bool = True,
        trainable: bool = True,
        trainable_ln: bool = True,
        ln_eps: Optional[float] = 1e-5,
        type_one_side: bool = False,
        stripped_type_embedding: Optional[bool] = None,
        seed: Optional[int] = None,
        use_econf_tebd: bool = False,
        type_map: Optional[List[str]] = None,
        # not implemented
        spin=None,
        type: Optional[str] = None,
        old_impl: bool = False,
    ) -> None:
        r"""Construct a smooth version of the embedding net of type `se_atten`.

        Parameters
        ----------
        rcut : float
            The cut-off radius :math:`r_c`
        rcut_smth : float
            From where the environment matrix should be smoothed :math:`r_s`
        sel : list[int], int
            list[int]: sel[i] specifies the maximum number of type i atoms in the cut-off radius
            int: the total maximum number of atoms in the cut-off radius
        ntypes : int
            Number of element types
        neuron : list[int]
            Number of neurons in each hidden layer of the embedding net :math:`\mathcal{N}`
        axis_neuron : int
            Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix)
        tebd_dim : int
            Dimension of the type embedding
        set_davg_zero : bool
            Set the shift of embedding net input to zero.
        attn : int
            Hidden dimension of the attention vectors
        attn_layer : int
            Number of attention layers
        attn_dotr : bool
            If dot the angular gate to the attention weights
        attn_mask : bool
            (Only supports False, to keep consistent with other backend references.)
            (Not used in this version.)
            If mask the diagonal of attention weights
        activation_function : str
            The activation function in the embedding net. Supported options are |ACTIVATION_FN|
        precision : str
            The precision of the embedding net parameters. Supported options are |PRECISION|
        resnet_dt : bool
            Time-step `dt` in the resnet construction:
            y = x + dt * \phi (Wx + b)
        exclude_types : List[List[int]]
            The excluded pairs of types which have no interaction with each other.
            For example, `[[0, 1]]` means no interaction between type 0 and type 1.
        env_protection : float
            Protection parameter to prevent division by zero errors during environment matrix calculations.
        scaling_factor : float
            The scaling factor of normalization in calculations of attention weights.
            If `temperature` is None, the scaling of attention weights is (N_dim * scaling_factor)**0.5
        normalize : bool
            Whether to normalize the hidden vectors in attention weights calculation.
        temperature : float
            If not None, the scaling of attention weights is `temperature` itself.
        trainable_ln : bool
            Whether to use trainable shift and scale weights in layer normalization.
        ln_eps : float, Optional
            The epsilon value for layer normalization.
        type_one_side : bool
            If 'False', type embeddings of both neighbor and central atoms are considered.
            If 'True', only type embeddings of neighbor atoms are considered.
            Default is 'False'.
        seed : int, Optional
            Random seed for parameter initialization.
        """
        DescrptDPA1.__init__(
            self,
            rcut,
            rcut_smth,
            sel,
            ntypes,
            neuron=neuron,
            axis_neuron=axis_neuron,
            tebd_dim=tebd_dim,
            tebd_input_mode="strip",
            set_davg_zero=set_davg_zero,
            attn=attn,
            attn_layer=attn_layer,
            attn_dotr=attn_dotr,
            attn_mask=attn_mask,
            activation_function=activation_function,
            precision=precision,
            resnet_dt=resnet_dt,
            exclude_types=exclude_types,
            env_protection=env_protection,
            scaling_factor=scaling_factor,
            normalize=normalize,
            temperature=temperature,
            concat_output_tebd=concat_output_tebd,
            trainable=trainable,
            trainable_ln=trainable_ln,
            ln_eps=ln_eps,
            smooth_type_embedding=True,
            type_one_side=type_one_side,
            stripped_type_embedding=stripped_type_embedding,
            seed=seed,
            use_econf_tebd=use_econf_tebd,
            type_map=type_map,
            # not implemented
            spin=spin,
            type=type,
            old_impl=old_impl,
        )
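
As with the dpmodel class, the PyTorch class is usable directly once imported from the package. A minimal sketch (hypothetical parameter values; get_rcut and get_ntypes are assumed here to be inherited from the base descriptor API):

from deepmd.pt.model.descriptor import DescrptSeAttenV2

# a hypothetical two-type system; "se_atten_v2" is the key that
# training configs use to select this class through the registry
descrpt = DescrptSeAttenV2(6.0, 0.5, [25, 25], 2)
print(descrpt.get_rcut())    # 6.0
print(descrpt.get_ntypes())  # 2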
81 changes: 79 additions & 2 deletions deepmd/utils/argcheck.py
@@ -615,14 +615,91 @@ def descrpt_se_atten_args():
    ]


-@descrpt_args_plugin.register("se_atten_v2", doc=doc_only_tf_supported)
@descrpt_args_plugin.register("se_atten_v2")
def descrpt_se_atten_v2_args():
    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
    doc_trainable_ln = (
        "Whether to use trainable shift and scale weights in layer normalization."
    )
    doc_ln_eps = "The epsilon value for layer normalization. The default value for TensorFlow is set to 1e-3 to keep consistent with keras while set to 1e-5 in PyTorch and DP implementation."
    doc_tebd_dim = "The dimension of atom type embedding."
    doc_use_econf_tebd = r"Whether to use electronic configuration type embedding. For TensorFlow backend, please set `use_econf_tebd` in `type_embedding` block instead."
    doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)."
    doc_scaling_factor = (
        "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). "
        "If `temperature` is None, the scaling of attention weights is (N_hidden_dim * scaling_factor)**0.5. "
        "Else, the scaling of attention weights is set to `temperature`."
    )
    doc_normalize = (
        "Whether to normalize the hidden vectors during attention calculation."
    )
    doc_concat_output_tebd = (
        "Whether to concat type embedding at the output of the descriptor."
    )
    doc_stripped_type_embedding = (
        "(Deprecated, kept only for compatibility.) Whether to strip the type embedding into a separate embedding network. "
        "Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'. "
        "Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'. "
        "The default value is `None`, which means the `tebd_input_mode` setting will be used instead."
    )

    return [
        *descrpt_se_atten_common_args(),
        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
            "stripped_type_embedding",
            bool,
            optional=True,
            default=None,
            doc=doc_stripped_type_embedding,
        ),
        Argument(
            "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero
        ),
        Argument(
            "trainable_ln", bool, optional=True, default=True, doc=doc_trainable_ln
        ),
        Argument("ln_eps", float, optional=True, default=None, doc=doc_ln_eps),
        # pt only
        Argument(
            "tebd_dim",
            int,
            optional=True,
            default=8,
            doc=doc_only_pt_supported + doc_tebd_dim,
        ),
        Argument(
            "use_econf_tebd",
            bool,
            optional=True,
            default=False,
            doc=doc_only_pt_supported + doc_use_econf_tebd,
        ),
        Argument(
            "scaling_factor",
            float,
            optional=True,
            default=1.0,
            doc=doc_only_pt_supported + doc_scaling_factor,
        ),
        Argument(
            "normalize",
            bool,
            optional=True,
            default=True,
            doc=doc_only_pt_supported + doc_normalize,
        ),
        Argument(
            "temperature",
            float,
            optional=True,
            doc=doc_only_pt_supported + doc_temperature,
        ),
        Argument(
            "concat_output_tebd",
            bool,
            optional=True,
            default=True,
            doc=doc_only_pt_supported + doc_concat_output_tebd,
        ),
    ]

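
With the registration in place, a descriptor block in a training input can now select the new type directly. A minimal sketch (keys follow descrpt_se_atten_v2_args() above; the values are illustrative, not taken from this PR):

# descriptor section of a training config, written as a Python dict
# (it would normally live in input.json); values are illustrative
descriptor = {
    "type": "se_atten_v2",
    "rcut": 6.0,
    "rcut_smth": 0.5,
    "sel": 120,
    "neuron": [25, 50, 100],
    "axis_neuron": 16,
    "attn": 128,
    "attn_layer": 2,
    "attn_dotr": True,
    "attn_mask": False,
    "set_davg_zero": True,
}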