diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index 768590980f..4fb3da4202 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -32,6 +32,10 @@ jobs: run: | wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip unzip libtorch.zip + - name: Download paddle_inference_lib + run: | + wget https://paddle-qa.bj.bcebos.com/paddle-pipeline/GITHUB_Docker_Compile_Test_Cuda118_cudnn860_Trt8531_D1/ce51e82e84fc97e0a55a162037f1554746159cad/paddle_inference.tgz + tar -xzvf paddle_inference.tgz # https://github.com/actions/runner-images/issues/9491 - name: Fix kernel mmap rnd bits run: sudo sysctl vm.mmap_rnd_bits=28 @@ -49,6 +53,10 @@ jobs: # test lammps - run: | export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + export PADDLE_INFERENCE_ROOT=$PWD/paddle_inference_install_dir/ + export LD_LIBRARY_PATH=${PADDLE_INFERENCE_ROOT}/paddle/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=${PADDLE_INFERENCE_ROOT}/third_party/install/onednn/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=${PADDLE_INFERENCE_ROOT}/third_party/install/mklml/lib:$LD_LIBRARY_PATH source/install/uv_with_retry.sh pip install --system -e .[cpu,test,lmp] mpi4py env: DP_BUILD_TESTING: 1 diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 6bf4c8552f..b73ba5d4bf 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -48,6 +48,7 @@ jobs: if: false # skip as we use nvidia image - run: python -m pip install -U uv - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0" + - run: source/install/uv_with_retry.sh pip install --system --pre "paddlepaddle-gpu" -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/ - run: | export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index e46bddd98a..033e4c6ba3 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -29,6 +29,7 @@ jobs: source/install/uv_with_retry.sh pip install --system torch -i https://download.pytorch.org/whl/cpu export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') source/install/uv_with_retry.sh pip install --system --only-binary=horovod -e .[cpu,test,jax] horovod[tensorflow-cpu] mpi4py + source/install/uv_with_retry.sh pip install --system --pre "paddlepaddle" -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ env: # Please note that uv has some issues with finding # existing TensorFlow package. 
Currently, it uses diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6cb534fd22..0856f837a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -64,14 +64,14 @@ repos: hooks: - id: clang-format exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$) - # markdown, yaml, CSS, javascript - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v4.0.0-alpha.8 - hooks: - - id: prettier - types_or: [markdown, yaml, css] - # workflow files cannot be modified by pre-commit.ci - exclude: ^(source/3rdparty|\.github/workflows|\.clang-format) + # # markdown, yaml, CSS, javascript + # - repo: https://github.com/pre-commit/mirrors-prettier + # rev: v4.0.0-alpha.8 + # hooks: + # - id: prettier + # types_or: [markdown, yaml, css] + # # workflow files cannot be modified by pre-commit.ci + # exclude: ^(source/3rdparty|\.github/workflows|\.clang-format) # Shell - repo: https://github.com/scop/pre-commit-shfmt rev: v3.10.0-1 @@ -83,25 +83,25 @@ repos: hooks: - id: cmake-format #- id: cmake-lint - - repo: https://github.com/njzjz/mirrors-bibtex-tidy - rev: v1.13.0 - hooks: - - id: bibtex-tidy - args: - - --curly - - --numeric - - --align=13 - - --blank-lines - # disable sort: the order of keys and fields has explict meanings - #- --sort=key - - --duplicates=key,doi,citation,abstract - - --merge=combine - #- --sort-fields - #- --strip-comments - - --trailing-commas - - --encode-urls - - --remove-empty-fields - - --wrap=80 + # - repo: https://github.com/njzjz/mirrors-bibtex-tidy + # rev: v1.13.0 + # hooks: + # - id: bibtex-tidy + # args: + # - --curly + # - --numeric + # - --align=13 + # - --blank-lines + # # disable sort: the order of keys and fields has explict meanings + # #- --sort=key + # - --duplicates=key,doi,citation,abstract + # - --merge=combine + # #- --sort-fields + # #- --strip-comments + # - --trailing-commas + # - --encode-urls + # - --remove-empty-fields + # - --wrap=80 # license header - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.5 diff --git a/README.md b/README.md index 55f927d62b..100dcec4c2 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ For more information, check the [documentation](https://deepmd.readthedocs.io/). ### Highlighted features -- **interfaced with multiple backends**, including TensorFlow, PyTorch, and JAX, the most popular deep learning frameworks, making the training process highly automatic and efficient. +- **interfaced with multiple backends**, including TensorFlow, PyTorch, JAX and Paddle, the most popular deep learning frameworks, making the training process highly automatic and efficient. - **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, including LAMMPS, i-PI, AMBER, CP2K, GROMACS, OpenMM, and ABUCUS. - **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems, including organic molecules, metals, semiconductors, insulators, etc. - **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. @@ -72,7 +72,7 @@ See [our latest paper](https://doi.org/10.1063/5.0155600) for details of all fea #### v3 -- Multiple backends supported. Add PyTorch and JAX backends. +- Multiple backends supported. Add PyTorch, JAX and Paddle backends. - The DPA-2 model. 
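The workflow and README changes above add Paddle as a fourth backend and pull the nightly `paddlepaddle` / `paddlepaddle-gpu` wheels in CI. As a quick, illustrative sanity check (not part of the patch, and assuming such a wheel is already installed), the attributes below are the same ones the new `SummaryPrinter` in `deepmd/pd/entrypoints/main.py` reports:

```python
# Illustrative check that the installed PaddlePaddle wheel is importable.
import paddle
import paddle.version

print(paddle.__version__)                     # nightly wheels typically report "0.0.0"
print(paddle.version.commit[:11])             # commit hash baked into the wheel
print(paddle.device.is_compiled_with_cuda())  # True for the GPU wheel, False for the CPU wheel
```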
## Install and use DeePMD-kit diff --git a/backend/dp_backend.py b/backend/dp_backend.py index 81c3f20f19..68abcea1c5 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -3,6 +3,9 @@ from scikit_build_core import build as _orig +from .find_paddle import ( + find_paddle, +) from .find_pytorch import ( find_pytorch, ) @@ -43,6 +46,7 @@ def get_requires_for_build_wheel( _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1] + find_pytorch()[1] + + find_paddle()[1] ) @@ -53,4 +57,5 @@ def get_requires_for_build_editable( _orig.get_requires_for_build_editable(config_settings) + find_tensorflow()[1] + find_pytorch()[1] + + find_paddle()[1] ) diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py index a66e9a2759..65870aa39b 100644 --- a/backend/dynamic_metadata.py +++ b/backend/dynamic_metadata.py @@ -7,6 +7,9 @@ Optional, ) +from .find_paddle import ( + get_pd_requirement, +) from .find_pytorch import ( get_pt_requirement, ) @@ -34,7 +37,7 @@ def dynamic_metadata( settings: Optional[dict[str, object]] = None, ): assert field in ["optional-dependencies", "entry-points", "scripts"] - _, _, find_libpython_requires, extra_scripts, tf_version, pt_version = ( + _, _, find_libpython_requires, extra_scripts, tf_version, pt_version, pd_version = ( get_argument_from_env() ) with Path("pyproject.toml").open("rb") as f: @@ -55,4 +58,5 @@ def dynamic_metadata( **optional_dependencies, **get_tf_requirement(tf_version), **get_pt_requirement(pt_version), + **get_pd_requirement(pd_version), } diff --git a/backend/find_paddle.py b/backend/find_paddle.py new file mode 100644 index 0000000000..fb8044730e --- /dev/null +++ b/backend/find_paddle.py @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import importlib +import os +import site +from functools import ( + lru_cache, +) +from importlib.machinery import ( + FileFinder, +) +from importlib.util import ( + find_spec, +) +from pathlib import ( + Path, +) +from sysconfig import ( + get_path, +) +from typing import ( + Optional, + Union, +) + + +@lru_cache +def find_paddle() -> tuple[Optional[str], list[str]]: + """Find PaddlePaddle library. + + Tries to find PaddlePaddle in the order of: + + 1. Environment variable `PADDLE_ROOT` if set + 2. The current Python environment. + 3. user site packages directory if enabled + 4. system site packages directory (purelib) + + Considering the default PaddlePaddle package still uses old CXX11 ABI, we + cannot install it automatically. + + Returns + ------- + str, optional + PaddlePaddle library path if found. + list of str + Paddle requirement if not found. Empty if found.
+ """ + if os.environ.get("DP_ENABLE_PADDLE", "0") == "0": + return None, [] + requires = [] + pd_spec = None + + if (pd_spec is None or not pd_spec) and os.environ.get("PADDLE_ROOT") is not None: + site_packages = Path(os.environ.get("PADDLE_ROOT")).parent.absolute() + pd_spec = FileFinder(str(site_packages)).find_spec("paddle") + + # get paddle spec + # note: isolated build will not work for backend + if pd_spec is None or not pd_spec: + pd_spec = find_spec("paddle") + + if not pd_spec and site.ENABLE_USER_SITE: + # first search TF from user site-packages before global site-packages + site_packages = site.getusersitepackages() + if site_packages: + pd_spec = FileFinder(site_packages).find_spec("paddle") + + if not pd_spec: + # purelib gets site-packages path + site_packages = get_path("purelib") + if site_packages: + pd_spec = FileFinder(site_packages).find_spec("paddle") + + # get install dir from spec + try: + pd_install_dir = pd_spec.submodule_search_locations[0] # type: ignore + # AttributeError if ft_spec is None + # TypeError if submodule_search_locations are None + # IndexError if submodule_search_locations is an empty list + except (AttributeError, TypeError, IndexError): + pd_install_dir = None + requires.extend(get_pd_requirement()["paddle"]) + return pd_install_dir, requires + + +@lru_cache +def get_pd_requirement(pd_version: str = "") -> dict: + """Get PaddlePadle requirement when Paddle is not installed. + + If pd_version is not given and the environment variable `PADDLE_VERSION` is set, use it as the requirement. + + Parameters + ---------- + pd_version : str, optional + Paddle version + + Returns + ------- + dict + PaddlePadle requirement. + """ + if pd_version is None: + return {"paddle": []} + if pd_version == "": + pd_version = os.environ.get("PADDLE_VERSION", "") + + return { + "paddle": [ + "paddlepaddle" if pd_version != "" else "paddlepaddle", + ], + } + + +@lru_cache +def get_pd_version(pd_path: Optional[Union[str, Path]]) -> str: + """Get Paddle version from a Paddle Python library path. + + Parameters + ---------- + pd_path : str or Path + Paddle Python library path, e.g. "/python3.10/site-packages/paddle/" + + Returns + ------- + str + version + """ + if pd_path is None or pd_path == "": + return "" + version_file = Path(pd_path) / "version" / "__init__.py" + spec = importlib.util.spec_from_file_location("paddle.version", version_file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.full_version diff --git a/backend/read_env.py b/backend/read_env.py index 3b217926d6..fc2be4c9cf 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -10,6 +10,10 @@ Version, ) +from .find_paddle import ( + find_paddle, + get_pd_version, +) from .find_pytorch import ( find_pytorch, get_pt_version, @@ -21,7 +25,7 @@ @lru_cache -def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: +def get_argument_from_env() -> tuple[str, list, list, dict, str, str, str]: """Get the arguments from environment variables. The environment variables are assumed to be not changed during the build. @@ -40,6 +44,8 @@ def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: The TensorFlow version. str The PyTorch version. + str + The Paddle version. 
""" cmake_args = [] extra_scripts = {} @@ -117,6 +123,18 @@ def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: cmake_args.append("-DENABLE_PYTORCH=OFF") pt_version = None + if os.environ.get("DP_ENABLE_PADDLE", "0") == "1": + pd_install_dir, _ = find_paddle() + pd_version = get_pd_version(pd_install_dir) + cmake_args.extend( + [ + "-DENABLE_PADDLE=ON", + ] + ) + else: + cmake_args.append("-DENABLE_PADDLE=OFF") + pd_version = None + cmake_args = [ "-DBUILD_PY_IF:BOOL=TRUE", *cmake_args, @@ -128,11 +146,12 @@ def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: extra_scripts, tf_version, pt_version, + pd_version, ) def set_scikit_build_env(): """Set scikit-build environment variables before executing scikit-build.""" - cmake_minimum_required_version, cmake_args, _, _, _, _ = get_argument_from_env() + cmake_minimum_required_version, cmake_args, _, _, _, _, _ = get_argument_from_env() os.environ["SKBUILD_CMAKE_MINIMUM_VERSION"] = cmake_minimum_required_version os.environ["SKBUILD_CMAKE_ARGS"] = ";".join(cmake_args) diff --git a/deepmd/backend/paddle.py b/deepmd/backend/paddle.py new file mode 100644 index 0000000000..b1f664e76a --- /dev/null +++ b/deepmd/backend/paddle.py @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("pd") +@Backend.register("paddle") +class PaddleBackend(Backend): + """Paddle backend.""" + + name = "Paddle" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".json", ".pd"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("paddle") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.pd.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.pd.infer.deep_eval import DeepEval as DeepEvalPD + + return DeepEvalPD + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.pd.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. 
+ """ + from deepmd.pd.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + from deepmd.pd.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py index 4e7620bdda..314cd97f74 100644 --- a/deepmd/dpmodel/atomic_model/base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py @@ -210,7 +210,7 @@ def forward_common_atomic( out_shape2 = math.prod(out_shape[2:]) ret_dict[kk] = ( ret_dict[kk].reshape([out_shape[0], out_shape[1], out_shape2]) - * atom_mask[:, :, None] + * atom_mask[:, :, None].astype(ret_dict[kk].dtype) ).reshape(out_shape) ret_dict["mask"] = atom_mask diff --git a/deepmd/dpmodel/descriptor/se_t_tebd.py b/deepmd/dpmodel/descriptor/se_t_tebd.py index ca89c23968..4c8234e12f 100644 --- a/deepmd/dpmodel/descriptor/se_t_tebd.py +++ b/deepmd/dpmodel/descriptor/se_t_tebd.py @@ -672,7 +672,7 @@ def call( dmatrix = dmatrix.reshape(nf * nloc, nnei, 4) # nfnl x nnei x 4 rr = dmatrix - rr = rr * exclude_mask[:, :, None] + rr = rr * exclude_mask[:, :, None].astype(rr.dtype) # nfnl x nt_i x 3 rr_i = rr[:, :, 1:] # nfnl x nt_j x 3 diff --git a/deepmd/main.py b/deepmd/main.py index 60b8da2850..fe9b7b2516 100644 --- a/deepmd/main.py +++ b/deepmd/main.py @@ -99,9 +99,10 @@ def main_parser() -> argparse.ArgumentParser: formatter_class=RawTextArgumentDefaultsHelpFormatter, epilog=textwrap.dedent( """\ - Use --tf or --pt to choose the backend: + Use --tf, --pt or --pd to choose the backend: dp --tf train input.json dp --pt train input.json + dp --pd train input.json """ ), ) diff --git a/deepmd/pd/__init__.py b/deepmd/pd/__init__.py new file mode 100644 index 0000000000..bd8b881bc5 --- /dev/null +++ b/deepmd/pd/__init__.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +# import customized OPs globally +try: + from deepmd.pd.cxx_op import ( + ENABLE_CUSTOMIZED_OP, + ) + + __all__ = [ + "ENABLE_CUSTOMIZED_OP", + ] +except Exception as e: + __all__ = [] + +import paddle + +# enable primitive mode for eager/static graph +paddle.framework.core.set_prim_eager_enabled(True) +paddle.framework.core._set_prim_all_enabled(True) diff --git a/deepmd/pd/cxx_op.py b/deepmd/pd/cxx_op.py new file mode 100644 index 0000000000..61d34a958c --- /dev/null +++ b/deepmd/pd/cxx_op.py @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + + +def load_library(module_name: str) -> bool: + """Load OP library. + + Parameters + ---------- + module_name : str + Name of the module + + Returns + ------- + bool + Whether the library is loaded successfully + """ + # NOTE: Paddle do not support loading library from .so file yet. 
+ return False + + +ENABLE_CUSTOMIZED_OP = load_library("deepmd_op_pd") + +__all__ = [ + "ENABLE_CUSTOMIZED_OP", +] diff --git a/deepmd/pd/entrypoints/__init__.py b/deepmd/pd/entrypoints/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pd/entrypoints/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pd/entrypoints/main.py b/deepmd/pd/entrypoints/main.py new file mode 100644 index 0000000000..e8b6a0d0c7 --- /dev/null +++ b/deepmd/pd/entrypoints/main.py @@ -0,0 +1,586 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import argparse +import copy +import json +import logging +from pathlib import ( + Path, +) +from typing import ( + Optional, + Union, +) + +import h5py +import paddle +import paddle.distributed as dist +import paddle.distributed.fleet as fleet +import paddle.version + +from deepmd import ( + __version__, +) +from deepmd.common import ( + expand_sys_str, +) +from deepmd.env import ( + GLOBAL_CONFIG, +) +from deepmd.loggers.loggers import ( + set_log_handles, +) +from deepmd.main import ( + parse_args, +) +from deepmd.pd.cxx_op import ( + ENABLE_CUSTOMIZED_OP, +) +from deepmd.pd.infer import ( + inference, +) +from deepmd.pd.model.model import ( + BaseModel, +) +from deepmd.pd.train import ( + training, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) +from deepmd.pd.utils.dataloader import ( + DpLoaderSet, +) +from deepmd.pd.utils.env import ( + DEVICE, +) +from deepmd.pd.utils.finetune import ( + get_finetune_rules, +) +from deepmd.pd.utils.multi_task import ( + preprocess_shared_params, +) +from deepmd.pd.utils.stat import ( + make_stat_input, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.argcheck import ( + normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) +from deepmd.utils.data_system import ( + get_data, + process_systems, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.summary import SummaryPrinter as BaseSummaryPrinter + +# from paddle.distributed.elastic.multiprocessing.errors import ( +# record, +# ) + + +log = logging.getLogger(__name__) + + +def get_trainer( + config, + init_model=None, + restart_model=None, + finetune_model=None, + force_load=False, + init_frz_model=None, + shared_links=None, + finetune_links=None, +): + multi_task = "model_dict" in config.get("model", {}) + + # Initialize DDP + world_size = dist.get_world_size() + if world_size > 1: + assert paddle.version.nccl() != "0" + fleet.init(is_collective=True) + + def prepare_trainer_input_single( + model_params_single, data_dict_single, rank=0, seed=None + ): + training_dataset_params = data_dict_single["training_data"] + validation_dataset_params = data_dict_single.get("validation_data", None) + validation_systems = ( + validation_dataset_params["systems"] if validation_dataset_params else None + ) + training_systems = training_dataset_params["systems"] + training_systems = process_systems(training_systems) + if validation_systems is not None: + validation_systems = process_systems(validation_systems) + + # stat files + stat_file_path_single = data_dict_single.get("stat_file", None) + if rank != 0: + stat_file_path_single = None + elif stat_file_path_single is not None: + if not Path(stat_file_path_single).exists(): + if stat_file_path_single.endswith((".h5", ".hdf5")): + with h5py.File(stat_file_path_single, "w") as f: + pass + else: + Path(stat_file_path_single).mkdir() + stat_file_path_single = DPPath(stat_file_path_single, "a") + 
+ # validation and training data + # avoid the same batch sequence among devices + rank_seed = (seed + rank) % (2**32) if seed is not None else None + validation_data_single = ( + DpLoaderSet( + validation_systems, + validation_dataset_params["batch_size"], + model_params_single["type_map"], + seed=rank_seed, + ) + if validation_systems + else None + ) + train_data_single = DpLoaderSet( + training_systems, + training_dataset_params["batch_size"], + model_params_single["type_map"], + seed=rank_seed, + ) + return ( + train_data_single, + validation_data_single, + stat_file_path_single, + ) + + rank = dist.get_rank() if dist.is_available() and dist.is_initialized() else 0 + data_seed = config["training"].get("seed", None) + if not multi_task: + ( + train_data, + validation_data, + stat_file_path, + ) = prepare_trainer_input_single( + config["model"], + config["training"], + rank=rank, + seed=data_seed, + ) + else: + train_data, validation_data, stat_file_path = {}, {}, {} + for model_key in config["model"]["model_dict"]: + ( + train_data[model_key], + validation_data[model_key], + stat_file_path[model_key], + ) = prepare_trainer_input_single( + config["model"]["model_dict"][model_key], + config["training"]["data_dict"][model_key], + rank=rank, + seed=data_seed, + ) + + trainer = training.Trainer( + config, + train_data, + stat_file_path=stat_file_path, + validation_data=validation_data, + init_model=init_model, + restart_model=restart_model, + finetune_model=finetune_model, + force_load=force_load, + shared_links=shared_links, + finetune_links=finetune_links, + init_frz_model=init_frz_model, + ) + return trainer + + +class SummaryPrinter(BaseSummaryPrinter): + """Summary printer for Paddle.""" + + def is_built_with_cuda(self) -> bool: + """Check if the backend is built with CUDA.""" + return paddle.device.is_compiled_with_cuda() + + def is_built_with_rocm(self) -> bool: + """Check if the backend is built with ROCm.""" + return paddle.device.is_compiled_with_rocm() + + def get_compute_device(self) -> str: + """Get Compute device.""" + return str(DEVICE) + + def get_ngpus(self) -> int: + """Get the number of GPUs.""" + return paddle.device.cuda.device_count() + + def get_backend_info(self) -> dict: + """Get backend information.""" + if ENABLE_CUSTOMIZED_OP: + op_info = { + "build with PD ver": GLOBAL_CONFIG["pd_version"], + "build with PD inc": GLOBAL_CONFIG["pd_include_dir"].replace(";", "\n"), + "build with PD lib": GLOBAL_CONFIG["pd_libs"].replace(";", "\n"), + } + else: + op_info = {} + return { + "Backend": "Paddle", + "PD ver": f"v{paddle.__version__}-g{paddle.version.commit[:11]}", + "Enable custom OP": ENABLE_CUSTOMIZED_OP, + **op_info, + } + + +def train(FLAGS): + log.info("Configuration path: %s", FLAGS.INPUT) + SummaryPrinter()() + with open(FLAGS.INPUT) as fin: + config = json.load(fin) + # ensure suffix, as in the command line help, we say "path prefix of checkpoint files" + if FLAGS.init_model is not None and not FLAGS.init_model.endswith(".pd"): + FLAGS.init_model += ".pd" + if FLAGS.restart is not None and not FLAGS.restart.endswith(".pd"): + FLAGS.restart += ".pd" + + # update multitask config + multi_task = "model_dict" in config["model"] + shared_links = None + if multi_task: + config["model"], shared_links = preprocess_shared_params(config["model"]) + # handle the special key + assert ( + "RANDOM" not in config["model"]["model_dict"] + ), "Model name can not be 'RANDOM' in multi-task mode!" 
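With the backend registered, `dp --pd train input.json` (as advertised in the updated `deepmd/main.py` help text) dispatches to this module's `main`, and the same path can be exercised programmatically. A hedged sketch, assuming `paddlepaddle` is installed and that an `input.json` exists (the file name is hypothetical):

```python
# Illustrative only: drive the Paddle training entry point directly,
# equivalent to running `dp --pd train input.json` on the command line.
from deepmd.pd.entrypoints.main import main

main(["train", "input.json"])  # parses the arguments and calls train(FLAGS)
```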
+ + # update fine-tuning config + finetune_links = None + if FLAGS.finetune is not None: + config["model"], finetune_links = get_finetune_rules( + FLAGS.finetune, + config["model"], + model_branch=FLAGS.model_branch, + change_model_params=FLAGS.use_pretrain_script, + ) + # update init_model or init_frz_model config if necessary + if ( + FLAGS.init_model is not None or FLAGS.init_frz_model is not None + ) and FLAGS.use_pretrain_script: + if FLAGS.init_model is not None: + init_state_dict = paddle.load(FLAGS.init_model) + if "model" in init_state_dict: + init_state_dict = init_state_dict["model"] + config["model"] = init_state_dict["_extra_state"]["model_params"] + else: + raise NotImplementedError("FLAGS.init_model can not be empty.") + + # argcheck + config = update_deepmd_input(config, warning=True, dump="input_v2_compat.json") + config = normalize(config, multi_task=multi_task) + + # do neighbor stat + min_nbor_dist = None + if not FLAGS.skip_neighbor_stat: + log.info( + "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)" + ) + + if not multi_task: + type_map = config["model"].get("type_map") + train_data = get_data( + config["training"]["training_data"], 0, type_map, None + ) + config["model"], min_nbor_dist = BaseModel.update_sel( + train_data, type_map, config["model"] + ) + else: + min_nbor_dist = {} + for model_item in config["model"]["model_dict"]: + type_map = config["model"]["model_dict"][model_item].get("type_map") + train_data = get_data( + config["training"]["data_dict"][model_item]["training_data"], + 0, + type_map, + None, + ) + config["model"]["model_dict"][model_item], min_nbor_dist[model_item] = ( + BaseModel.update_sel( + train_data, type_map, config["model"]["model_dict"][model_item] + ) + ) + + with open(FLAGS.output, "w") as fp: + json.dump(config, fp, indent=4) + + trainer = get_trainer( + config, + FLAGS.init_model, + FLAGS.restart, + FLAGS.finetune, + FLAGS.force_load, + FLAGS.init_frz_model, + shared_links=shared_links, + finetune_links=finetune_links, + ) + # save min_nbor_dist + if min_nbor_dist is not None: + if not multi_task: + trainer.model.min_nbor_dist = min_nbor_dist + else: + for model_item in min_nbor_dist: + trainer.model[model_item].min_nbor_dist = min_nbor_dist[model_item] + trainer.run() + + +def freeze(FLAGS): + paddle.set_flags( + { + "FLAGS_save_cf_stack_op": 1, + "FLAGS_prim_enable_dynamic": 1, + "FLAGS_enable_pir_api": 1, + } + ) + model = inference.Tester(FLAGS.model, head=FLAGS.head).model + model.eval() + from paddle.static import ( + InputSpec, + ) + + """ + ** coord [None, natoms, 3] paddle.float64 + ** atype [None, natoms] paddle.int64 + ** nlist [None, natoms, nnei] paddle.int32 + """ + # NOTE: 'FLAGS_save_cf_stack_op', 'FLAGS_prim_enable_dynamic' and + # 'FLAGS_enable_pir_api' shoule be enabled when freezing model. 
+ jit_model = paddle.jit.to_static( + model.forward_lower, + full_graph=True, + input_spec=[ + InputSpec([-1, -1, 3], dtype="float64", name="coord"), + InputSpec([-1, -1], dtype="int32", name="atype"), + InputSpec([-1, -1, -1], dtype="int32", name="nlist"), + ], + ) + if FLAGS.output.endswith(".json"): + FLAGS.output = FLAGS.output[:-5] + paddle.jit.save( + jit_model, + path=FLAGS.output, + skip_prune_program=True, + ) + log.info( + f"Paddle inference model has been exported to: {FLAGS.output}.json and {FLAGS.output}.pdiparams" + ) + + +def show(FLAGS): + if FLAGS.INPUT.split(".")[-1] == "pd": + state_dict = paddle.load(FLAGS.INPUT) + if "model" in state_dict: + state_dict = state_dict["model"] + model_params = state_dict["_extra_state"]["model_params"] + else: + raise RuntimeError( + "The model provided must be a checkpoint file with a .pd extension" + ) + model_is_multi_task = "model_dict" in model_params + log.info("This is a multitask model") if model_is_multi_task else log.info( + "This is a singletask model" + ) + + if "model-branch" in FLAGS.ATTRIBUTES: + # The model must be multitask mode + if not model_is_multi_task: + raise RuntimeError( + "The 'model-branch' option requires a multitask model." + " The provided model does not meet this criterion." + ) + model_branches = list(model_params["model_dict"].keys()) + model_branches += ["RANDOM"] + log.info( + f"Available model branches are {model_branches}, " + f"where 'RANDOM' means using a randomly initialized fitting net." + ) + if "type-map" in FLAGS.ATTRIBUTES: + if model_is_multi_task: + model_branches = list(model_params["model_dict"].keys()) + for branch in model_branches: + type_map = model_params["model_dict"][branch]["type_map"] + log.info(f"The type_map of branch {branch} is {type_map}") + else: + type_map = model_params["type_map"] + log.info(f"The type_map is {type_map}") + if "descriptor" in FLAGS.ATTRIBUTES: + if model_is_multi_task: + model_branches = list(model_params["model_dict"].keys()) + for branch in model_branches: + descriptor = model_params["model_dict"][branch]["descriptor"] + log.info(f"The descriptor parameter of branch {branch} is {descriptor}") + else: + descriptor = model_params["descriptor"] + log.info(f"The descriptor parameter is {descriptor}") + if "fitting-net" in FLAGS.ATTRIBUTES: + if model_is_multi_task: + model_branches = list(model_params["model_dict"].keys()) + for branch in model_branches: + fitting_net = model_params["model_dict"][branch]["fitting_net"] + log.info( + f"The fitting_net parameter of branch {branch} is {fitting_net}" + ) + else: + fitting_net = model_params["fitting_net"] + log.info(f"The fitting_net parameter is {fitting_net}") + + +def change_bias(FLAGS): + if FLAGS.INPUT.endswith(".pd"): + old_state_dict = paddle.load(FLAGS.INPUT) + model_state_dict = copy.deepcopy(old_state_dict.get("model", old_state_dict)) + model_params = model_state_dict["_extra_state"]["model_params"] + else: + raise RuntimeError( + "Paddle now do not support change bias directly from a freezed model file" + "Please provided a checkpoint file with a .pd extension" + ) + multi_task = "model_dict" in model_params + model_branch = FLAGS.model_branch + bias_adjust_mode = ( + "change-by-statistic" if FLAGS.mode == "change" else "set-by-statistic" + ) + if multi_task: + assert ( + model_branch is not None + ), "For multitask model, the model branch must be set!" + assert model_branch in model_params["model_dict"], ( + f"For multitask model, the model branch must be in the 'model_dict'! 
" + f"Available options are : {list(model_params['model_dict'].keys())}." + ) + log.info(f"Changing out bias for model {model_branch}.") + model = training.get_model_for_wrapper(model_params) + type_map = ( + model_params["type_map"] + if not multi_task + else model_params["model_dict"][model_branch]["type_map"] + ) + model_to_change = model if not multi_task else model[model_branch] + if FLAGS.INPUT.endswith(".pd"): + wrapper = ModelWrapper(model) + wrapper.set_state_dict(old_state_dict["model"]) + else: + raise NotImplementedError("Only support .pd file") + + if FLAGS.bias_value is not None: + # use user-defined bias + assert model_to_change.model_type in [ + "ener" + ], "User-defined bias is only available for energy model!" + assert ( + len(FLAGS.bias_value) == len(type_map) + ), f"The number of elements in the bias should be the same as that in the type_map: {type_map}." + old_bias = model_to_change.get_out_bias() + bias_to_set = paddle.to_tensor( + FLAGS.bias_value, dtype=old_bias.dtype, place=old_bias.place + ).reshape(old_bias.shape) + model_to_change.set_out_bias(bias_to_set) + log.info( + f"Change output bias of {type_map!s} " + f"from {to_numpy_array(old_bias).reshape(-1)!s} " + f"to {to_numpy_array(bias_to_set).reshape(-1)!s}." + ) + updated_model = model_to_change + else: + # calculate bias on given systems + if FLAGS.datafile is not None: + with open(FLAGS.datafile) as datalist: + all_sys = datalist.read().splitlines() + else: + all_sys = expand_sys_str(FLAGS.system) + data_systems = process_systems(all_sys) + data_single = DpLoaderSet( + data_systems, + 1, + type_map, + ) + mock_loss = training.get_loss( + {"inference": True}, 1.0, len(type_map), model_to_change + ) + data_requirement = mock_loss.label_requirement + data_requirement += training.get_additional_data_requirement(model_to_change) + data_single.add_data_requirement(data_requirement) + nbatches = FLAGS.numb_batch if FLAGS.numb_batch != 0 else float("inf") + sampled_data = make_stat_input( + data_single.systems, + data_single.dataloaders, + nbatches, + ) + updated_model = training.model_change_out_bias( + model_to_change, sampled_data, _bias_adjust_mode=bias_adjust_mode + ) + + if not multi_task: + model = updated_model + else: + model[model_branch] = updated_model + + if FLAGS.INPUT.endswith(".pd"): + output_path = ( + FLAGS.output + if FLAGS.output is not None + else FLAGS.INPUT.replace(".pd", "_updated.pd") + ) + wrapper = ModelWrapper(model) + if "model" in old_state_dict: + old_state_dict["model"] = wrapper.state_dict() + old_state_dict["model"]["_extra_state"] = model_state_dict["_extra_state"] + else: + old_state_dict = wrapper.state_dict() + old_state_dict["_extra_state"] = model_state_dict["_extra_state"] + paddle.save(old_state_dict, output_path) + else: + raise NotImplementedError("Only support .pd file now") + + log.info(f"Saved model to {output_path}") + + +# @record +def main(args: Optional[Union[list[str], argparse.Namespace]] = None): + if not isinstance(args, argparse.Namespace): + FLAGS = parse_args(args=args) + else: + FLAGS = args + + set_log_handles( + FLAGS.log_level, + Path(FLAGS.log_path) if FLAGS.log_path is not None else None, + mpi_log=None, + ) + log.debug("Log handles were successfully set") + log.info("DeePMD version: %s", __version__) + + if FLAGS.command == "train": + train(FLAGS) + elif FLAGS.command == "freeze": + if Path(FLAGS.checkpoint_folder).is_dir(): + checkpoint_path = Path(FLAGS.checkpoint_folder) + latest_ckpt_file = (checkpoint_path / "checkpoint").read_text() + 
FLAGS.model = str(checkpoint_path.joinpath(latest_ckpt_file)) + else: + FLAGS.model = FLAGS.checkpoint_folder + FLAGS.output = str(Path(FLAGS.output).with_suffix("")) + freeze(FLAGS) + elif FLAGS.command == "show": + show(FLAGS) + elif FLAGS.command == "change-bias": + change_bias(FLAGS) + else: + raise RuntimeError(f"Invalid command {FLAGS.command}!") + + +if __name__ == "__main__": + main() diff --git a/deepmd/pd/infer/__init__.py b/deepmd/pd/infer/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pd/infer/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pd/infer/deep_eval.py b/deepmd/pd/infer/deep_eval.py new file mode 100644 index 0000000000..d939c6ef7f --- /dev/null +++ b/deepmd/pd/infer/deep_eval.py @@ -0,0 +1,649 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.common import PRECISION_DICT as NP_PRECISION_DICT +from deepmd.dpmodel.output_def import ( + ModelOutputDef, + OutputVariableCategory, + OutputVariableDef, +) +from deepmd.infer.deep_dipole import ( + DeepDipole, +) +from deepmd.infer.deep_dos import ( + DeepDOS, +) +from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper +from deepmd.infer.deep_eval import ( + DeepEvalBackend, +) +from deepmd.infer.deep_polar import ( + DeepGlobalPolar, + DeepPolar, +) +from deepmd.infer.deep_pot import ( + DeepPot, +) +from deepmd.infer.deep_property import ( + DeepProperty, +) +from deepmd.infer.deep_wfc import ( + DeepWFC, +) +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNetConsistent, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) +from deepmd.pd.utils.auto_batch_size import ( + AutoBatchSize, +) +from deepmd.pd.utils.env import ( + DEVICE, + GLOBAL_PD_FLOAT_PRECISION, + RESERVED_PRECISON_DICT, + enable_prim, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +if TYPE_CHECKING: + import ase.neighborlist + + +class DeepEval(DeepEvalBackend): + """Paddle backend implementation of DeepEval. + + Parameters + ---------- + model_file : Path + The name of the frozen model file. + output_def : ModelOutputDef + The output definition of the model. + *args : list + Positional arguments. + auto_batch_size : bool or int or AutomaticBatchSize, default: False + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. 
+ """ + + def __init__( + self, + model_file: str, + output_def: ModelOutputDef, + *args: Any, + auto_batch_size: Union[bool, int, AutoBatchSize] = True, + neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + head: Optional[Union[str, int]] = None, + **kwargs: Any, + ): + enable_prim(True) + self.output_def = output_def + self.model_path = model_file + if str(self.model_path).endswith(".pd"): + state_dict = paddle.load(model_file) + if "model" in state_dict: + state_dict = state_dict["model"] + self.input_param = state_dict["_extra_state"]["model_params"] + self.model_def_script = self.input_param + self.multi_task = "model_dict" in self.input_param + if self.multi_task: + model_keys = list(self.input_param["model_dict"].keys()) + if isinstance(head, int): + head = model_keys[0] + assert ( + head is not None + ), f"Head must be set for multitask model! Available heads are: {model_keys}" + assert ( + head in model_keys + ), f"No head named {head} in model! Available heads are: {model_keys}" + self.input_param = self.input_param["model_dict"][head] + state_dict_head = {"_extra_state": state_dict["_extra_state"]} + for item in state_dict: + if f"model.{head}." in item: + state_dict_head[ + item.replace(f"model.{head}.", "model.Default.") + ] = state_dict[item].clone() + state_dict = state_dict_head + model = get_model(self.input_param).to(DEVICE) + # model = paddle.jit.to_static(model) + self.dp = ModelWrapper(model) + self.dp.set_state_dict(state_dict) + else: + # self.dp = paddle.jit.load(self.model_path.split(".json")[0]) + raise ValueError(f"Unknown model file format: {self.model_path}!") + self.rcut = self.dp.model["Default"].get_rcut() + self.type_map = self.dp.model["Default"].get_type_map() + if isinstance(auto_batch_size, bool): + if auto_batch_size: + self.auto_batch_size = AutoBatchSize() + else: + self.auto_batch_size = None + elif isinstance(auto_batch_size, int): + self.auto_batch_size = AutoBatchSize(auto_batch_size) + elif isinstance(auto_batch_size, AutoBatchSize): + self.auto_batch_size = auto_batch_size + else: + raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") + self._has_spin = getattr(self.dp.model["Default"], "has_spin", False) + if callable(self._has_spin): + self._has_spin = self._has_spin() + + def get_rcut(self) -> float: + """Get the cutoff radius of this model.""" + return self.rcut + + def get_ntypes(self) -> int: + """Get the number of atom types of this model.""" + return len(self.type_map) + + def get_type_map(self) -> list[str]: + """Get the type map (element name of the atom types) of this model.""" + return self.type_map + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this DP.""" + return self.dp.model["Default"].get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this DP.""" + return self.dp.model["Default"].get_dim_aparam() + + def get_intensive(self) -> bool: + return self.dp.model["Default"].get_intensive() + + @property + def model_type(self) -> type["DeepEvalWrapper"]: + """The the evaluator of the model type.""" + model_output_type = self.dp.model["Default"].model_output_type() + if "energy" in model_output_type: + return DeepPot + elif "dos" in model_output_type: + return DeepDOS + elif "dipole" in model_output_type: + return DeepDipole + elif "polar" in model_output_type: + return DeepPolar + elif "global_polar" in model_output_type: + return DeepGlobalPolar + elif "wfc" in model_output_type: + 
return DeepWFC + elif "property" in model_output_type: + return DeepProperty + else: + raise RuntimeError("Unknown model type") + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.dp.model["Default"].get_sel_type() + + def get_numb_dos(self) -> int: + """Get the number of DOS.""" + return self.dp.model["Default"].get_numb_dos() + + def get_task_dim(self) -> int: + """Get the output dimension.""" + return self.dp.model["Default"].get_task_dim() + + def get_has_efield(self): + """Check if the model has efield.""" + return False + + def get_ntypes_spin(self): + """Get the number of spin atom types of this model. Only used in old implement.""" + return 0 + + def get_has_spin(self): + """Check if the model has spin atom types.""" + return self._has_spin + + def eval( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + atomic: bool = False, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + **kwargs: Any, + ) -> dict[str, np.ndarray]: + """Evaluate the energy, force and virial by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + atomic + Calculate the atomic energy and virial + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + **kwargs + Other parameters + + Returns + ------- + output_dict : dict + The output of the evaluation. The keys are the names of the output + variables, and the values are the corresponding output arrays. + """ + # convert all of the input to numpy array + atom_types = np.array(atom_types, dtype=np.int32) + coords = np.array(coords) + if cells is not None: + cells = np.array(cells) + natoms, numb_test = self._get_natoms_and_nframes( + coords, atom_types, len(atom_types.shape) > 1 + ) + request_defs = self._get_request_defs(atomic) + if "spin" not in kwargs or kwargs["spin"] is None: + out = self._eval_func(self._eval_model, numb_test, natoms)( + coords, cells, atom_types, fparam, aparam, request_defs + ) + else: + out = self._eval_func(self._eval_model_spin, numb_test, natoms)( + coords, + cells, + atom_types, + np.array(kwargs["spin"]), + fparam, + aparam, + request_defs, + ) + return dict( + zip( + [x.name for x in request_defs], + out, + ) + ) + + def _get_request_defs(self, atomic: bool) -> list[OutputVariableDef]: + """Get the requested output definitions. + + When atomic is True, all output_def are requested. + When atomic is False, only energy (tensor), force, and virial + are requested. + + Parameters + ---------- + atomic : bool + Whether to request the atomic output. + + Returns + ------- + list[OutputVariableDef] + The requested output definitions. 
+ """ + if atomic: + return list(self.output_def.var_defs.values()) + else: + return [ + x + for x in self.output_def.var_defs.values() + if x.category + in ( + OutputVariableCategory.OUT, + OutputVariableCategory.REDU, + OutputVariableCategory.DERV_R, + OutputVariableCategory.DERV_C_REDU, + ) + ] + + def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable: + """Wrapper method with auto batch size. + + Parameters + ---------- + inner_func : Callable + the method to be wrapped + numb_test : int + number of tests + natoms : int + number of atoms + + Returns + ------- + Callable + the wrapper + """ + if self.auto_batch_size is not None: + + def eval_func(*args, **kwargs): + return self.auto_batch_size.execute_all( + inner_func, numb_test, natoms, *args, **kwargs + ) + + else: + eval_func = inner_func + return eval_func + + def _get_natoms_and_nframes( + self, + coords: np.ndarray, + atom_types: np.ndarray, + mixed_type: bool = False, + ) -> tuple[int, int]: + if mixed_type: + natoms = len(atom_types[0]) + else: + natoms = len(atom_types) + if natoms == 0: + assert coords.size == 0 + else: + coords = np.reshape(np.array(coords), [-1, natoms * 3]) + nframes = coords.shape[0] + return natoms, nframes + + def _eval_model( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + fparam: Optional[np.ndarray], + aparam: Optional[np.ndarray], + request_defs: list[OutputVariableDef], + ): + model = self.dp.to(DEVICE) + prec = NP_PRECISION_DICT[RESERVED_PRECISON_DICT[GLOBAL_PD_FLOAT_PRECISION]] + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + atom_types = np.tile(atom_types, nframes).reshape([nframes, -1]) + else: + natoms = len(atom_types[0]) + + coord_input = paddle.to_tensor( + coords.reshape([nframes, natoms, 3]).astype(prec), + dtype=GLOBAL_PD_FLOAT_PRECISION, + place=DEVICE, + ) + type_input = paddle.to_tensor( + atom_types.astype(NP_PRECISION_DICT[RESERVED_PRECISON_DICT[paddle.int64]]), + dtype=paddle.int64, + place=DEVICE, + ) + if cells is not None: + box_input = paddle.to_tensor( + cells.reshape([nframes, 3, 3]), + dtype=GLOBAL_PD_FLOAT_PRECISION, + place=DEVICE, + ) + else: + box_input = None + if fparam is not None: + fparam_input = to_paddle_tensor( + fparam.reshape([nframes, self.get_dim_fparam()]) + ) + else: + fparam_input = None + if aparam is not None: + aparam_input = to_paddle_tensor( + aparam.reshape([nframes, natoms, self.get_dim_aparam()]) + ) + else: + aparam_input = None + do_atomic_virial = any( + x.category == OutputVariableCategory.DERV_C for x in request_defs + ) + batch_output = model( + coord_input, + type_input, + box=box_input, + do_atomic_virial=do_atomic_virial, + fparam=fparam_input, + aparam=aparam_input, + ) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + + results = [] + for odef in request_defs: + pd_name = self._OUTDEF_DP2BACKEND[odef.name] + if pd_name in batch_output: + shape = self._get_output_shape(odef, nframes, natoms) + out = batch_output[pd_name].reshape(shape).numpy() + results.append(out) + else: + shape = self._get_output_shape(odef, nframes, natoms) + results.append( + np.full(np.abs(shape), np.nan, dtype=prec) + ) # this is kinda hacky + return tuple(results) + + def _eval_model_spin( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + spins: np.ndarray, + fparam: Optional[np.ndarray], + aparam: Optional[np.ndarray], + request_defs: list[OutputVariableDef], + ): + model = 
self.dp.to(DEVICE) + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + atom_types = np.tile(atom_types, nframes).reshape([nframes, -1]) + else: + natoms = len(atom_types[0]) + + coord_input = paddle.to_tensor( + coords.reshape([nframes, natoms, 3]), + dtype=GLOBAL_PD_FLOAT_PRECISION, + place=DEVICE, + ) + type_input = paddle.to_tensor(atom_types, dtype=paddle.int64, place=DEVICE) + spin_input = paddle.to_tensor( + spins.reshape([nframes, natoms, 3]), + dtype=GLOBAL_PD_FLOAT_PRECISION, + place=DEVICE, + ) + if cells is not None: + box_input = paddle.to_tensor( + cells.reshape([nframes, 3, 3]), + dtype=GLOBAL_PD_FLOAT_PRECISION, + place=DEVICE, + ) + else: + box_input = None + if fparam is not None: + fparam_input = to_paddle_tensor( + fparam.reshape([nframes, self.get_dim_fparam()]) + ) + else: + fparam_input = None + if aparam is not None: + aparam_input = to_paddle_tensor( + aparam.reshape([nframes, natoms, self.get_dim_aparam()]) + ) + else: + aparam_input = None + + do_atomic_virial = any( + x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs + ) + batch_output = model( + coord_input, + type_input, + spin=spin_input, + box=box_input, + do_atomic_virial=do_atomic_virial, + fparam=fparam_input, + aparam=aparam_input, + ) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + + results = [] + for odef in request_defs: + pd_name = self._OUTDEF_DP2BACKEND[odef.name] + if pd_name in batch_output: + shape = self._get_output_shape(odef, nframes, natoms) + out = batch_output[pd_name].reshape(shape).numpy() + results.append(out) + else: + shape = self._get_output_shape(odef, nframes, natoms) + results.append( + np.full( + np.abs(shape), + np.nan, + dtype=NP_PRECISION_DICT[ + RESERVED_PRECISON_DICT[GLOBAL_PD_FLOAT_PRECISION] + ], + ) + ) # this is kinda hacky + return tuple(results) + + def _get_output_shape(self, odef, nframes, natoms): + if odef.category == OutputVariableCategory.DERV_C_REDU: + # virial + return [nframes, *odef.shape[:-1], 9] + elif odef.category == OutputVariableCategory.REDU: + # energy + return [nframes, *odef.shape, 1] + elif odef.category == OutputVariableCategory.DERV_C: + # atom_virial + return [nframes, *odef.shape[:-1], natoms, 9] + elif odef.category == OutputVariableCategory.DERV_R: + # force + return [nframes, *odef.shape[:-1], natoms, 3] + elif odef.category == OutputVariableCategory.OUT: + # atom_energy, atom_tensor + # Something wrong here? + # return [nframes, *shape, natoms, 1] + return [nframes, natoms, *odef.shape, 1] + else: + raise RuntimeError("unknown category") + + def eval_typeebd(self) -> np.ndarray: + """Evaluate output of type embedding network by using this model. + + Returns + ------- + np.ndarray + The output of type embedding network. The shape is [ntypes, o_size] or [ntypes + 1, o_size], + where ntypes is the number of types, and o_size is the number of nodes + in the output layer. If there are multiple type embedding networks, + these outputs will be concatenated along the second axis. + + Raises + ------ + KeyError + If the model does not enable type embedding. + + See Also + -------- + deepmd.pd.model.network.network.TypeEmbedNetConsistent : + The type embedding network. 
+ """ + out = [] + for mm in self.dp.model["Default"].modules(): + if mm.original_name == TypeEmbedNetConsistent.__name__: + out.append(mm(DEVICE)) + if not out: + raise KeyError("The model has no type embedding networks.") + typeebd = paddle.concat(out, axis=1) + return to_numpy_array(typeebd) + + def get_model_def_script(self) -> str: + """Get model definition script.""" + return self.model_def_script + + def eval_descriptor( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + **kwargs: Any, + ) -> np.ndarray: + """Evaluate descriptors by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + + Returns + ------- + descriptor + Descriptors. + """ + model = self.dp.model["Default"] + model.set_eval_descriptor_hook(True) + self.eval( + coords, + cells, + atom_types, + atomic=False, + fparam=fparam, + aparam=aparam, + **kwargs, + ) + descriptor = model.eval_descriptor() + model.set_eval_descriptor_hook(False) + return to_numpy_array(descriptor) diff --git a/deepmd/pd/infer/inference.py b/deepmd/pd/infer/inference.py new file mode 100644 index 0000000000..1ebadd24c9 --- /dev/null +++ b/deepmd/pd/infer/inference.py @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from copy import ( + deepcopy, +) + +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) +from deepmd.pd.utils.env import ( + DEVICE, + JIT, +) + +# if paddle.__version__.startswith("2"): +# import paddle._dynamo +log = logging.getLogger(__name__) + + +class Tester: + def __init__( + self, + model_ckpt, + head=None, + ): + """Construct a DeePMD tester. + + Args: + - config: The Dict-like configuration with training options. + """ + # Model + state_dict = paddle.load(model_ckpt) + if "model" in state_dict: + state_dict = state_dict["model"] + model_params = state_dict["_extra_state"]["model_params"] + self.multi_task = "model_dict" in model_params + if self.multi_task: + assert head is not None, "Head must be specified in multitask mode!" + self.head = head + assert head in model_params["model_dict"], ( + f"Specified head {head} not found in model {model_ckpt}! " + f"Available ones are {list(model_params['model_dict'].keys())}." + ) + model_params = model_params["model_dict"][head] + state_dict_head = {"_extra_state": state_dict["_extra_state"]} + for item in state_dict: + if f"model.{head}." 
in item: + state_dict_head[ + item.replace(f"model.{head}.", "model.Default.") + ] = state_dict[item].clone() + state_dict = state_dict_head + + self.model_params = deepcopy(model_params) + self.model = get_model(model_params).to(DEVICE) + + # Model Wrapper + self.wrapper = ModelWrapper(self.model) # inference only + if JIT: + raise NotImplementedError + # self.wrapper = paddle.jit.to_static(self.wrapper) + self.wrapper.set_state_dict(state_dict) diff --git a/deepmd/pd/loss/__init__.py b/deepmd/pd/loss/__init__.py new file mode 100644 index 0000000000..78528bceaa --- /dev/null +++ b/deepmd/pd/loss/__init__.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .denoise import ( + DenoiseLoss, +) +from .dos import ( + DOSLoss, +) +from .ener import ( + EnergyStdLoss, +) +from .ener_spin import ( + EnergySpinLoss, +) +from .loss import ( + TaskLoss, +) +from .property import ( + PropertyLoss, +) +from .tensor import ( + TensorLoss, +) + +__all__ = [ + "DenoiseLoss", + "EnergyStdLoss", + "EnergySpinLoss", + "TensorLoss", + "TaskLoss", + "DOSLoss", + "PropertyLoss", +] diff --git a/deepmd/pd/loss/denoise.py b/deepmd/pd/loss/denoise.py new file mode 100644 index 0000000000..45e0ac73dd --- /dev/null +++ b/deepmd/pd/loss/denoise.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import paddle +import paddle.nn.functional as F + +from deepmd.pd.loss.loss import ( + TaskLoss, +) +from deepmd.pd.utils import ( + env, +) + + +class DenoiseLoss(TaskLoss): + def __init__( + self, + ntypes, + masked_token_loss=1.0, + masked_coord_loss=1.0, + norm_loss=0.01, + use_l1=True, + beta=1.00, + mask_loss_coord=True, + mask_loss_token=True, + **kwargs, + ): + """Construct a layer to compute loss on coord, and type reconstruction.""" + super().__init__() + self.ntypes = ntypes + self.masked_token_loss = masked_token_loss + self.masked_coord_loss = masked_coord_loss + self.norm_loss = norm_loss + self.has_coord = self.masked_coord_loss > 0.0 + self.has_token = self.masked_token_loss > 0.0 + self.has_norm = self.norm_loss > 0.0 + self.use_l1 = use_l1 + self.beta = beta + self.frac_beta = 1.00 / self.beta + self.mask_loss_coord = mask_loss_coord + self.mask_loss_token = mask_loss_token + + def forward(self, model_pred, label, natoms, learning_rate, mae=False): + """Return loss on coord and type denoise. + + Returns + ------- + - loss: Loss to minimize. 
+ """ + updated_coord = model_pred["updated_coord"] + logits = model_pred["logits"] + clean_coord = label["clean_coord"] + clean_type = label["clean_type"] + coord_mask = label["coord_mask"] + type_mask = label["type_mask"] + + loss = paddle.zeros([1], dtype=env.GLOBAL_PD_FLOAT_PRECISION).to(env.DEVICE)[0] + more_loss = {} + if self.has_coord: + if self.mask_loss_coord: + masked_updated_coord = updated_coord[coord_mask] + masked_clean_coord = clean_coord[coord_mask] + if masked_updated_coord.shape[0] > 0: + coord_loss = F.smooth_l1_loss( + masked_updated_coord.reshape([-1, 3]), + masked_clean_coord.reshape([-1, 3]), + reduction="mean", + beta=self.beta, + ) + else: + coord_loss = paddle.zeros( + [1], dtype=env.GLOBAL_PD_FLOAT_PRECISION + ).to(env.DEVICE)[0] + else: + coord_loss = F.smooth_l1_loss( + updated_coord.reshape([-1, 3]), + clean_coord.reshape([-1, 3]), + reduction="mean", + beta=self.beta, + ) + loss += self.masked_coord_loss * coord_loss + more_loss["coord_l1_error"] = coord_loss.detach() + if self.has_token: + if self.mask_loss_token: + masked_logits = logits[type_mask] + masked_target = clean_type[type_mask] + if masked_logits.shape[0] > 0: + token_loss = F.nll_loss( + F.log_softmax(masked_logits, axis=-1), + masked_target, + reduction="mean", + ) + else: + token_loss = paddle.zeros( + [1], dtype=env.GLOBAL_PD_FLOAT_PRECISION + ).to(env.DEVICE)[0] + else: + token_loss = F.nll_loss( + F.log_softmax(logits.reshape([-1, self.ntypes - 1]), axis=-1), + clean_type.reshape([-1]), + reduction="mean", + ) + loss += self.masked_token_loss * token_loss + more_loss["token_error"] = token_loss.detach() + if self.has_norm: + norm_x = model_pred["norm_x"] + norm_delta_pair_rep = model_pred["norm_delta_pair_rep"] + loss += self.norm_loss * (norm_x + norm_delta_pair_rep) + more_loss["norm_loss"] = norm_x.detach() + norm_delta_pair_rep.detach() + + return loss, more_loss diff --git a/deepmd/pd/loss/dos.py b/deepmd/pd/loss/dos.py new file mode 100644 index 0000000000..ae13e5e429 --- /dev/null +++ b/deepmd/pd/loss/dos.py @@ -0,0 +1,257 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.pd.loss.loss import ( + TaskLoss, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class DOSLoss(TaskLoss): + def __init__( + self, + starter_learning_rate: float, + numb_dos: int, + start_pref_dos: float = 1.00, + limit_pref_dos: float = 1.00, + start_pref_cdf: float = 1000, + limit_pref_cdf: float = 1.00, + start_pref_ados: float = 0.0, + limit_pref_ados: float = 0.0, + start_pref_acdf: float = 0.0, + limit_pref_acdf: float = 0.0, + inference=False, + **kwargs, + ): + r"""Construct a loss for local and global tensors. + + Parameters + ---------- + tensor_name : str + The name of the tensor in the model predictions to compute the loss. + tensor_size : int + The size (dimension) of the tensor. + label_name : str + The name of the tensor in the labels to compute the loss. + pref_atomic : float + The prefactor of the weight of atomic loss. It should be larger than or equal to 0. + pref : float + The prefactor of the weight of global loss. It should be larger than or equal to 0. + inference : bool + If true, it will output all losses found in output, ignoring the pre-factors. + **kwargs + Other keyword arguments. 
+ """ + super().__init__() + self.starter_learning_rate = starter_learning_rate + self.numb_dos = numb_dos + self.inference = inference + + self.start_pref_dos = start_pref_dos + self.limit_pref_dos = limit_pref_dos + self.start_pref_cdf = start_pref_cdf + self.limit_pref_cdf = limit_pref_cdf + + self.start_pref_ados = start_pref_ados + self.limit_pref_ados = limit_pref_ados + self.start_pref_acdf = start_pref_acdf + self.limit_pref_acdf = limit_pref_acdf + + assert ( + self.start_pref_dos >= 0.0 + and self.limit_pref_dos >= 0.0 + and self.start_pref_cdf >= 0.0 + and self.limit_pref_cdf >= 0.0 + and self.start_pref_ados >= 0.0 + and self.limit_pref_ados >= 0.0 + and self.start_pref_acdf >= 0.0 + and self.limit_pref_acdf >= 0.0 + ), "Can not assign negative weight to `pref` and `pref_atomic`" + + self.has_dos = (start_pref_dos != 0.0 and limit_pref_dos != 0.0) or inference + self.has_cdf = (start_pref_cdf != 0.0 and limit_pref_cdf != 0.0) or inference + self.has_ados = (start_pref_ados != 0.0 and limit_pref_ados != 0.0) or inference + self.has_acdf = (start_pref_acdf != 0.0 and limit_pref_acdf != 0.0) or inference + + assert ( + self.has_dos or self.has_cdf or self.has_ados or self.has_acdf + ), AssertionError("Can not assian zero weight both to `pref` and `pref_atomic`") + + def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False): + """Return loss on local and global tensors. + + Parameters + ---------- + input_dict : dict[str, paddle.Tensor] + Model inputs. + model : paddle.nn.Layer + Model to be used to output the predictions. + label : dict[str, paddle.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, paddle.Tensor] + Model predictions. + loss: paddle.Tensor + Loss for model to minimize. + more_loss: dict[str, paddle.Tensor] + Other losses for display. 
+ """ + model_pred = model(**input_dict) + + coef = learning_rate / self.starter_learning_rate + pref_dos = ( + self.limit_pref_dos + (self.start_pref_dos - self.limit_pref_dos) * coef + ) + pref_cdf = ( + self.limit_pref_cdf + (self.start_pref_cdf - self.limit_pref_cdf) * coef + ) + pref_ados = ( + self.limit_pref_ados + (self.start_pref_ados - self.limit_pref_ados) * coef + ) + pref_acdf = ( + self.limit_pref_acdf + (self.start_pref_acdf - self.limit_pref_acdf) * coef + ) + + loss = paddle.zeros([1], dtype=env.GLOBAL_PD_FLOAT_PRECISION).to(env.DEVICE)[0] + more_loss = {} + if self.has_ados and "atom_dos" in model_pred and "atom_dos" in label: + find_local = label.get("find_atom_dos", 0.0) + pref_ados = pref_ados * find_local + local_tensor_pred_dos = model_pred["atom_dos"].reshape( + [-1, natoms, self.numb_dos] + ) + local_tensor_label_dos = label["atom_dos"].reshape( + [-1, natoms, self.numb_dos] + ) + diff = (local_tensor_pred_dos - local_tensor_label_dos).reshape( + [-1, self.numb_dos] + ) + if "mask" in model_pred: + diff = diff[model_pred["mask"].reshape([-1]).astype("bool")] + l2_local_loss_dos = paddle.mean(paddle.square(diff)) + if not self.inference: + more_loss["l2_local_dos_loss"] = self.display_if_exist( + l2_local_loss_dos.detach(), find_local + ) + loss += pref_ados * l2_local_loss_dos + rmse_local_dos = l2_local_loss_dos.sqrt() + more_loss["rmse_local_dos"] = self.display_if_exist( + rmse_local_dos.detach(), find_local + ) + if self.has_acdf and "atom_dos" in model_pred and "atom_dos" in label: + find_local = label.get("find_atom_dos", 0.0) + pref_acdf = pref_acdf * find_local + local_tensor_pred_cdf = paddle.cumsum( + model_pred["atom_dos"].reshape([-1, natoms, self.numb_dos]), axis=-1 + ) + local_tensor_label_cdf = paddle.cumsum( + label["atom_dos"].reshape([-1, natoms, self.numb_dos]), axis=-1 + ) + diff = (local_tensor_pred_cdf - local_tensor_label_cdf).reshape( + [-1, self.numb_dos] + ) + if "mask" in model_pred: + diff = diff[model_pred["mask"].reshape([-1]).astype("bool")] + l2_local_loss_cdf = paddle.mean(paddle.square(diff)) + if not self.inference: + more_loss["l2_local_cdf_loss"] = self.display_if_exist( + l2_local_loss_cdf.detach(), find_local + ) + loss += pref_acdf * l2_local_loss_cdf + rmse_local_cdf = l2_local_loss_cdf.sqrt() + more_loss["rmse_local_cdf"] = self.display_if_exist( + rmse_local_cdf.detach(), find_local + ) + if self.has_dos and "dos" in model_pred and "dos" in label: + find_global = label.get("find_dos", 0.0) + pref_dos = pref_dos * find_global + global_tensor_pred_dos = model_pred["dos"].reshape([-1, self.numb_dos]) + global_tensor_label_dos = label["dos"].reshape([-1, self.numb_dos]) + diff = global_tensor_pred_dos - global_tensor_label_dos + if "mask" in model_pred: + atom_num = model_pred["mask"].sum(-1, keepdim=True) + l2_global_loss_dos = paddle.mean( + paddle.sum( + paddle.square(diff) * atom_num.astype(diff.dtype), axis=0 + ) + / (atom_num.sum().astype(diff.dtype)) + ) + atom_num = paddle.mean(atom_num.astype(diff.dtype)) + else: + atom_num = natoms + l2_global_loss_dos = paddle.mean(paddle.square(diff)) + if not self.inference: + more_loss["l2_global_dos_loss"] = self.display_if_exist( + l2_global_loss_dos.detach(), find_global + ) + loss += pref_dos * l2_global_loss_dos + rmse_global_dos = l2_global_loss_dos.sqrt() / atom_num + more_loss["rmse_global_dos"] = self.display_if_exist( + rmse_global_dos.detach(), find_global + ) + if self.has_cdf and "dos" in model_pred and "dos" in label: + find_global = label.get("find_dos", 0.0) + 
pref_cdf = pref_cdf * find_global + global_tensor_pred_cdf = paddle.cumsum( + model_pred["dos"].reshape([-1, self.numb_dos]), axis=-1 + ) + global_tensor_label_cdf = paddle.cumsum( + label["dos"].reshape([-1, self.numb_dos]), axis=-1 + ) + diff = global_tensor_pred_cdf - global_tensor_label_cdf + if "mask" in model_pred: + atom_num = model_pred["mask"].sum(-1, keepdim=True) + l2_global_loss_cdf = paddle.mean( + paddle.sum(paddle.square(diff) * atom_num, axis=0) + / (atom_num.sum().astype(diff.dtype)) + ) + atom_num = paddle.mean(float(atom_num)) + else: + atom_num = natoms + l2_global_loss_cdf = paddle.mean(paddle.square(diff)) + if not self.inference: + more_loss["l2_global_cdf_loss"] = self.display_if_exist( + l2_global_loss_cdf.detach(), find_global + ) + loss += pref_cdf * l2_global_loss_cdf + rmse_global_dos = l2_global_loss_cdf.sqrt() / atom_num + more_loss["rmse_global_cdf"] = self.display_if_exist( + rmse_global_dos.detach(), find_global + ) + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> list[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_ados or self.has_acdf: + label_requirement.append( + DataRequirementItem( + "atom_dos", + ndof=self.numb_dos, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_dos or self.has_cdf: + label_requirement.append( + DataRequirementItem( + "dos", + ndof=self.numb_dos, + atomic=False, + must=False, + high_prec=False, + ) + ) + return label_requirement diff --git a/deepmd/pd/loss/ener.py b/deepmd/pd/loss/ener.py new file mode 100644 index 0000000000..7c5d848b45 --- /dev/null +++ b/deepmd/pd/loss/ener.py @@ -0,0 +1,428 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +import paddle +import paddle.nn.functional as F + +from deepmd.pd.loss.loss import ( + TaskLoss, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.pd.utils.env import ( + GLOBAL_PD_FLOAT_PRECISION, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class EnergyStdLoss(TaskLoss): + def __init__( + self, + starter_learning_rate=1.0, + start_pref_e=0.0, + limit_pref_e=0.0, + start_pref_f=0.0, + limit_pref_f=0.0, + start_pref_v=0.0, + limit_pref_v=0.0, + start_pref_ae: float = 0.0, + limit_pref_ae: float = 0.0, + start_pref_pf: float = 0.0, + limit_pref_pf: float = 0.0, + relative_f: Optional[float] = None, + enable_atom_ener_coeff: bool = False, + start_pref_gf: float = 0.0, + limit_pref_gf: float = 0.0, + numb_generalized_coord: int = 0, + use_l1_all: bool = False, + inference=False, + **kwargs, + ): + r"""Construct a layer to compute loss on energy, force and virial. + + Parameters + ---------- + starter_learning_rate : float + The learning rate at the start of the training. + start_pref_e : float + The prefactor of energy loss at the start of the training. + limit_pref_e : float + The prefactor of energy loss at the end of the training. + start_pref_f : float + The prefactor of force loss at the start of the training. + limit_pref_f : float + The prefactor of force loss at the end of the training. + start_pref_v : float + The prefactor of virial loss at the start of the training. + limit_pref_v : float + The prefactor of virial loss at the end of the training. + start_pref_ae : float + The prefactor of atomic energy loss at the start of the training. + limit_pref_ae : float + The prefactor of atomic energy loss at the end of the training. 
+ start_pref_pf : float + The prefactor of atomic prefactor force loss at the start of the training. + limit_pref_pf : float + The prefactor of atomic prefactor force loss at the end of the training. + relative_f : float + If provided, relative force error will be used in the loss. The difference + of force will be normalized by the magnitude of the force in the label with + a shift given by relative_f + enable_atom_ener_coeff : bool + if true, the energy will be computed as \sum_i c_i E_i + start_pref_gf : float + The prefactor of generalized force loss at the start of the training. + limit_pref_gf : float + The prefactor of generalized force loss at the end of the training. + numb_generalized_coord : int + The dimension of generalized coordinates. + use_l1_all : bool + Whether to use L1 loss, if False (default), it will use L2 loss. + inference : bool + If true, it will output all losses found in output, ignoring the pre-factors. + **kwargs + Other keyword arguments. + """ + super().__init__() + self.starter_learning_rate = starter_learning_rate + self.has_e = (start_pref_e != 0.0 and limit_pref_e != 0.0) or inference + self.has_f = (start_pref_f != 0.0 and limit_pref_f != 0.0) or inference + self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference + self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference + self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference + self.has_gf = start_pref_gf != 0.0 and limit_pref_gf != 0.0 + + self.start_pref_e = start_pref_e + self.limit_pref_e = limit_pref_e + self.start_pref_f = start_pref_f + self.limit_pref_f = limit_pref_f + self.start_pref_v = start_pref_v + self.limit_pref_v = limit_pref_v + self.start_pref_ae = start_pref_ae + self.limit_pref_ae = limit_pref_ae + self.start_pref_pf = start_pref_pf + self.limit_pref_pf = limit_pref_pf + self.start_pref_gf = start_pref_gf + self.limit_pref_gf = limit_pref_gf + self.relative_f = relative_f + self.enable_atom_ener_coeff = enable_atom_ener_coeff + self.numb_generalized_coord = numb_generalized_coord + if self.has_gf and self.numb_generalized_coord < 1: + raise RuntimeError( + "When generalized force loss is used, the dimension of generalized coordinates should be larger than 0" + ) + self.use_l1_all = use_l1_all + self.inference = inference + + def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): + """Return loss on energy and force. + + Parameters + ---------- + input_dict : dict[str, paddle.Tensor] + Model inputs. + model : paddle.nn.Layer + Model to be used to output the predictions. + label : dict[str, paddle.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, paddle.Tensor] + Model predictions. + loss: paddle.Tensor + Loss for model to minimize. + more_loss: dict[str, paddle.Tensor] + Other losses for display. 
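These prefactors are normally supplied through the `loss` section of the training input; an illustrative (not recommended) energy/force/virial configuration whose keys mirror EnergyStdLoss.__init__:

ener_loss_config = {
    "type": "ener",
    "start_pref_e": 0.02,
    "limit_pref_e": 1.0,
    "start_pref_f": 1000.0,
    "limit_pref_f": 1.0,
    "start_pref_v": 0.0,
    "limit_pref_v": 0.0,
}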
+ """ + model_pred = model(**input_dict) + coef = learning_rate / self.starter_learning_rate + pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef + pref_f = self.limit_pref_f + (self.start_pref_f - self.limit_pref_f) * coef + pref_v = self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * coef + pref_ae = self.limit_pref_ae + (self.start_pref_ae - self.limit_pref_ae) * coef + pref_pf = self.limit_pref_pf + (self.start_pref_pf - self.limit_pref_pf) * coef + pref_gf = self.limit_pref_gf + (self.start_pref_gf - self.limit_pref_gf) * coef + + loss = paddle.zeros([1], dtype=env.GLOBAL_PD_FLOAT_PRECISION).to(env.DEVICE)[0] + more_loss = {} + # more_loss['log_keys'] = [] # showed when validation on the fly + # more_loss['test_keys'] = [] # showed when doing dp test + atom_norm = 1.0 / natoms + if self.has_e and "energy" in model_pred and "energy" in label: + energy_pred = model_pred["energy"] + energy_label = label["energy"] + if self.enable_atom_ener_coeff and "atom_energy" in model_pred: + atom_ener_pred = model_pred["atom_energy"] + # when ener_coeff (\nu) is defined, the energy is defined as + # E = \sum_i \nu_i E_i + # instead of the sum of atomic energies. + # + # A case is that we want to train reaction energy + # A + B -> C + D + # E = - E(A) - E(B) + E(C) + E(D) + # A, B, C, D could be put far away from each other + atom_ener_coeff = label["atom_ener_coeff"] + atom_ener_coeff = atom_ener_coeff.reshape(atom_ener_pred.shape) + energy_pred = paddle.sum(atom_ener_coeff * atom_ener_pred, axis=1) + find_energy = label.get("find_energy", 0.0) + pref_e = pref_e * find_energy + if not self.use_l1_all: + l2_ener_loss = paddle.mean(paddle.square(energy_pred - energy_label)) + if not self.inference: + more_loss["l2_ener_loss"] = self.display_if_exist( + l2_ener_loss.detach(), find_energy + ) + loss += atom_norm * (pref_e * l2_ener_loss) + rmse_e = l2_ener_loss.sqrt() * atom_norm + more_loss["rmse_e"] = self.display_if_exist( + rmse_e.detach(), find_energy + ) + # more_loss['log_keys'].append('rmse_e') + else: # use l1 and for all atoms + l1_ener_loss = F.l1_loss( + energy_pred.reshape([-1]), + energy_label.reshape([-1]), + reduction="sum", + ) + loss += pref_e * l1_ener_loss + more_loss["mae_e"] = self.display_if_exist( + F.l1_loss( + energy_pred.reshape([-1]), + energy_label.reshape([-1]), + reduction="mean", + ).detach(), + find_energy, + ) + # more_loss['log_keys'].append('rmse_e') + if mae: + mae_e = paddle.mean(paddle.abs(energy_pred - energy_label)) * atom_norm + more_loss["mae_e"] = self.display_if_exist(mae_e.detach(), find_energy) + mae_e_all = paddle.mean(paddle.abs(energy_pred - energy_label)) + more_loss["mae_e_all"] = self.display_if_exist( + mae_e_all.detach(), find_energy + ) + + if ( + (self.has_f or self.has_pf or self.relative_f or self.has_gf) + and "force" in model_pred + and "force" in label + ): + find_force = label.get("find_force", 0.0) + pref_f = pref_f * find_force + force_pred = model_pred["force"] + force_label = label["force"] + diff_f = (force_label - force_pred).reshape([-1]) + + if self.relative_f is not None: + force_label_3 = force_label.reshape([-1, 3]) + # norm_f = force_label_3.norm(axis=1, keepdim=True) + self.relative_f + norm_f = ( + decomp.norm(force_label_3, axis=1, keepdim=True) + self.relative_f + ) + diff_f_3 = diff_f.reshape([-1, 3]) + diff_f_3 = diff_f_3 / norm_f + diff_f = diff_f_3.reshape([-1]) + + if self.has_f: + if not self.use_l1_all: + l2_force_loss = paddle.mean(paddle.square(diff_f)) + if not self.inference: + 
more_loss["l2_force_loss"] = self.display_if_exist( + l2_force_loss.detach(), find_force + ) + loss += (pref_f * l2_force_loss).to(GLOBAL_PD_FLOAT_PRECISION) + rmse_f = l2_force_loss.sqrt() + more_loss["rmse_f"] = self.display_if_exist( + rmse_f.detach(), find_force + ) + else: + l1_force_loss = F.l1_loss(force_label, force_pred, reduction="none") + more_loss["mae_f"] = self.display_if_exist( + l1_force_loss.mean().detach(), find_force + ) + l1_force_loss = l1_force_loss.sum(-1).mean(-1).sum() + loss += (pref_f * l1_force_loss).to(GLOBAL_PD_FLOAT_PRECISION) + if mae: + mae_f = paddle.mean(paddle.abs(diff_f)) + more_loss["mae_f"] = self.display_if_exist( + mae_f.detach(), find_force + ) + + if self.has_pf and "atom_pref" in label: + atom_pref = label["atom_pref"] + find_atom_pref = label.get("find_atom_pref", 0.0) + pref_pf = pref_pf * find_atom_pref + atom_pref_reshape = atom_pref.reshape([-1]) + l2_pref_force_loss = (paddle.square(diff_f) * atom_pref_reshape).mean() + if not self.inference: + more_loss["l2_pref_force_loss"] = self.display_if_exist( + l2_pref_force_loss.detach(), find_atom_pref + ) + loss += (pref_pf * l2_pref_force_loss).to(GLOBAL_PD_FLOAT_PRECISION) + rmse_pf = l2_pref_force_loss.sqrt() + more_loss["rmse_pf"] = self.display_if_exist( + rmse_pf.detach(), find_atom_pref + ) + + if self.has_gf and "drdq" in label: + drdq = label["drdq"] + find_drdq = label.get("find_drdq", 0.0) + pref_gf = pref_gf * find_drdq + force_reshape_nframes = force_pred.reshape([-1, natoms * 3]) + force_label_reshape_nframes = force_label.reshape([-1, natoms * 3]) + drdq_reshape = drdq.reshape( + [-1, natoms * 3, self.numb_generalized_coord] + ) + + # gen_force_label = paddle.einsum( + # "bij,bi->bj", drdq_reshape, force_label_reshape_nframes + # ) + gen_force_label = ( + drdq_reshape * force_label_reshape_nframes.unsqueeze(-1) + ).sum([-2]) + + # gen_force = paddle.einsum( + # "bij,bi->bj", drdq_reshape, force_reshape_nframes + # ) + gen_force = (drdq_reshape * force_reshape_nframes.unsqueeze(-1)).sum( + [-2] + ) + + diff_gen_force = gen_force_label - gen_force + l2_gen_force_loss = paddle.square(diff_gen_force).mean() + if not self.inference: + more_loss["l2_gen_force_loss"] = self.display_if_exist( + l2_gen_force_loss.detach(), find_drdq + ) + loss += (pref_gf * l2_gen_force_loss).to(GLOBAL_PD_FLOAT_PRECISION) + rmse_gf = l2_gen_force_loss.sqrt() + more_loss["rmse_gf"] = self.display_if_exist( + rmse_gf.detach(), find_drdq + ) + + if self.has_v and "virial" in model_pred and "virial" in label: + find_virial = label.get("find_virial", 0.0) + pref_v = pref_v * find_virial + diff_v = label["virial"] - model_pred["virial"].reshape([-1, 9]) + l2_virial_loss = paddle.mean(paddle.square(diff_v)) + if not self.inference: + more_loss["l2_virial_loss"] = self.display_if_exist( + l2_virial_loss.detach(), find_virial + ) + loss += atom_norm * (pref_v * l2_virial_loss) + rmse_v = l2_virial_loss.sqrt() * atom_norm + more_loss["rmse_v"] = self.display_if_exist(rmse_v.detach(), find_virial) + if mae: + mae_v = paddle.mean(paddle.abs(diff_v)) * atom_norm + more_loss["mae_v"] = self.display_if_exist(mae_v.detach(), find_virial) + + if self.has_ae and "atom_energy" in model_pred and "atom_ener" in label: + atom_ener = model_pred["atom_energy"] + atom_ener_label = label["atom_ener"] + find_atom_ener = label.get("find_atom_ener", 0.0) + pref_ae = pref_ae * find_atom_ener + atom_ener_reshape = atom_ener.reshape([-1]) + atom_ener_label_reshape = atom_ener_label.reshape([-1]) + l2_atom_ener_loss = paddle.square( + 
atom_ener_label_reshape - atom_ener_reshape + ).mean() + if not self.inference: + more_loss["l2_atom_ener_loss"] = self.display_if_exist( + l2_atom_ener_loss.detach(), find_atom_ener + ) + loss += (pref_ae * l2_atom_ener_loss).to(GLOBAL_PD_FLOAT_PRECISION) + rmse_ae = l2_atom_ener_loss.sqrt() + more_loss["rmse_ae"] = self.display_if_exist( + rmse_ae.detach(), find_atom_ener + ) + + if not self.inference: + more_loss["rmse"] = paddle.sqrt(loss.detach()) + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> list[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_e: + label_requirement.append( + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ) + ) + if self.has_f: + label_requirement.append( + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_v: + label_requirement.append( + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ) + ) + if self.has_ae: + label_requirement.append( + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_pf: + label_requirement.append( + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ) + ) + if self.has_gf > 0: + label_requirement.append( + DataRequirementItem( + "drdq", + ndof=self.numb_generalized_coord * 3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.enable_atom_ener_coeff: + label_requirement.append( + DataRequirementItem( + "atom_ener_coeff", + ndof=1, + atomic=True, + must=False, + high_prec=False, + default=1.0, + ) + ) + return label_requirement diff --git a/deepmd/pd/loss/ener_spin.py b/deepmd/pd/loss/ener_spin.py new file mode 100644 index 0000000000..4722ee6b84 --- /dev/null +++ b/deepmd/pd/loss/ener_spin.py @@ -0,0 +1,329 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle +import paddle.nn.functional as F + +from deepmd.pd.loss.loss import ( + TaskLoss, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + GLOBAL_PD_FLOAT_PRECISION, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class EnergySpinLoss(TaskLoss): + def __init__( + self, + starter_learning_rate=1.0, + start_pref_e=0.0, + limit_pref_e=0.0, + start_pref_fr=0.0, + limit_pref_fr=0.0, + start_pref_fm=0.0, + limit_pref_fm=0.0, + start_pref_v=0.0, + limit_pref_v=0.0, + start_pref_ae: float = 0.0, + limit_pref_ae: float = 0.0, + enable_atom_ener_coeff: bool = False, + use_l1_all: bool = False, + inference=False, + **kwargs, + ): + r"""Construct a layer to compute loss on energy, real force, magnetic force and virial. + + Parameters + ---------- + starter_learning_rate : float + The learning rate at the start of the training. + start_pref_e : float + The prefactor of energy loss at the start of the training. + limit_pref_e : float + The prefactor of energy loss at the end of the training. + start_pref_fr : float + The prefactor of real force loss at the start of the training. + limit_pref_fr : float + The prefactor of real force loss at the end of the training. + start_pref_fm : float + The prefactor of magnetic force loss at the start of the training. + limit_pref_fm : float + The prefactor of magnetic force loss at the end of the training. + start_pref_v : float + The prefactor of virial loss at the start of the training. 
+ limit_pref_v : float + The prefactor of virial loss at the end of the training. + start_pref_ae : float + The prefactor of atomic energy loss at the start of the training. + limit_pref_ae : float + The prefactor of atomic energy loss at the end of the training. + enable_atom_ener_coeff : bool + if true, the energy will be computed as \sum_i c_i E_i + use_l1_all : bool + Whether to use L1 loss, if False (default), it will use L2 loss. + inference : bool + If true, it will output all losses found in output, ignoring the pre-factors. + **kwargs + Other keyword arguments. + """ + super().__init__() + self.starter_learning_rate = starter_learning_rate + self.has_e = (start_pref_e != 0.0 and limit_pref_e != 0.0) or inference + self.has_fr = (start_pref_fr != 0.0 and limit_pref_fr != 0.0) or inference + self.has_fm = (start_pref_fm != 0.0 and limit_pref_fm != 0.0) or inference + self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference + self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference + + self.start_pref_e = start_pref_e + self.limit_pref_e = limit_pref_e + self.start_pref_fr = start_pref_fr + self.limit_pref_fr = limit_pref_fr + self.start_pref_fm = start_pref_fm + self.limit_pref_fm = limit_pref_fm + self.start_pref_v = start_pref_v + self.limit_pref_v = limit_pref_v + self.start_pref_ae = start_pref_ae + self.limit_pref_ae = limit_pref_ae + self.enable_atom_ener_coeff = enable_atom_ener_coeff + self.use_l1_all = use_l1_all + self.inference = inference + + def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): + """Return energy loss with magnetic labels. + + Parameters + ---------- + input_dict : dict[str, paddle.Tensor] + Model inputs. + model : paddle.nn.Layer + Model to be used to output the predictions. + label : dict[str, paddle.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, paddle.Tensor] + Model predictions. + loss: paddle.Tensor + Loss for model to minimize. + more_loss: dict[str, paddle.Tensor] + Other losses for display. + """ + model_pred = model(**input_dict) + coef = learning_rate / self.starter_learning_rate + pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef + pref_fr = self.limit_pref_fr + (self.start_pref_fr - self.limit_pref_fr) * coef + pref_fm = self.limit_pref_fm + (self.start_pref_fm - self.limit_pref_fm) * coef + pref_v = self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * coef + pref_ae = self.limit_pref_ae + (self.start_pref_ae - self.limit_pref_ae) * coef + loss = paddle.to_tensor(0.0, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to(env.DEVICE) + more_loss = {} + # more_loss['log_keys'] = [] # showed when validation on the fly + # more_loss['test_keys'] = [] # showed when doing dp test + atom_norm = 1.0 / natoms + if self.has_e and "energy" in model_pred and "energy" in label: + energy_pred = model_pred["energy"] + energy_label = label["energy"] + if self.enable_atom_ener_coeff and "atom_energy" in model_pred: + atom_ener_pred = model_pred["atom_energy"] + # when ener_coeff (\nu) is defined, the energy is defined as + # E = \sum_i \nu_i E_i + # instead of the sum of atomic energies. 
+ # + # A case is that we want to train reaction energy + # A + B -> C + D + # E = - E(A) - E(B) + E(C) + E(D) + # A, B, C, D could be put far away from each other + atom_ener_coeff = label["atom_ener_coeff"] + atom_ener_coeff = atom_ener_coeff.reshape(atom_ener_pred.shape) + energy_pred = paddle.sum(atom_ener_coeff * atom_ener_pred, axis=1) + find_energy = label.get("find_energy", 0.0) + pref_e = pref_e * find_energy + if not self.use_l1_all: + l2_ener_loss = paddle.mean(paddle.square(energy_pred - energy_label)) + if not self.inference: + more_loss["l2_ener_loss"] = self.display_if_exist( + l2_ener_loss.detach(), find_energy + ) + loss += atom_norm * (pref_e * l2_ener_loss) + rmse_e = l2_ener_loss.sqrt() * atom_norm + more_loss["rmse_e"] = self.display_if_exist( + rmse_e.detach(), find_energy + ) + # more_loss['log_keys'].append('rmse_e') + else: # use l1 and for all atoms + l1_ener_loss = F.l1_loss( + energy_pred.reshape([-1]), + energy_label.reshape([-1]), + reduction="sum", + ) + loss += pref_e * l1_ener_loss + more_loss["mae_e"] = self.display_if_exist( + F.l1_loss( + energy_pred.reshape([-1]), + energy_label.reshape([-1]), + reduction="mean", + ).detach(), + find_energy, + ) + # more_loss['log_keys'].append('rmse_e') + if mae: + mae_e = paddle.mean(paddle.abs(energy_pred - energy_label)) * atom_norm + more_loss["mae_e"] = self.display_if_exist(mae_e.detach(), find_energy) + mae_e_all = paddle.mean(paddle.abs(energy_pred - energy_label)) + more_loss["mae_e_all"] = self.display_if_exist( + mae_e_all.detach(), find_energy + ) + + if self.has_fr and "force" in model_pred and "force" in label: + find_force_r = label.get("find_force", 0.0) + pref_fr = pref_fr * find_force_r + if not self.use_l1_all: + diff_fr = label["force"] - model_pred["force"] + l2_force_real_loss = paddle.mean(paddle.square(diff_fr)) + if not self.inference: + more_loss["l2_force_r_loss"] = self.display_if_exist( + l2_force_real_loss.detach(), find_force_r + ) + loss += (pref_fr * l2_force_real_loss).to(GLOBAL_PD_FLOAT_PRECISION) + rmse_fr = l2_force_real_loss.sqrt() + more_loss["rmse_fr"] = self.display_if_exist( + rmse_fr.detach(), find_force_r + ) + if mae: + mae_fr = paddle.mean(paddle.abs(diff_fr)) + more_loss["mae_fr"] = self.display_if_exist( + mae_fr.detach(), find_force_r + ) + else: + l1_force_real_loss = F.l1_loss( + label["force"], model_pred["force"], reduction="none" + ) + more_loss["mae_fr"] = self.display_if_exist( + l1_force_real_loss.mean().detach(), find_force_r + ) + l1_force_real_loss = l1_force_real_loss.sum(-1).mean(-1).sum() + loss += (pref_fr * l1_force_real_loss).to(GLOBAL_PD_FLOAT_PRECISION) + + if self.has_fm and "force_mag" in model_pred and "force_mag" in label: + find_force_m = label.get("find_force_mag", 0.0) + pref_fm = pref_fm * find_force_m + nframes = model_pred["force_mag"].shape[0] + atomic_mask = model_pred["mask_mag"].expand([-1, -1, 3]) + label_force_mag = label["force_mag"][atomic_mask].reshape([nframes, -1, 3]) + model_pred_force_mag = model_pred["force_mag"][atomic_mask].reshape( + [nframes, -1, 3] + ) + if not self.use_l1_all: + diff_fm = label_force_mag - model_pred_force_mag + l2_force_mag_loss = paddle.mean(paddle.square(diff_fm)) + if not self.inference: + more_loss["l2_force_m_loss"] = self.display_if_exist( + l2_force_mag_loss.detach(), find_force_m + ) + loss += (pref_fm * l2_force_mag_loss).to(GLOBAL_PD_FLOAT_PRECISION) + rmse_fm = l2_force_mag_loss.sqrt() + more_loss["rmse_fm"] = self.display_if_exist( + rmse_fm.detach(), find_force_m + ) + if mae: + mae_fm = 
paddle.mean(paddle.abs(diff_fm)) + more_loss["mae_fm"] = self.display_if_exist( + mae_fm.detach(), find_force_m + ) + else: + l1_force_mag_loss = F.l1_loss( + label_force_mag, model_pred_force_mag, reduction="none" + ) + more_loss["mae_fm"] = self.display_if_exist( + l1_force_mag_loss.mean().detach(), find_force_m + ) + l1_force_mag_loss = l1_force_mag_loss.sum(-1).mean(-1).sum() + loss += (pref_fm * l1_force_mag_loss).to(GLOBAL_PD_FLOAT_PRECISION) + + if self.has_ae and "atom_energy" in model_pred and "atom_ener" in label: + atom_ener = model_pred["atom_energy"] + atom_ener_label = label["atom_ener"] + find_atom_ener = label.get("find_atom_ener", 0.0) + pref_ae = pref_ae * find_atom_ener + atom_ener_reshape = atom_ener.reshape([-1]) + atom_ener_label_reshape = atom_ener_label.reshape([-1]) + l2_atom_ener_loss = paddle.square( + atom_ener_label_reshape - atom_ener_reshape + ).mean() + if not self.inference: + more_loss["l2_atom_ener_loss"] = self.display_if_exist( + l2_atom_ener_loss.detach(), find_atom_ener + ) + loss += (pref_ae * l2_atom_ener_loss).to(GLOBAL_PD_FLOAT_PRECISION) + rmse_ae = l2_atom_ener_loss.sqrt() + more_loss["rmse_ae"] = self.display_if_exist( + rmse_ae.detach(), find_atom_ener + ) + + if not self.inference: + more_loss["rmse"] = paddle.sqrt(loss.detach()) + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> list[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_e: + label_requirement.append( + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ) + ) + if self.has_fr: + label_requirement.append( + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_fm: + label_requirement.append( + DataRequirementItem( + "force_mag", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_v: + label_requirement.append( + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ) + ) + if self.has_ae: + label_requirement.append( + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ) + ) + return label_requirement diff --git a/deepmd/pd/loss/loss.py b/deepmd/pd/loss/loss.py new file mode 100644 index 0000000000..c083996720 --- /dev/null +++ b/deepmd/pd/loss/loss.py @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) + +import paddle + +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class TaskLoss(paddle.nn.Layer, ABC): + def __init__(self, **kwargs): + """Construct loss.""" + super().__init__() + + def forward(self, input_dict, model, label, natoms, learning_rate): + """Return loss .""" + raise NotImplementedError + + @property + @abstractmethod + def label_requirement(self) -> list[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + pass + + @staticmethod + def display_if_exist(loss: paddle.Tensor, find_property: float) -> paddle.Tensor: + """Display NaN if labeled property is not found. 
+ + Parameters + ---------- + loss : paddle.Tensor + the loss tensor + find_property : float + whether the property is found + """ + return loss if bool(find_property) else paddle.to_tensor(float("nan")) diff --git a/deepmd/pd/loss/property.py b/deepmd/pd/loss/property.py new file mode 100644 index 0000000000..0c2c561569 --- /dev/null +++ b/deepmd/pd/loss/property.py @@ -0,0 +1,152 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging + +import paddle +import paddle.nn.functional as F + +from deepmd.pd.loss.loss import ( + TaskLoss, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + +log = logging.getLogger(__name__) + + +class PropertyLoss(TaskLoss): + def __init__( + self, + task_dim, + loss_func: str = "smooth_mae", + metric: list = ["mae"], + beta: float = 1.00, + **kwargs, + ): + r"""Construct a layer to compute loss on property. + + Parameters + ---------- + task_dim : float + The output dimension of property fitting net. + loss_func : str + The loss function, such as "smooth_mae", "mae", "rmse". + metric : list + The metric such as mae, rmse which will be printed. + beta: + The 'beta' parameter in 'smooth_mae' loss. + """ + super().__init__() + self.task_dim = task_dim + self.loss_func = loss_func + self.metric = metric + self.beta = beta + + def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False): + """Return loss on properties . + + Parameters + ---------- + input_dict : dict[str, paddle.Tensor] + Model inputs. + model : paddle.nn.Module + Model to be used to output the predictions. + label : dict[str, paddle.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, paddle.Tensor] + Model predictions. + loss: paddle.Tensor + Loss for model to minimize. + more_loss: dict[str, paddle.Tensor] + Other losses for display. 
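Each supported `loss_func` below reduces to a standard regression on the property vector; a tiny standalone illustration with dummy tensors (shapes are hypothetical):

import paddle
import paddle.nn.functional as F

pred = paddle.randn([4, 3])   # nframes x task_dim, dummy predictions
label = paddle.randn([4, 3])  # dummy labels

mae = F.l1_loss(label, pred, reduction="mean")    # loss_func="mae"
mse = F.mse_loss(label, pred, reduction="mean")   # loss_func="mse"
rmse = paddle.sqrt(mse)                           # loss_func="rmse"
print(float(mae), float(mse), float(rmse))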
+ """ + model_pred = model(**input_dict) + assert label["property"].shape[-1] == self.task_dim + assert model_pred["property"].shape[-1] == self.task_dim + loss = paddle.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[ + 0 + ] + more_loss = {} + + # loss + if self.loss_func == "smooth_mae": + loss += F.smooth_l1_loss( + label["property"], + model_pred["property"], + reduction="sum", + beta=self.beta, + ) + elif self.loss_func == "mae": + loss += F.l1_loss( + label["property"], model_pred["property"], reduction="sum" + ) + elif self.loss_func == "mse": + loss += F.mse_loss( + label["property"], + model_pred["property"], + reduction="sum", + ) + elif self.loss_func == "rmse": + loss += paddle.sqrt( + F.mse_loss( + label["property"], + model_pred["property"], + reduction="mean", + ) + ) + else: + raise RuntimeError(f"Unknown loss function : {self.loss_func}") + + # more loss + if "smooth_mae" in self.metric: + more_loss["smooth_mae"] = F.smooth_l1_loss( + label["property"], + model_pred["property"], + reduction="mean", + beta=self.beta, + ).detach() + if "mae" in self.metric: + more_loss["mae"] = F.l1_loss( + label["property"], + model_pred["property"], + reduction="mean", + ).detach() + if "mse" in self.metric: + more_loss["mse"] = F.mse_loss( + label["property"], + model_pred["property"], + reduction="mean", + ).detach() + if "rmse" in self.metric: + more_loss["rmse"] = paddle.sqrt( + F.mse_loss( + label["property"], + model_pred["property"], + reduction="mean", + ) + ).detach() + + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> list[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + label_requirement.append( + DataRequirementItem( + "property", + ndof=self.task_dim, + atomic=False, + must=False, + high_prec=True, + ) + ) + return label_requirement diff --git a/deepmd/pd/loss/tensor.py b/deepmd/pd/loss/tensor.py new file mode 100644 index 0000000000..b549a0be8e --- /dev/null +++ b/deepmd/pd/loss/tensor.py @@ -0,0 +1,177 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.pd.loss.loss import ( + TaskLoss, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class TensorLoss(TaskLoss): + def __init__( + self, + tensor_name: str, + tensor_size: int, + label_name: str, + pref_atomic: float = 0.0, + pref: float = 0.0, + inference=False, + **kwargs, + ): + r"""Construct a loss for local and global tensors. + + Parameters + ---------- + tensor_name : str + The name of the tensor in the model predictions to compute the loss. + tensor_size : int + The size (dimension) of the tensor. + label_name : str + The name of the tensor in the labels to compute the loss. + pref_atomic : float + The prefactor of the weight of atomic loss. It should be larger than or equal to 0. + pref : float + The prefactor of the weight of global loss. It should be larger than or equal to 0. + inference : bool + If true, it will output all losses found in output, ignoring the pre-factors. + **kwargs + Other keyword arguments. 
+ """ + super().__init__() + self.tensor_name = tensor_name + self.tensor_size = tensor_size + self.label_name = label_name + self.local_weight = pref_atomic + self.global_weight = pref + self.inference = inference + + assert ( + self.local_weight >= 0.0 and self.global_weight >= 0.0 + ), "Can not assign negative weight to `pref` and `pref_atomic`" + self.has_local_weight = self.local_weight > 0.0 or inference + self.has_global_weight = self.global_weight > 0.0 or inference + assert self.has_local_weight or self.has_global_weight, AssertionError( + "Can not assian zero weight both to `pref` and `pref_atomic`" + ) + + def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False): + """Return loss on local and global tensors. + + Parameters + ---------- + input_dict : dict[str, paddle.Tensor] + Model inputs. + model : paddle.nn.Layer + Model to be used to output the predictions. + label : dict[str, paddle.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, paddle.Tensor] + Model predictions. + loss: paddle.Tensor + Loss for model to minimize. + more_loss: dict[str, paddle.Tensor] + Other losses for display. + """ + model_pred = model(**input_dict) + del learning_rate, mae + loss = paddle.zeros([1], dtype=env.GLOBAL_PD_FLOAT_PRECISION).to(env.DEVICE)[0] + more_loss = {} + if ( + self.has_local_weight + and self.tensor_name in model_pred + and "atom_" + self.label_name in label + ): + find_local = label.get("find_" + "atom_" + self.label_name, 0.0) + local_weight = self.local_weight * find_local + local_tensor_pred = model_pred[self.tensor_name].reshape( + [-1, natoms, self.tensor_size] + ) + local_tensor_label = label["atom_" + self.label_name].reshape( + [-1, natoms, self.tensor_size] + ) + diff = (local_tensor_pred - local_tensor_label).reshape( + [-1, self.tensor_size] + ) + if "mask" in model_pred: + diff = diff[model_pred["mask"].reshape([-1]).astype("bool")] + l2_local_loss = paddle.mean(paddle.square(diff)) + if not self.inference: + more_loss[f"l2_local_{self.tensor_name}_loss"] = self.display_if_exist( + l2_local_loss.detach(), find_local + ) + loss += local_weight * l2_local_loss + rmse_local = l2_local_loss.sqrt() + more_loss[f"rmse_local_{self.tensor_name}"] = self.display_if_exist( + rmse_local.detach(), find_local + ) + if ( + self.has_global_weight + and "global_" + self.tensor_name in model_pred + and self.label_name in label + ): + find_global = label.get("find_" + self.label_name, 0.0) + global_weight = self.global_weight * find_global + global_tensor_pred = model_pred["global_" + self.tensor_name].reshape( + [-1, self.tensor_size] + ) + global_tensor_label = label[self.label_name].reshape([-1, self.tensor_size]) + diff = global_tensor_pred - global_tensor_label + if "mask" in model_pred: + atom_num = model_pred["mask"].sum(-1, keepdim=True) + l2_global_loss = paddle.mean( + paddle.sum( + paddle.square(diff) * atom_num.astype(diff.dtype), axis=0 + ) + / (atom_num.sum().astype(diff.dtype)) + ) + atom_num = paddle.mean(atom_num.astype(diff.dtype)) + else: + atom_num = natoms + l2_global_loss = paddle.mean(paddle.square(diff)) + if not self.inference: + more_loss[f"l2_global_{self.tensor_name}_loss"] = self.display_if_exist( + l2_global_loss.detach(), find_global + ) + loss += global_weight * l2_global_loss + rmse_global = l2_global_loss.sqrt() / atom_num + more_loss[f"rmse_global_{self.tensor_name}"] = self.display_if_exist( + rmse_global.detach(), find_global + ) + return model_pred, loss, more_loss + + 
@property + def label_requirement(self) -> list[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_local_weight: + label_requirement.append( + DataRequirementItem( + "atomic_" + self.label_name, + ndof=self.tensor_size, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_global_weight: + label_requirement.append( + DataRequirementItem( + self.label_name, + ndof=self.tensor_size, + atomic=False, + must=False, + high_prec=False, + ) + ) + return label_requirement diff --git a/deepmd/pd/model/__init__.py b/deepmd/pd/model/__init__.py new file mode 100644 index 0000000000..171d147114 --- /dev/null +++ b/deepmd/pd/model/__init__.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.utils.entry_point import ( + load_entry_point, +) + +load_entry_point("deepmd.pd") diff --git a/deepmd/pd/model/atomic_model/__init__.py b/deepmd/pd/model/atomic_model/__init__.py new file mode 100644 index 0000000000..2aa4b1cdb2 --- /dev/null +++ b/deepmd/pd/model/atomic_model/__init__.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""The atomic model provides the prediction of some property on each +atom. All the atomic models are not supposed to be directly accessed +by users, but it provides a convenient interface for the +implementation of models. + +Taking the energy models for example, the developeres only needs to +implement the atomic energy prediction via an atomic model, and the +model can be automatically made by the `deepmd.dpmodel.make_model` +method. The `DPModel` is made by +``` +DPModel = make_model(DPAtomicModel) +``` + +""" + +from .base_atomic_model import ( + BaseAtomicModel, +) +from .dipole_atomic_model import ( + DPDipoleAtomicModel, +) +from .dos_atomic_model import ( + DPDOSAtomicModel, +) +from .dp_atomic_model import ( + DPAtomicModel, +) +from .energy_atomic_model import ( + DPEnergyAtomicModel, +) +from .linear_atomic_model import ( + DPZBLLinearEnergyAtomicModel, + LinearEnergyAtomicModel, +) +from .pairtab_atomic_model import ( + PairTabAtomicModel, +) +from .polar_atomic_model import ( + DPPolarAtomicModel, +) +from .property_atomic_model import ( + DPPropertyAtomicModel, +) + +__all__ = [ + "BaseAtomicModel", + "DPAtomicModel", + "DPDOSAtomicModel", + "DPEnergyAtomicModel", + "DPPropertyAtomicModel", + "PairTabAtomicModel", + "LinearEnergyAtomicModel", + "DPPolarAtomicModel", + "DPDipoleAtomicModel", + "DPZBLLinearEnergyAtomicModel", +] diff --git a/deepmd/pd/model/atomic_model/base_atomic_model.py b/deepmd/pd/model/atomic_model/base_atomic_model.py new file mode 100644 index 0000000000..44553482c6 --- /dev/null +++ b/deepmd/pd/model/atomic_model/base_atomic_model.py @@ -0,0 +1,578 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import copy +import logging +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.atomic_model import ( + make_base_atomic_model, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.utils import ( + AtomExcludeMask, + PairExcludeMask, + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pd.utils.stat import ( + compute_output_stats, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_atom_exclude_types, + map_pair_exclude_types, +) +from deepmd.utils.path import ( + 
DPPath, +) + +log = logging.getLogger(__name__) +dtype = env.GLOBAL_PD_FLOAT_PRECISION +device = env.DEVICE + +BaseAtomicModel_ = make_base_atomic_model(paddle.Tensor) + + +class BaseAtomicModel(paddle.nn.Layer, BaseAtomicModel_): + """The base of atomic model. + + Parameters + ---------- + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + atom_exclude_types + Exclude the atomic contribution of the given types + pair_exclude_types + Exclude the pair of atoms of the given types from computing the output + of the atomic model. Implemented by removing the pairs from the nlist. + rcond : float, optional + The condition number for the regression of atomic energy. + preset_out_bias : Dict[str, list[Optional[paddle.Tensor]]], optional + Specifying atomic energy contribution in vacuum. Given by key:value pairs. + The value is a list specifying the bias. the elements can be None or np.array of output shape. + For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] + The `set_davg_zero` key in the descrptor should be set. + + """ + + def __init__( + self, + type_map: list[str], + atom_exclude_types: list[int] = [], + pair_exclude_types: list[tuple[int, int]] = [], + rcond: Optional[float] = None, + preset_out_bias: Optional[dict[str, paddle.Tensor]] = None, + ): + paddle.nn.Layer.__init__(self) + BaseAtomicModel_.__init__(self) + self.type_map = type_map + self.reinit_atom_exclude(atom_exclude_types) + self.reinit_pair_exclude(pair_exclude_types) + self.rcond = rcond + self.preset_out_bias = preset_out_bias + + def init_out_stat(self): + """Initialize the output bias.""" + ntypes = self.get_ntypes() + self.bias_keys: list[str] = list(self.fitting_output_def().keys()) + self.max_out_size = max( + [self.atomic_output_def()[kk].size for kk in self.bias_keys] + ) + self.n_out = len(self.bias_keys) + out_bias_data = self._default_bias() + out_std_data = self._default_std() + self.register_buffer("out_bias", out_bias_data) + self.register_buffer("out_std", out_std_data) + + def set_out_bias(self, out_bias: paddle.Tensor) -> None: + self.out_bias = out_bias + + def __setitem__(self, key, value): + if key in ["out_bias"]: + self.out_bias = value + elif key in ["out_std"]: + self.out_std = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ["out_bias"]: + return self.out_bias + elif key in ["out_std"]: + return self.out_std + else: + raise KeyError(key) + + def get_type_map(self) -> list[str]: + """Get the type map.""" + return self.type_map + + def reinit_atom_exclude( + self, + exclude_types: list[int] = [], + ): + self.atom_exclude_types = exclude_types + if exclude_types == []: + self.atom_excl = None + else: + self.atom_excl = AtomExcludeMask(self.get_ntypes(), self.atom_exclude_types) + + def reinit_pair_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + self.pair_exclude_types = exclude_types + if exclude_types == []: + self.pair_excl = None + else: + self.pair_excl = PairExcludeMask(self.get_ntypes(), self.pair_exclude_types) + + # to make jit happy... + def make_atom_mask( + self, + atype: paddle.Tensor, + ) -> paddle.Tensor: + """The atoms with type < 0 are treated as virutal atoms, + which serves as place-holders for multi-frame calculations + with different number of atoms in different frames. + + Parameters + ---------- + atype + Atom types. >= 0 for real atoms <0 for virtual atoms. + + Returns + ------- + mask + True for real atoms and False for virutal atoms. 
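A small illustration of the virtual-atom convention used throughout this class (padding with a negative type so frames with different atom counts can share one batch):

import paddle

# hypothetical batch: frame 0 has 3 real atoms, frame 1 has 2 real atoms padded with -1
atype = paddle.to_tensor([[0, 1, 1], [0, 2, -1]])
mask = atype >= 0               # exactly what make_atom_mask returns
print(mask.astype("int32"))     # [[1, 1, 1], [1, 1, 0]]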
+ + """ + # supposed to be supported by all backends + return atype >= 0 + + def atomic_output_def(self) -> FittingOutputDef: + old_def = self.fitting_output_def() + old_list = list(old_def.get_data().values()) + return FittingOutputDef( + old_list # noqa:RUF005 + + [ + OutputVariableDef( + name="mask", + shape=[1], + reducible=False, + r_differentiable=False, + c_differentiable=False, + ) + ] + ) + + def forward_common_atomic( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ) -> dict[str, paddle.Tensor]: + """Common interface for atomic inference. + + This method accept extended coordinates, extended atom typs, neighbor list, + and predict the atomic contribution of the fit property. + + Parameters + ---------- + extended_coord + extended coodinates, shape: nf x (nall x 3) + extended_atype + extended atom typs, shape: nf x nall + for a type < 0 indicating the atomic is virtual. + nlist + neighbor list, shape: nf x nloc x nsel + mapping + extended to local index mapping, shape: nf x nall + fparam + frame parameters, shape: nf x dim_fparam + aparam + atomic parameter, shape: nf x nloc x dim_aparam + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + ret_dict + dict of output atomic properties. + should implement the definition of `fitting_output_def`. + ret_dict["mask"] of shape nf x nloc will be provided. + ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real. + ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual. + + """ + _, nloc, _ = nlist.shape + atype = extended_atype[:, :nloc] + + if self.pair_excl is not None: + pair_mask = self.pair_excl(nlist, extended_atype) + # exclude neighbors in the nlist + nlist = paddle.where(pair_mask == 1, nlist, -1) + + ext_atom_mask = self.make_atom_mask(extended_atype) + ret_dict = self.forward_atomic( + extended_coord, + paddle.where( + ext_atom_mask, extended_atype, paddle.zeros_like(extended_atype) + ), + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + comm_dict=comm_dict, + ) + ret_dict = self.apply_out_stat(ret_dict, atype) + + # nf x nloc + atom_mask = ext_atom_mask[:, :nloc].astype(paddle.int32) + if self.atom_excl is not None: + atom_mask *= self.atom_excl(atype) + + for kk in ret_dict.keys(): + out_shape = ret_dict[kk].shape + out_shape2 = 1 + for ss in out_shape[2:]: + out_shape2 *= ss + ret_dict[kk] = ( + ret_dict[kk].reshape([out_shape[0], out_shape[1], out_shape2]) + * atom_mask.unsqueeze(2).astype(ret_dict[kk].dtype) + ).reshape(out_shape) + ret_dict["mask"] = atom_mask + + return ret_dict + + def forward( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ) -> dict[str, paddle.Tensor]: + return self.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + comm_dict=comm_dict, + ) + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. 
+ If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + self.type_map = type_map + self.reinit_atom_exclude( + map_atom_exclude_types(self.atom_exclude_types, remap_index) + ) + self.reinit_pair_exclude( + map_pair_exclude_types(self.pair_exclude_types, remap_index) + ) + if has_new_type: + extend_shape = [ + self.out_bias.shape[0], + len(type_map), + *list(self.out_bias.shape[2:]), + ] + extend_bias = paddle.zeros(extend_shape, dtype=self.out_bias.dtype).to( + device=self.out_bias.place + ) + self.out_bias = paddle.concat([self.out_bias, extend_bias], axis=1) + extend_std = paddle.ones(extend_shape, dtype=self.out_std.dtype).to( + device=self.out_std.place + ) + self.out_std = paddle.concat([self.out_std, extend_std], axis=1) + self.out_bias = self.out_bias[:, remap_index, :] + self.out_std = self.out_std[:, remap_index, :] + + def serialize(self) -> dict: + return { + "type_map": self.type_map, + "atom_exclude_types": self.atom_exclude_types, + "pair_exclude_types": self.pair_exclude_types, + "rcond": self.rcond, + "preset_out_bias": self.preset_out_bias, + "@variables": { + "out_bias": to_numpy_array(self.out_bias), + "out_std": to_numpy_array(self.out_std), + }, + } + + @classmethod + def deserialize(cls, data: dict) -> "BaseAtomicModel": + data = copy.deepcopy(data) + variables = data.pop("@variables", None) + variables = ( + {"out_bias": None, "out_std": None} if variables is None else variables + ) + obj = cls(**data) + obj["out_bias"] = ( + to_paddle_tensor(variables["out_bias"]) + if variables["out_bias"] is not None + else obj._default_bias() + ) + obj["out_std"] = ( + to_paddle_tensor(variables["out_std"]) + if variables["out_std"] is not None + else obj._default_std() + ) + return obj + + def compute_or_load_stat( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. + + """ + raise NotImplementedError + + def compute_or_load_out_stat( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. 
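A schematic of the two accepted forms of `merged` (the keys and tensors below are dummies, not the full data specification):

import paddle

def draw_samples():
    # lazy producer: returns one dict of paddle.Tensor per data system
    return [{"energy": paddle.randn([10, 1]), "atype": paddle.zeros([10, 5], dtype="int64")}]

eager_merged = draw_samples()  # list[dict] form, materialized up front
lazy_merged = draw_samples     # Callable[[], list[dict]] form, sampled only when needed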
+
+        """
+        self.change_out_bias(
+            merged,
+            stat_file_path=stat_file_path,
+            bias_adjust_mode="set-by-statistic",
+        )
+
+    def apply_out_stat(
+        self,
+        ret: dict[str, paddle.Tensor],
+        atype: paddle.Tensor,
+    ):
+        """Apply the stat to each atomic output.
+
+        The developer may override the method to define how the bias is applied
+        to the atomic output of the model.
+
+        Parameters
+        ----------
+        ret
+            The dict returned by the forward_atomic method
+        atype
+            The atom types. nf x nloc
+
+        """
+        out_bias, out_std = self._fetch_out_stat(self.bias_keys)
+        for kk in self.bias_keys:
+            # nf x nloc x odims, out_bias: ntypes x odims
+            ret[kk] = ret[kk] + out_bias[kk][atype]
+        return ret
+
+    def change_out_bias(
+        self,
+        sample_merged,
+        stat_file_path: Optional[DPPath] = None,
+        bias_adjust_mode="change-by-statistic",
+    ) -> None:
+        """Change the output bias according to the input data and the pretrained model.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        bias_adjust_mode : str
+            The mode for changing the output bias: ['change-by-statistic', 'set-by-statistic']
+            'change-by-statistic' : perform predictions on labels of the target dataset,
+            and do a least-squares fit on the errors to obtain the target shift as bias.
+            'set-by-statistic' : directly use the statistical output bias of the target dataset.
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
+ """ + if bias_adjust_mode == "change-by-statistic": + delta_bias, out_std = compute_output_stats( + sample_merged, + self.get_ntypes(), + keys=list(self.atomic_output_def().keys()), + stat_file_path=stat_file_path, + model_forward=self._get_forward_wrapper_func(), + rcond=self.rcond, + preset_bias=self.preset_out_bias, + atomic_output=self.atomic_output_def(), + ) + self._store_out_stat(delta_bias, out_std, add=True) + elif bias_adjust_mode == "set-by-statistic": + bias_out, std_out = compute_output_stats( + sample_merged, + self.get_ntypes(), + keys=list(self.atomic_output_def().keys()), + stat_file_path=stat_file_path, + rcond=self.rcond, + preset_bias=self.preset_out_bias, + atomic_output=self.atomic_output_def(), + ) + self._store_out_stat(bias_out, std_out) + else: + raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode) + + def _get_forward_wrapper_func(self) -> Callable[..., paddle.Tensor]: + """Get a forward wrapper of the atomic model for output bias calculation.""" + + def model_forward(coord, atype, box, fparam=None, aparam=None): + with ( + paddle.no_grad() + ): # it's essential for pure paddle forward function to use auto_batchsize + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + self.get_rcut(), + self.get_sel(), + mixed_types=self.mixed_types(), + box=box, + ) + atomic_ret = self.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + return {kk: vv.detach() for kk, vv in atomic_ret.items()} + + return model_forward + + def _default_bias(self): + ntypes = self.get_ntypes() + return paddle.zeros([self.n_out, ntypes, self.max_out_size], dtype=dtype).to( + device=device + ) + + def _default_std(self): + ntypes = self.get_ntypes() + return paddle.ones([self.n_out, ntypes, self.max_out_size], dtype=dtype).to( + device=device + ) + + def _varsize( + self, + shape: list[int], + ) -> int: + output_size = 1 + len_shape = len(shape) + for i in range(len_shape): + output_size *= shape[i] + return output_size + + def _get_bias_index( + self, + kk: str, + ) -> int: + res: list[int] = [] + for i, e in enumerate(self.bias_keys): + if e == kk: + res.append(i) + assert len(res) == 1 + return res[0] + + def _store_out_stat( + self, + out_bias: dict[str, paddle.Tensor], + out_std: dict[str, paddle.Tensor], + add: bool = False, + ): + ntypes = self.get_ntypes() + out_bias_data = paddle.clone(self.out_bias) + out_std_data = paddle.clone(self.out_std) + for kk in out_bias.keys(): + assert kk in out_std.keys() + idx = self._get_bias_index(kk) + size = self._varsize(self.atomic_output_def()[kk].shape) + if not add: + out_bias_data[idx, :, :size] = out_bias[kk].reshape([ntypes, size]) + else: + out_bias_data[idx, :, :size] += out_bias[kk].reshape([ntypes, size]) + out_std_data[idx, :, :size] = out_std[kk].reshape([ntypes, size]) + paddle.assign(out_bias_data, self.out_bias) + paddle.assign(out_std_data, self.out_std) + + def _fetch_out_stat( + self, + keys: list[str], + ) -> tuple[dict[str, paddle.Tensor], dict[str, paddle.Tensor]]: + ret_bias = {} + ret_std = {} + ntypes = self.get_ntypes() + for kk in keys: + idx = self._get_bias_index(kk) + isize = self._varsize(self.atomic_output_def()[kk].shape) + ret_bias[kk] = self.out_bias[idx, :, :isize].reshape( + [ntypes] + list(self.atomic_output_def()[kk].shape) # noqa: RUF005 + ) + ret_std[kk] = self.out_std[idx, :, :isize].reshape( + [ntypes] + list(self.atomic_output_def()[kk].shape) # 
noqa: RUF005 + ) + return ret_bias, ret_std diff --git a/deepmd/pd/model/atomic_model/dipole_atomic_model.py b/deepmd/pd/model/atomic_model/dipole_atomic_model.py new file mode 100644 index 0000000000..3fc5204749 --- /dev/null +++ b/deepmd/pd/model/atomic_model/dipole_atomic_model.py @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.pd.model.task.dipole import ( + DipoleFittingNet, +) + +from .dp_atomic_model import ( + DPAtomicModel, +) + + +class DPDipoleAtomicModel(DPAtomicModel): + def __init__(self, descriptor, fitting, type_map, **kwargs): + assert isinstance(fitting, DipoleFittingNet) + super().__init__(descriptor, fitting, type_map, **kwargs) + + def apply_out_stat( + self, + ret: dict[str, paddle.Tensor], + atype: paddle.Tensor, + ): + # dipole not applying bias + return ret diff --git a/deepmd/pd/model/atomic_model/dos_atomic_model.py b/deepmd/pd/model/atomic_model/dos_atomic_model.py new file mode 100644 index 0000000000..9c622ce7bb --- /dev/null +++ b/deepmd/pd/model/atomic_model/dos_atomic_model.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.pd.model.task.dos import ( + DOSFittingNet, +) + +from .dp_atomic_model import ( + DPAtomicModel, +) + + +class DPDOSAtomicModel(DPAtomicModel): + def __init__(self, descriptor, fitting, type_map, **kwargs): + assert isinstance(fitting, DOSFittingNet) + super().__init__(descriptor, fitting, type_map, **kwargs) diff --git a/deepmd/pd/model/atomic_model/dp_atomic_model.py b/deepmd/pd/model/atomic_model/dp_atomic_model.py new file mode 100644 index 0000000000..45eb9ca1cb --- /dev/null +++ b/deepmd/pd/model/atomic_model/dp_atomic_model.py @@ -0,0 +1,333 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import functools +import logging +from typing import ( + Optional, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, +) +from deepmd.pd.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pd.model.task.base_fitting import ( + BaseFitting, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_atomic_model import ( + BaseAtomicModel, +) + +log = logging.getLogger(__name__) + + +@BaseAtomicModel.register("standard") +class DPAtomicModel(BaseAtomicModel): + """Model give atomic prediction of some physical property. + + Parameters + ---------- + descriptor + Descriptor + fitting_net + Fitting net + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. 
+ """ + + eval_descriptor_list: list[paddle.Tensor] + + def __init__( + self, + descriptor, + fitting, + type_map: list[str], + **kwargs, + ): + super().__init__(type_map, **kwargs) + ntypes = len(type_map) + self.type_map = type_map + self.ntypes = ntypes + self.descriptor = descriptor + self.rcut = self.descriptor.get_rcut() + self.sel = self.descriptor.get_sel() + self.fitting_net = fitting + super().init_out_stat() + self.enable_eval_descriptor_hook = False + self.eval_descriptor_list = [] + + # register 'type_map' as buffer + def _string_to_array(s: str) -> list[int]: + return [ord(c) for c in s] + + self.register_buffer( + "buffer_type_map", + paddle.to_tensor(_string_to_array(" ".join(self.type_map)), dtype="int32"), + ) + self.buffer_type_map.name = "buffer_type_map" + if hasattr(self.descriptor, "has_message_passing"): + # register 'has_message_passing' as buffer(cast to int32 as problems may meets with vector) + self.register_buffer( + "buffer_has_message_passing", + paddle.to_tensor(self.descriptor.has_message_passing(), dtype="int32"), + ) + self.buffer_has_message_passing.name = "buffer_has_message_passing" + # register 'ntypes' as buffer + self.register_buffer( + "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int32") + ) + self.buffer_ntypes.name = "buffer_ntypes" + # register 'rcut' as buffer + self.register_buffer( + "buffer_rcut", paddle.to_tensor(self.rcut, dtype="float64") + ) + self.buffer_rcut.name = "buffer_rcut" + if hasattr(self.fitting_net, "get_dim_fparam"): + # register 'dfparam' as buffer + self.register_buffer( + "buffer_dfparam", + paddle.to_tensor(self.fitting_net.get_dim_fparam(), dtype="int32"), + ) + self.buffer_dfparam.name = "buffer_dfparam" + if hasattr(self.fitting_net, "get_dim_aparam"): + # register 'daparam' as buffer + self.register_buffer( + "buffer_daparam", + paddle.to_tensor(self.fitting_net.get_dim_aparam(), dtype="int32"), + ) + self.buffer_daparam.name = "buffer_daparam" + # register 'aparam_nall' as buffer + self.register_buffer( + "buffer_aparam_nall", + paddle.to_tensor(False, dtype="int32"), + ) + self.buffer_aparam_nall.name = "buffer_aparam_nall" + + def set_eval_descriptor_hook(self, enable: bool) -> None: + """Set the hook for evaluating descriptor and clear the cache for descriptor list.""" + self.enable_eval_descriptor_hook = enable + self.eval_descriptor_list = [] + + def eval_descriptor(self) -> paddle.Tensor: + """Evaluate the descriptor.""" + return paddle.concat(self.eval_descriptor_list) + + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of the fitting net.""" + return ( + self.fitting_net.output_def() + if self.fitting_net is not None + else self.coord_denoise_net.output_def() + ) + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.rcut + + def get_sel(self) -> list[int]: + """Get the neighbor selection.""" + return self.sel + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.descriptor.mixed_types() + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. 
+ If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + super().change_type_map( + type_map=type_map, model_with_new_type_stat=model_with_new_type_stat + ) + self.type_map = type_map + self.ntypes = len(type_map) + self.descriptor.change_type_map( + type_map=type_map, + model_with_new_type_stat=model_with_new_type_stat.descriptor + if model_with_new_type_stat is not None + else None, + ) + self.fitting_net.change_type_map(type_map=type_map) + + def has_message_passing(self) -> bool: + """Returns whether the atomic model has message passing.""" + return self.descriptor.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the atomic model needs sorted nlist when using `forward_lower`.""" + return self.descriptor.need_sorted_nlist_for_lower() + + def serialize(self) -> dict: + dd = BaseAtomicModel.serialize(self) + dd.update( + { + "@class": "Model", + "@version": 2, + "type": "standard", + "type_map": self.type_map, + "descriptor": self.descriptor.serialize(), + "fitting": self.fitting_net.serialize(), + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "DPAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("@class", None) + data.pop("type", None) + descriptor_obj = BaseDescriptor.deserialize(data.pop("descriptor")) + fitting_obj = BaseFitting.deserialize(data.pop("fitting")) + data["descriptor"] = descriptor_obj + data["fitting"] = fitting_obj + obj = super().deserialize(data) + return obj + + def forward_atomic( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ) -> dict[str, paddle.Tensor]: + """Return atomic prediction. + + Parameters + ---------- + extended_coord + coodinates in extended region + extended_atype + atomic type in extended region + nlist + neighbor list. nf x nloc x nsel + mapping + mapps the extended indices to local indices + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + + Returns + ------- + result_dict + the result dict, defined by the `FittingOutputDef`. + + """ + nframes, nloc, nnei = nlist.shape + atype = extended_atype[:, :nloc] + if self.do_grad_r() or self.do_grad_c(): + extended_coord.stop_gradient = False + descriptor, rot_mat, g2, h2, sw = self.descriptor( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + comm_dict=comm_dict, + ) + assert descriptor is not None + if self.enable_eval_descriptor_hook: + self.eval_descriptor_list.append(descriptor) + # energy, force + fit_ret = self.fitting_net( + descriptor, + atype, + gr=rot_mat, + g2=g2, + h2=h2, + fparam=fparam, + aparam=aparam, + ) + return fit_ret + + def get_out_bias(self) -> paddle.Tensor: + return self.out_bias + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. + When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). + When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. 
+ + Parameters + ---------- + sampled_func + The lazy sampled function to get data frames from different data systems. + stat_file_path + The dictionary of paths to the statistics files. + """ + if stat_file_path is not None and self.type_map is not None: + # descriptors and fitting net with different type_map + # should not share the same parameters + stat_file_path /= " ".join(self.type_map) + + @functools.lru_cache + def wrapped_sampler(): + sampled = sampled_func() + if self.pair_excl is not None: + pair_exclude_types = self.pair_excl.get_exclude_types() + for sample in sampled: + sample["pair_exclude_types"] = list(pair_exclude_types) + if self.atom_excl is not None: + atom_exclude_types = self.atom_excl.get_exclude_types() + for sample in sampled: + sample["atom_exclude_types"] = list(atom_exclude_types) + return sampled + + self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path) + self.compute_or_load_out_stat(wrapped_sampler, stat_file_path) + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.fitting_net.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.fitting_net.get_dim_aparam() + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.fitting_net.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False diff --git a/deepmd/pd/model/atomic_model/energy_atomic_model.py b/deepmd/pd/model/atomic_model/energy_atomic_model.py new file mode 100644 index 0000000000..2d0ef4db4c --- /dev/null +++ b/deepmd/pd/model/atomic_model/energy_atomic_model.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.pd.model.task.ener import ( + EnergyFittingNet, + EnergyFittingNetDirect, + InvarFitting, +) + +from .dp_atomic_model import ( + DPAtomicModel, +) + + +class DPEnergyAtomicModel(DPAtomicModel): + def __init__(self, descriptor, fitting, type_map, **kwargs): + assert ( + isinstance(fitting, EnergyFittingNet) + or isinstance(fitting, EnergyFittingNetDirect) + or isinstance(fitting, InvarFitting) + ) + super().__init__(descriptor, fitting, type_map, **kwargs) diff --git a/deepmd/pd/model/atomic_model/linear_atomic_model.py b/deepmd/pd/model/atomic_model/linear_atomic_model.py new file mode 100644 index 0000000000..c6550cb427 --- /dev/null +++ b/deepmd/pd/model/atomic_model/linear_atomic_model.py @@ -0,0 +1,563 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + build_multiple_neighbor_list, + get_multiple_nlist_key, + nlist_distinguish_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_atomic_model import ( + BaseAtomicModel, +) +from .dp_atomic_model import ( + DPAtomicModel, +) +from .pairtab_atomic_model import ( + PairTabAtomicModel, +) + + +class LinearEnergyAtomicModel(BaseAtomicModel): + """Linear model make linear 
combinations of several existing models. + + Parameters + ---------- + models : list[DPAtomicModel or PairTabAtomicModel] + A list of models to be combined. PairTabAtomicModel must be used together with a DPAtomicModel. + type_map : list[str] + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + """ + + def __init__( + self, + models: list[BaseAtomicModel], + type_map: list[str], + **kwargs, + ): + super().__init__(type_map, **kwargs) + super().init_out_stat() + + # check all sub models are of mixed type. + model_mixed_type = [] + for m in models: + if not m.mixed_types(): + model_mixed_type.append(m) + if len(model_mixed_type) > 0: + raise ValueError( + f"LinearAtomicModel only supports AtomicModel of mixed type, the following models are not mixed type: {model_mixed_type}." + ) + + self.models = paddle.nn.LayerList(models) + sub_model_type_maps = [md.get_type_map() for md in models] + err_msg = [] + self.mapping_list = [] + common_type_map = set(type_map) + self.type_map = type_map + for tpmp in sub_model_type_maps: + if not common_type_map.issubset(set(tpmp)): + err_msg.append( + f"type_map {tpmp} is not a subset of type_map {type_map}" + ) + self.mapping_list.append(self.remap_atype(tpmp, self.type_map)) + assert len(err_msg) == 0, "\n".join(err_msg) + + self.mixed_types_list = [model.mixed_types() for model in self.models] + self.rcuts = paddle.to_tensor(self.get_model_rcuts(), dtype=paddle.float64).to( + device=env.DEVICE + ) + self.nsels = paddle.to_tensor(self.get_model_nsels()).to(device=env.DEVICE) # pylint: disable=no-explicit-dtype + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return True + + def has_message_passing(self) -> bool: + """Returns whether the atomic model has message passing.""" + return any(model.has_message_passing() for model in self.models) + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the atomic model needs sorted nlist when using `forward_lower`.""" + return True + + def get_out_bias(self) -> paddle.Tensor: + return self.out_bias + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return max(self.get_model_rcuts()) + + def get_type_map(self) -> list[str]: + """Get the type map.""" + return self.type_map + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. 
+        """
+        super().change_type_map(
+            type_map=type_map, model_with_new_type_stat=model_with_new_type_stat
+        )
+        for ii, model in enumerate(self.models):
+            model.change_type_map(
+                type_map=type_map,
+                model_with_new_type_stat=model_with_new_type_stat.models[ii]
+                if model_with_new_type_stat is not None
+                else None,
+            )
+
+    def get_model_rcuts(self) -> list[float]:
+        """Get the cut-off radius of each individual model."""
+        return [model.get_rcut() for model in self.models]
+
+    def get_sel(self) -> list[int]:
+        return [max([model.get_nsel() for model in self.models])]
+
+    def get_model_nsels(self) -> list[int]:
+        """Get the processed sel of each individual model. Not distinguishing types."""
+        return [model.get_nsel() for model in self.models]
+
+    def get_model_sels(self) -> list[list[int]]:
+        """Get the sel of each individual model."""
+        return [model.get_sel() for model in self.models]
+
+    def _sort_rcuts_sels(self) -> tuple[list[float], list[int]]:
+        # sort the pairs of rcut and sel in ascending order, first based on sel, then on rcut.
+        zipped = paddle.stack(
+            [
+                self.rcuts,
+                self.nsels.astype(self.rcuts.dtype),
+            ],
+            axis=0,
+        ).T
+        inner_sorting = paddle.argsort(zipped[:, 1], axis=0)
+        inner_sorted = zipped[inner_sorting]
+        outer_sorting = paddle.argsort(inner_sorted[:, 0])
+        outer_sorted = inner_sorted[outer_sorting]
+        sorted_rcuts: list[float] = outer_sorted[:, 0].tolist()
+        sorted_sels: list[int] = outer_sorted[:, 1].to(paddle.int64).tolist()
+        return sorted_rcuts, sorted_sels
+
+    def forward_atomic(
+        self,
+        extended_coord: paddle.Tensor,
+        extended_atype: paddle.Tensor,
+        nlist: paddle.Tensor,
+        mapping: Optional[paddle.Tensor] = None,
+        fparam: Optional[paddle.Tensor] = None,
+        aparam: Optional[paddle.Tensor] = None,
+        comm_dict: Optional[dict[str, paddle.Tensor]] = None,
+    ) -> dict[str, paddle.Tensor]:
+        """Return atomic prediction.
+
+        Parameters
+        ----------
+        extended_coord
+            coordinates in extended region, (nframes, nall * 3)
+        extended_atype
+            atomic type in extended region, (nframes, nall)
+        nlist
+            neighbor list, (nframes, nloc, nsel).
+        mapping
+            maps the extended indices to local indices.
+        fparam
+            frame parameter. (nframes, ndf)
+        aparam
+            atomic parameter. (nframes, nloc, nda)
+
+        Returns
+        -------
+        result_dict
+            the result dict, defined by the fitting net output def.
+ """ + nframes, nloc, nnei = nlist.shape + if self.do_grad_r() or self.do_grad_c(): + extended_coord.stop_gradient = False + extended_coord = extended_coord.reshape([nframes, -1, 3]) + sorted_rcuts, sorted_sels = self._sort_rcuts_sels() + nlists = build_multiple_neighbor_list( + extended_coord, + nlist, + sorted_rcuts, + sorted_sels, + ) + raw_nlists = [ + nlists[get_multiple_nlist_key(rcut, sel)] + for rcut, sel in zip(self.get_model_rcuts(), self.get_model_nsels()) + ] + nlists_ = [ + nl if mt else nlist_distinguish_types(nl, extended_atype, sel) + for mt, nl, sel in zip( + self.mixed_types_list, raw_nlists, self.get_model_sels() + ) + ] + ener_list = [] + + for i, model in enumerate(self.models): + type_map_model = self.mapping_list[i].to(extended_atype.place) + # apply bias to each individual model + ener_list.append( + model.forward_common_atomic( + extended_coord, + type_map_model[extended_atype], + nlists_[i], + mapping, + fparam, + aparam, + )["energy"] + ) + weights = self._compute_weight(extended_coord, extended_atype, nlists_) + + fit_ret = { + "energy": paddle.sum( + paddle.stack(ener_list) + * paddle.stack(weights).to(extended_atype.place), + axis=0, + ), + } # (nframes, nloc, 1) + return fit_ret + + def apply_out_stat( + self, + ret: dict[str, paddle.Tensor], + atype: paddle.Tensor, + ): + """Apply the stat to each atomic output. + The developer may override the method to define how the bias is applied + to the atomic output of the model. + + Parameters + ---------- + ret + The returned dict by the forward_atomic method + atype + The atom types. nf x nloc + + """ + return ret + + @staticmethod + def remap_atype(ori_map: list[str], new_map: list[str]) -> paddle.Tensor: + """ + This method is used to map the atype from the common type_map to the original type_map of + indivial AtomicModels. It creates a index mapping for the conversion. + + Parameters + ---------- + ori_map : List[str] + The original type map of an AtomicModel. + new_map : List[str] + The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method, + must be a subset of the ori_map. 
+ + Returns + ------- + paddle.Tensor + """ + type_2_idx = {atp: idx for idx, atp in enumerate(ori_map)} + # this maps the atype in the new map to the original map + mapping = paddle.to_tensor( # pylint: disable=no-explicit-dtype + [type_2_idx[new_map[idx]] for idx in range(len(new_map))] + ).to(device=env.DEVICE) + return mapping + + def fitting_output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + name="energy", + shape=[1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ) + ] + ) + + def serialize(self) -> dict: + dd = super().serialize() + dd.update( + { + "@class": "Model", + "@version": 2, + "type": "linear", + "models": [model.serialize() for model in self.models], + "type_map": self.type_map, + } + ) + return dd + + @classmethod + def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.get("@version", 2), 2, 1) + data.pop("@class", None) + data.pop("type", None) + models = [ + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] + ] + data["models"] = models + return super().deserialize(data) + + def _compute_weight( + self, extended_coord, extended_atype, nlists_ + ) -> list[paddle.Tensor]: + """This should be a list of user defined weights that matches the number of models to be combined.""" + nmodels = len(self.models) + nframes, nloc, _ = nlists_[0].shape + return [ + paddle.ones((nframes, nloc, 1), dtype=paddle.float64).to(device=env.DEVICE) + / nmodels + for _ in range(nmodels) + ] + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + # tricky... + return max([model.get_dim_fparam() for model in self.models]) + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return max([model.get_dim_aparam() for model in self.models]) + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + if any(model.get_sel_type() == [] for model in self.models): + return [] + # join all the selected types + # make paddle.jit happy... + return paddle.unique( + paddle.concat( + [ + paddle.to_tensor(model.get_sel_type(), dtype=paddle.int32) + for model in self.models + ] + ) + ).tolist() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False + + def compute_or_load_out_stat( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. 
+ + """ + for md in self.models: + md.compute_or_load_out_stat(merged, stat_file_path) + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. + When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). + When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. + + Parameters + ---------- + sampled_func + The lazy sampled function to get data frames from different data systems. + stat_file_path + The dictionary of paths to the statistics files. + """ + for md in self.models: + md.compute_or_load_stat(sampled_func, stat_file_path) + + +class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel): + """Model linearly combine a list of AtomicModels. + + Parameters + ---------- + dp_model + The DPAtomicModel being combined. + zbl_model + The PairTable model being combined. + sw_rmin + The lower boundary of the interpolation between short-range tabulated interaction and DP. + sw_rmax + The upper boundary of the interpolation between short-range tabulated interaction and DP. + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + smin_alpha + The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. + This distance is calculated by softmin. + """ + + def __init__( + self, + dp_model: DPAtomicModel, + zbl_model: PairTabAtomicModel, + sw_rmin: float, + sw_rmax: float, + type_map: list[str], + smin_alpha: Optional[float] = 0.1, + **kwargs, + ): + models = [dp_model, zbl_model] + kwargs["models"] = models + kwargs["type_map"] = type_map + super().__init__(**kwargs) + + self.sw_rmin = sw_rmin + self.sw_rmax = sw_rmax + self.smin_alpha = smin_alpha + + # this is a placeholder being updated in _compute_weight, to handle Jit attribute init error. + self.zbl_weight = paddle.empty([0], dtype=paddle.float64).to(device=env.DEVICE) + + def serialize(self) -> dict: + dd = super().serialize() + dd.update( + { + "@class": "Model", + "@version": 2, + "type": "zbl", + "sw_rmin": self.sw_rmin, + "sw_rmax": self.sw_rmax, + "smin_alpha": self.smin_alpha, + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + models = [ + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] + ] + data["dp_model"], data["zbl_model"] = models[0], models[1] + data.pop("@class", None) + data.pop("type", None) + return super().deserialize(data) + + def _compute_weight( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlists_: list[paddle.Tensor], + ) -> list[paddle.Tensor]: + """ZBL weight. + + Returns + ------- + List[paddle.Tensor] + the atomic ZBL weight for interpolation. (nframes, nloc, 1) + """ + assert ( + self.sw_rmax > self.sw_rmin + ), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`." 
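+        # Illustration of the interpolation computed below (hypothetical numbers,
+        # not part of the algorithm): with sw_rmin = 1.0 and sw_rmax = 2.0, the
+        # softmin distance sigma is mapped to u = (sigma - sw_rmin) / (sw_rmax - sw_rmin)
+        # and weighted by the quintic smooth-step -6*u**5 + 15*u**4 - 10*u**3 + 1:
+        #     sigma <= 1.0  ->  coef = 1.0  (pure ZBL)
+        #     sigma  = 1.5  ->  u = 0.5, coef = 0.5
+        #     sigma >= 2.0  ->  coef = 0.0  (pure DP)
+        # The method returns [1 - coef, coef], matching the [dp_model, zbl_model]
+        # order, so the two weights always sum to one.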
+ + dp_nlist = nlists_[0] + zbl_nlist = nlists_[1] + + zbl_nnei = zbl_nlist.shape[-1] + dp_nnei = dp_nlist.shape[-1] + + # use the larger rr based on nlist + nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist + masked_nlist = paddle.clip(nlist_larger, 0) + pairwise_rr = PairTabAtomicModel._get_pairwise_dist( + extended_coord, masked_nlist + ) + numerator = paddle.sum( + paddle.where( + nlist_larger != -1, + pairwise_rr * paddle.exp(-pairwise_rr / self.smin_alpha), + paddle.zeros_like(nlist_larger, dtype=pairwise_rr.dtype), + ), + axis=-1, + ) + denominator = paddle.sum( + paddle.where( + nlist_larger != -1, + paddle.exp(-pairwise_rr / self.smin_alpha), + paddle.zeros_like(nlist_larger).astype(pairwise_rr.dtype), + ), + axis=-1, + ) # handle masked nnei. + + sigma = numerator / paddle.clip(denominator, 1e-20) # nfrmes, nloc + u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin) + coef = paddle.zeros_like(u) + left_mask = sigma < self.sw_rmin + mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax) + right_mask = sigma >= self.sw_rmax + # coef[left_mask] = 1 + coef = paddle.where(left_mask, paddle.ones_like(coef), coef) + smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1 + # coef[mid_mask] = smooth[mid_mask] + coef = paddle.where(mid_mask, smooth, coef) + # coef[right_mask] = 0 + coef = paddle.where(right_mask, paddle.zeros_like(coef), coef) + + # to handle masked atoms + coef = paddle.where(sigma != 0, coef, paddle.zeros_like(coef)) + self.zbl_weight = coef # nframes, nloc + return [1 - coef.unsqueeze(-1), coef.unsqueeze(-1)] # to match the model order. diff --git a/deepmd/pd/model/atomic_model/pairtab_atomic_model.py b/deepmd/pd/model/atomic_model/pairtab_atomic_model.py new file mode 100644 index 0000000000..6c6c498050 --- /dev/null +++ b/deepmd/pd/model/atomic_model/pairtab_atomic_model.py @@ -0,0 +1,498 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.utils.pair_tab import ( + PairTab, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_atomic_model import ( + BaseAtomicModel, +) + + +@BaseAtomicModel.register("pairtab") +class PairTabAtomicModel(BaseAtomicModel): + """Pairwise tabulation energy model. + + This model can be used to tabulate the pairwise energy between atoms for either + short-range or long-range interactions, such as D3, LJ, ZBL, etc. It should not + be used alone, but rather as one submodel of a linear (sum) model, such as + DP+D3. + + Do not put the model on the first model of a linear model, since the linear + model fetches the type map from the first model. + + At this moment, the model does not smooth the energy at the cutoff radius, so + one needs to make sure the energy has been smoothed to zero. + + Parameters + ---------- + tab_file : str + The path to the tabulation file. + rcut : float + The cutoff radius. + sel : int or list[int] + The maxmum number of atoms in the cut-off radius. + type_map : list[str] + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + rcond : float, optional + The condition number for the regression of atomic energy. + atom_ener + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. 
+ + """ + + def __init__( + self, + tab_file: str, + rcut: float, + sel: Union[int, list[int]], + type_map: list[str], + **kwargs, + ): + super().__init__(type_map, **kwargs) + super().init_out_stat() + self.tab_file = tab_file + self.rcut = rcut + self.tab = self._set_pairtab(tab_file, rcut) + + self.type_map = type_map + self.ntypes = len(type_map) + + # handle deserialization with no input file + if self.tab_file is not None: + ( + tab_info, + tab_data, + ) = self.tab.get() # this returns -> tuple[np.array, np.array] + nspline, ntypes_tab = tab_info[-2:].astype(int) + self.register_buffer("tab_info", paddle.to_tensor(tab_info)) + self.register_buffer( + "tab_data", + paddle.to_tensor(tab_data).reshape( + [ntypes_tab, ntypes_tab, nspline, 4] + ), + ) + if self.ntypes != ntypes_tab: + raise ValueError( + "The `type_map` provided does not match the number of columns in the table." + ) + else: + self.register_buffer("tab_info", None) + self.register_buffer("tab_data", None) + self.bias_atom_e = paddle.zeros( + [self.ntypes, 1], dtype=env.GLOBAL_PD_ENER_FLOAT_PRECISION + ).to(device=env.DEVICE) + + # self.model_type = "ener" + # self.model_version = MODEL_VERSION ## this shoud be in the parent class + + if isinstance(sel, int): + self.sel = sel + elif isinstance(sel, list): + self.sel = sum(sel) + else: + raise TypeError("sel must be int or list[int]") + + # @paddle.jit.ignore + def _set_pairtab(self, tab_file: str, rcut: float) -> PairTab: + return PairTab(tab_file, rcut) + + def fitting_output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + name="energy", + shape=[1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ) + ] + ) + + def get_out_bias(self) -> paddle.Tensor: + return self.out_bias + + def get_rcut(self) -> float: + return self.rcut + + def get_type_map(self) -> list[str]: + return self.type_map + + def get_sel(self) -> list[int]: + return [self.sel] + + def get_nsel(self) -> int: + return self.sel + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + # to match DPA1 and DPA2. + return True + + def has_message_passing(self) -> bool: + """Returns whether the atomic model has message passing.""" + return False + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the atomic model needs sorted nlist when using `forward_lower`.""" + return False + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert type_map == self.type_map, ( + "PairTabAtomicModel does not support changing type map now. " + "This feature is currently not implemented because it would require additional work to change the tab file. " + "We may consider adding this support in the future if there is a clear demand for it." 
+ ) + + def serialize(self) -> dict: + dd = BaseAtomicModel.serialize(self) + dd.update( + { + "@class": "Model", + "@version": 2, + "type": "pairtab", + "tab": self.tab.serialize(), + "rcut": self.rcut, + "sel": self.sel, + "type_map": self.type_map, + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "PairTabAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + tab = PairTab.deserialize(data.pop("tab")) + data.pop("@class", None) + data.pop("type", None) + data["tab_file"] = None + tab_model = super().deserialize(data) + + tab_model.tab = tab + tab_model.register_buffer("tab_info", paddle.to_tensor(tab_model.tab.tab_info)) + nspline, ntypes = tab_model.tab.tab_info[-2:].astype(int) + tab_model.register_buffer( + "tab_data", + paddle.to_tensor(tab_model.tab.tab_data).reshape( + [ntypes, ntypes, nspline, 4] + ), + ) + return tab_model + + def compute_or_load_stat( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. + + """ + self.compute_or_load_out_stat(merged, stat_file_path) + + def forward_atomic( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ) -> dict[str, paddle.Tensor]: + nframes, nloc, nnei = nlist.shape + extended_coord = extended_coord.reshape([nframes, -1, 3]) + if self.do_grad_r() or self.do_grad_c(): + extended_coord.stop_gradient = False + + # this will mask all -1 in the nlist + mask = nlist >= 0 + masked_nlist = nlist * mask.astype(nlist.dtype) + + atype = extended_atype[:, :nloc] # (nframes, nloc) + pairwise_rr = self._get_pairwise_dist( + extended_coord, masked_nlist + ) # (nframes, nloc, nnei) + self.tab_data = self.tab_data.to(device=extended_coord.place).reshape( + [int(self.tab_info[-1]), int(self.tab_info[-1]), int(self.tab_info[2]), 4] + ) + + # to calculate the atomic_energy, we need 3 tensors, i_type, j_type, pairwise_rr + # i_type : (nframes, nloc), this is atype. 
+ # j_type : (nframes, nloc, nnei) + j_type = extended_atype[ + paddle.arange(extended_atype.shape[0]).to(device=extended_coord.place)[ # pylint: disable=no-explicit-dtype + :, None, None + ], + masked_nlist, + ] + + raw_atomic_energy = self._pair_tabulated_inter( + nlist, atype, j_type, pairwise_rr + ) + + atomic_energy = 0.5 * paddle.sum( + paddle.where( + nlist != -1, raw_atomic_energy, paddle.zeros_like(raw_atomic_energy) + ), + axis=-1, + ).unsqueeze(-1) + + return {"energy": atomic_energy} + + def _pair_tabulated_inter( + self, + nlist: paddle.Tensor, + i_type: paddle.Tensor, + j_type: paddle.Tensor, + rr: paddle.Tensor, + ) -> paddle.Tensor: + """Pairwise tabulated energy. + + Parameters + ---------- + nlist : paddle.Tensor + The unmasked neighbour list. (nframes, nloc) + i_type : paddle.Tensor + The integer representation of atom type for all local atoms for all frames. (nframes, nloc) + j_type : paddle.Tensor + The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei) + rr : paddle.Tensor + The salar distance vector between two atoms. (nframes, nloc, nnei) + + Returns + ------- + paddle.Tensor + The masked atomic energy for all local atoms for all frames. (nframes, nloc, nnei) + + Raises + ------ + Exception + If the distance is beyond the table. + + Notes + ----- + This function is used to calculate the pairwise energy between two atoms. + It uses a table containing cubic spline coefficients calculated in PairTab. + """ + nframes, nloc, nnei = nlist.shape + rmin = self.tab_info[0] + hh = self.tab_info[1] + hi = 1.0 / hh + + nspline = int(self.tab_info[2] + 0.1) + + uu = (rr - rmin) * hi # this is broadcasted to (nframes,nloc,nnei) + + # if nnei of atom 0 has -1 in the nlist, uu would be 0. + # this is to handle the nlist where the mask is set to 0, so that we don't raise exception for those atoms. + uu = paddle.where(nlist != -1, uu, paddle.full_like(uu, nspline + 1)) + + if paddle.any(uu < 0): + raise Exception("coord go beyond table lower boundary") + + idx = uu.to(paddle.int32) + uu -= idx.astype(uu.dtype) + + table_coef = self._extract_spline_coefficient( + i_type, j_type, idx, self.tab_data, nspline + ) + table_coef = table_coef.reshape([nframes, nloc, nnei, 4]) + ener = self._calculate_ener(table_coef, uu) + + # here we need to overwrite energy to zero at rcut and beyond. + mask_beyond_rcut = rr >= self.rcut + # also overwrite values beyond extrapolation to zero + extrapolation_mask = rr >= rmin + nspline * hh + ener[mask_beyond_rcut] = 0 + ener[extrapolation_mask] = 0 + + return ener + + @staticmethod + def _get_pairwise_dist( + coords: paddle.Tensor, nlist: paddle.Tensor + ) -> paddle.Tensor: + """Get pairwise distance `dr`. + + Parameters + ---------- + coords : paddle.Tensor + The coordinate of the atoms, shape of (nframes, nall, 3). + nlist + The masked nlist, shape of (nframes, nloc, nnei) + + Returns + ------- + paddle.Tensor + The pairwise distance between the atoms (nframes, nloc, nnei). 
+ """ + nframes, nloc, nnei = nlist.shape + coord_l = coords[:, :nloc].reshape([nframes, -1, 1, 3]) + index = nlist.reshape([nframes, -1]).unsqueeze(-1).expand([-1, -1, 3]) + # coord_r = paddle.take_along_axis(coords, axis=1, indices=index) + coord_r = decomp.take_along_axis(coords, axis=1, indices=index) + coord_r = coord_r.reshape([nframes, nloc, nnei, 3]) + diff = coord_r - coord_l + # pairwise_rr = paddle.linalg.norm(diff, axis=-1, keepdim=True).squeeze(-1) + pairwise_rr = decomp.norm(diff, axis=-1, keepdim=True).squeeze(-1) + return pairwise_rr + + @staticmethod + def _extract_spline_coefficient( + i_type: paddle.Tensor, + j_type: paddle.Tensor, + idx: paddle.Tensor, + tab_data: paddle.Tensor, + nspline: int, + ) -> paddle.Tensor: + """Extract the spline coefficient from the table. + + Parameters + ---------- + i_type : paddle.Tensor + The integer representation of atom type for all local atoms for all frames. (nframes, nloc) + j_type : paddle.Tensor + The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei) + idx : paddle.Tensor + The index of the spline coefficient. (nframes, nloc, nnei) + tab_data : paddle.Tensor + The table storing all the spline coefficient. (ntype, ntype, nspline, 4) + nspline : int + The number of splines in the table. + + Returns + ------- + paddle.Tensor + The spline coefficient. (nframes, nloc, nnei, 4), shape may be squeezed. + + """ + # (nframes, nloc, nnei) + expanded_i_type = i_type.unsqueeze(-1).expand([-1, -1, j_type.shape[-1]]) + + # handle the case where idx is beyond the number of splines + clipped_indices = paddle.clip(idx, 0, nspline - 1).to(paddle.int64) + + nframes = i_type.shape[0] + nloc = i_type.shape[1] + nnei = j_type.shape[2] + ntypes = tab_data.shape[0] + # tab_data_idx: (nframes, nloc, nnei) + tab_data_idx = ( + expanded_i_type * ntypes * nspline + j_type * nspline + clipped_indices + ) + # tab_data: (ntype, ntype, nspline, 4) + tab_data = tab_data.reshape([ntypes * ntypes * nspline, 4]) + # tab_data_idx: (nframes * nloc * nnei, 4) + tab_data_idx = tab_data_idx.reshape([nframes * nloc * nnei, 1]).expand([-1, 4]) + # (nframes, nloc, nnei, 4) + # final_coef = paddle.take_along_axis( + # tab_data, axis=0, indices=tab_data_idx + # ).reshape([nframes, nloc, nnei, 4]) + final_coef = decomp.take_along_axis( + tab_data, axis=0, indices=tab_data_idx + ).reshape([nframes, nloc, nnei, 4]) + + # when the spline idx is beyond the table, all spline coefficients are set to `0`, and the resulting ener corresponding to the idx is also `0`. + final_coef[idx > nspline] = 0 + return final_coef + + @staticmethod + def _calculate_ener(coef: paddle.Tensor, uu: paddle.Tensor) -> paddle.Tensor: + """Calculate energy using spline coeeficients. + + Parameters + ---------- + coef : paddle.Tensor + The spline coefficients. (nframes, nloc, nnei, 4) + uu : paddle.Tensor + The atom displancemnt used in interpolation and extrapolation (nframes, nloc, nnei) + + Returns + ------- + paddle.Tensor + The atomic energy for all local atoms for all frames. (nframes, nloc, nnei) + """ + a3, a2, a1, a0 = paddle.unbind(coef, axis=-1) + etmp = (a3 * uu.astype(a3.dtype) + a2) * uu.astype( + coef.dtype + ) + a1 # this should be elementwise operations. 
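+        # The statement above and the one below evaluate the cubic spline
+        # a3*u**3 + a2*u**2 + a1*u + a0 in Horner form, ((a3*u + a2)*u + a1)*u + a0.
+        # Worked example with hypothetical coefficients (a3, a2, a1, a0) = (1, 2, 3, 4)
+        # and u = 0.5: ((1*0.5 + 2)*0.5 + 3)*0.5 + 4 = 6.125, the same value as the
+        # expanded polynomial 1*0.125 + 2*0.25 + 3*0.5 + 4, with fewer multiplications.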
+ ener = ( + etmp * uu.astype(coef.dtype) + a0 + ) # this energy has the extrapolated value when rcut > rmax + return ener + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return 0 + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return 0 + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return [] + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False diff --git a/deepmd/pd/model/atomic_model/polar_atomic_model.py b/deepmd/pd/model/atomic_model/polar_atomic_model.py new file mode 100644 index 0000000000..0a65760c70 --- /dev/null +++ b/deepmd/pd/model/atomic_model/polar_atomic_model.py @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.pd.model.task.polarizability import ( + PolarFittingNet, +) + +from .dp_atomic_model import ( + DPAtomicModel, +) + + +class DPPolarAtomicModel(DPAtomicModel): + def __init__(self, descriptor, fitting, type_map, **kwargs): + assert isinstance(fitting, PolarFittingNet) + super().__init__(descriptor, fitting, type_map, **kwargs) + + def apply_out_stat( + self, + ret: dict[str, paddle.Tensor], + atype: paddle.Tensor, + ): + """Apply the stat to each atomic output. + + Parameters + ---------- + ret + The returned dict by the forward_atomic method + atype + The atom types. nf x nloc + + """ + out_bias, out_std = self._fetch_out_stat(self.bias_keys) + + if self.fitting_net.shift_diag: + nframes, nloc = atype.shape + device = out_bias[self.bias_keys[0]].place + dtype = out_bias[self.bias_keys[0]].dtype + for kk in self.bias_keys: + ntypes = out_bias[kk].shape[0] + temp = paddle.zeros([ntypes], dtype=dtype).to(device=device) + for i in range(ntypes): + temp[i] = paddle.mean( + paddle.diagonal(out_bias[kk][i].reshape([3, 3])) + ) + modified_bias = temp[atype] + + # (nframes, nloc, 1) + modified_bias = ( + modified_bias.unsqueeze(-1) + * (self.fitting_net.scale.to(atype.place))[atype] + ) + + eye = paddle.eye(3, dtype=dtype).to(device=device) + eye = eye.tile([nframes, nloc, 1, 1]) + # (nframes, nloc, 3, 3) + modified_bias = modified_bias.unsqueeze(-1) * eye + + # nf x nloc x odims, out_bias: ntypes x odims + ret[kk] = ret[kk] + modified_bias + return ret diff --git a/deepmd/pd/model/atomic_model/property_atomic_model.py b/deepmd/pd/model/atomic_model/property_atomic_model.py new file mode 100644 index 0000000000..419fb27dfa --- /dev/null +++ b/deepmd/pd/model/atomic_model/property_atomic_model.py @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.pd.model.task.property import ( + PropertyFittingNet, +) + +from .dp_atomic_model import ( + DPAtomicModel, +) + + +class DPPropertyAtomicModel(DPAtomicModel): + def __init__(self, descriptor, fitting, type_map, **kwargs): + assert isinstance(fitting, PropertyFittingNet) + super().__init__(descriptor, fitting, type_map, **kwargs) + + def apply_out_stat( + self, + ret: dict[str, paddle.Tensor], + atype: paddle.Tensor, + ): + """Apply the stat to each atomic output. + This function defines how the bias is applied to the atomic output of the model. 
+ + Parameters + ---------- + ret + The returned dict by the forward_atomic method + atype + The atom types. nf x nloc + + """ + if self.fitting_net.get_bias_method() == "normal": + out_bias, out_std = self._fetch_out_stat(self.bias_keys) + for kk in self.bias_keys: + # nf x nloc x odims, out_bias: ntypes x odims + ret[kk] = ret[kk] + out_bias[kk][atype] + return ret + elif self.fitting_net.get_bias_method() == "no_bias": + return ret + else: + raise NotImplementedError( + "Only 'normal' and 'no_bias' is supported for parameter 'bias_method'." + ) diff --git a/deepmd/pd/model/backbone/__init__.py b/deepmd/pd/model/backbone/__init__.py new file mode 100644 index 0000000000..a76bdb2a2d --- /dev/null +++ b/deepmd/pd/model/backbone/__init__.py @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .backbone import ( + BackBone, +) +from .evoformer2b import ( + Evoformer2bBackBone, +) + +__all__ = [ + "BackBone", + "Evoformer2bBackBone", +] diff --git a/deepmd/pd/model/backbone/backbone.py b/deepmd/pd/model/backbone/backbone.py new file mode 100644 index 0000000000..f37346a44f --- /dev/null +++ b/deepmd/pd/model/backbone/backbone.py @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import paddle + + +class BackBone(paddle.nn.Layer): + def __init__(self, **kwargs): + """BackBone base method.""" + super().__init__() + + def forward(self, **kwargs): + """Calculate backBone.""" + raise NotImplementedError diff --git a/deepmd/pd/model/backbone/evoformer2b.py b/deepmd/pd/model/backbone/evoformer2b.py new file mode 100644 index 0000000000..698bc741d4 --- /dev/null +++ b/deepmd/pd/model/backbone/evoformer2b.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.pd.model.backbone import ( + BackBone, +) +from deepmd.pd.model.network.network import ( + Evoformer2bEncoder, +) + + +class Evoformer2bBackBone(BackBone): + def __init__( + self, + nnei, + layer_num=6, + attn_head=8, + atomic_dim=1024, + pair_dim=100, + feature_dim=1024, + ffn_dim=2048, + post_ln=False, + final_layer_norm=True, + final_head_layer_norm=False, + emb_layer_norm=False, + atomic_residual=False, + evo_residual=False, + residual_factor=1.0, + activation_function="gelu", + **kwargs, + ): + """Construct an evoformer backBone.""" + super().__init__() + self.nnei = nnei + self.layer_num = layer_num + self.attn_head = attn_head + self.atomic_dim = atomic_dim + self.pair_dim = pair_dim + self.feature_dim = feature_dim + self.head_dim = feature_dim // attn_head + assert ( + feature_dim % attn_head == 0 + ), f"feature_dim {feature_dim} must be divided by attn_head {attn_head}!" 
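+        # For example, the default feature_dim = 1024 with attn_head = 8 gives a
+        # per-head width of head_dim = 1024 // 8 = 128, while a combination such as
+        # feature_dim = 100 with attn_head = 8 (100 % 8 != 0) is rejected by the
+        # assertion above instead of silently truncating the head dimension.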
+ self.ffn_dim = ffn_dim + self.post_ln = post_ln + self.final_layer_norm = final_layer_norm + self.final_head_layer_norm = final_head_layer_norm + self.emb_layer_norm = emb_layer_norm + self.activation_function = activation_function + self.atomic_residual = atomic_residual + self.evo_residual = evo_residual + self.residual_factor = float(residual_factor) + self.encoder = Evoformer2bEncoder( + nnei=self.nnei, + layer_num=self.layer_num, + attn_head=self.attn_head, + atomic_dim=self.atomic_dim, + pair_dim=self.pair_dim, + feature_dim=self.feature_dim, + ffn_dim=self.ffn_dim, + post_ln=self.post_ln, + final_layer_norm=self.final_layer_norm, + final_head_layer_norm=self.final_head_layer_norm, + emb_layer_norm=self.emb_layer_norm, + atomic_residual=self.atomic_residual, + evo_residual=self.evo_residual, + residual_factor=self.residual_factor, + activation_function=self.activation_function, + ) + + def forward(self, atomic_rep, pair_rep, nlist, nlist_type, nlist_mask): + """Encoder the atomic and pair representations. + + Args: + - atomic_rep: Atomic representation with shape [nframes, nloc, atomic_dim]. + - pair_rep: Pair representation with shape [nframes, nloc, nnei, pair_dim]. + - nlist: Neighbor list with shape [nframes, nloc, nnei]. + - nlist_type: Neighbor types with shape [nframes, nloc, nnei]. + - nlist_mask: Neighbor mask with shape [nframes, nloc, nnei], `False` if blank. + + Returns + ------- + - atomic_rep: Atomic representation after encoder with shape [nframes, nloc, feature_dim]. + - transformed_atomic_rep: Transformed atomic representation after encoder with shape [nframes, nloc, atomic_dim]. + - pair_rep: Pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. + - delta_pair_rep: Delta pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. + - norm_x: Normalization loss of atomic_rep. + - norm_delta_pair_rep: Normalization loss of delta_pair_rep. 
+ """ + ( + atomic_rep, + transformed_atomic_rep, + pair_rep, + delta_pair_rep, + norm_x, + norm_delta_pair_rep, + ) = self.encoder(atomic_rep, pair_rep, nlist, nlist_type, nlist_mask) + return ( + atomic_rep, + transformed_atomic_rep, + pair_rep, + delta_pair_rep, + norm_x, + norm_delta_pair_rep, + ) diff --git a/deepmd/pd/model/descriptor/__init__.py b/deepmd/pd/model/descriptor/__init__.py new file mode 100644 index 0000000000..4ffa937bcb --- /dev/null +++ b/deepmd/pd/model/descriptor/__init__.py @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + DescriptorBlock, + make_default_type_embedding, +) +from .dpa1 import ( + DescrptBlockSeAtten, + DescrptDPA1, +) +from .dpa2 import ( + DescrptDPA2, +) +from .env_mat import ( + prod_env_mat, +) +from .hybrid import ( + DescrptHybrid, +) +from .repformers import ( + DescrptBlockRepformers, +) +from .se_a import ( + DescrptBlockSeA, + DescrptSeA, +) +from .se_atten_v2 import ( + DescrptSeAttenV2, +) +from .se_r import ( + DescrptSeR, +) +from .se_t import ( + DescrptSeT, +) +from .se_t_tebd import ( + DescrptBlockSeTTebd, + DescrptSeTTebd, +) + +__all__ = [ + "BaseDescriptor", + "DescriptorBlock", + "make_default_type_embedding", + "DescrptBlockSeA", + "DescrptBlockSeAtten", + "DescrptSeAttenV2", + "DescrptSeTTebd", + "DescrptBlockSeTTebd", + "DescrptSeA", + "DescrptSeR", + "DescrptSeT", + "DescrptDPA1", + "DescrptDPA2", + "DescrptHybrid", + "prod_env_mat", + "DescrptBlockRepformers", +] diff --git a/deepmd/pd/model/descriptor/base_descriptor.py b/deepmd/pd/model/descriptor/base_descriptor.py new file mode 100644 index 0000000000..8f0b799f87 --- /dev/null +++ b/deepmd/pd/model/descriptor/base_descriptor.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import paddle + +from deepmd.dpmodel.descriptor import ( + make_base_descriptor, +) + +BaseDescriptor = make_base_descriptor(paddle.Tensor, "forward") diff --git a/deepmd/pd/model/descriptor/descriptor.py b/deepmd/pd/model/descriptor/descriptor.py new file mode 100644 index 0000000000..b27facd0ae --- /dev/null +++ b/deepmd/pd/model/descriptor/descriptor.py @@ -0,0 +1,230 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.pd.model.network.network import ( + TypeEmbedNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.plugin import ( + make_plugin_registry, +) + +log = logging.getLogger(__name__) + + +class DescriptorBlock(paddle.nn.Layer, ABC, make_plugin_registry("DescriptorBlock")): + """The building block of descriptor. + Given the input descriptor, provide with the atomic coordinates, + atomic types and neighbor list, calculate the new descriptor. 
+ """ + + local_cluster = False + + def __new__(cls, *args, **kwargs): + if cls is DescriptorBlock: + try: + descrpt_type = kwargs["type"] + except KeyError as e: + raise KeyError( + "the type of DescriptorBlock should be set by `type`" + ) from e + cls = cls.get_class_by_type(descrpt_type) + return super().__new__(cls) + + @abstractmethod + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + pass + + @abstractmethod + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + pass + + @abstractmethod + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + pass + + @abstractmethod + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + pass + + @abstractmethod + def get_ntypes(self) -> int: + """Returns the number of element types.""" + pass + + @abstractmethod + def get_dim_out(self) -> int: + """Returns the output dimension.""" + pass + + @abstractmethod + def get_dim_in(self) -> int: + """Returns the input dimension.""" + pass + + @abstractmethod + def get_dim_emb(self) -> int: + """Returns the embedding dimension.""" + pass + + @abstractmethod + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + pass + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + raise NotImplementedError + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + raise NotImplementedError + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" 
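        # Descriptive note added for clarity (not part of the upstream patch):
        # shared_level == 0 links both the statistics buffers (mean/stddev) and all
        # sub-layer parameters of this block to those of base_class, so multitask
        # branches built on the same descriptor block share their weights; any other
        # shared_level is rejected below.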
+ if shared_level == 0: + # link buffers + if hasattr(self, "mean"): + if not resume: + # in case of change params during resume + base_env = EnvMatStatSe(base_class) + base_env.stats = base_class.stats + for kk in base_class.get_stats(): + base_env.stats[kk] += self.get_stats()[kk] + mean, stddev = base_env() + if not base_class.set_davg_zero: + paddle.assign( + paddle.to_tensor(mean).to(device=env.DEVICE), + base_class.mean, + ) # pylint: disable=no-explicit-dtype + paddle.assign( + paddle.to_tensor(stddev).to(device=env.DEVICE), + base_class.stddev, + ) # pylint: disable=no-explicit-dtype + # must share, even if not do stat + self.mean = base_class.mean + self.stddev = base_class.stddev + # self.set_state_dict(base_class.state_dict()) # this does not work, because it only inits the model + # the following will successfully link all the params except buffers + for item in self._sub_layers: + self._sub_layers[item] = base_class._sub_layers[item] + else: + raise NotImplementedError + + @abstractmethod + def forward( + self, + nlist: paddle.Tensor, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + extended_atype_embd: Optional[paddle.Tensor] = None, + mapping: Optional[paddle.Tensor] = None, + ): + """Calculate DescriptorBlock.""" + pass + + @abstractmethod + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + + @abstractmethod + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + + +def make_default_type_embedding( + ntypes, +): + decomp = {} + decomp["tebd_dim"] = 8 + return TypeEmbedNet(ntypes, decomp["tebd_dim"]), decomp + + +def extend_descrpt_stat(des, type_map, des_with_stat=None): + r""" + Extend the statistics of a descriptor block with types from newly provided `type_map`. + + After extending, the type related dimension of the extended statistics will have a length of + `len(old_type_map) + len(type_map)`, where `old_type_map` represents the type map in `des`. + The `get_index_between_two_maps()` function can then be used to correctly select statistics for types + from `old_type_map` or `type_map`. + Positive indices from 0 to `len(old_type_map) - 1` will select old statistics of types in `old_type_map`, + while negative indices from `-len(type_map)` to -1 will select new statistics of types in `type_map`. + + Parameters + ---------- + des : DescriptorBlock + The descriptor block to be extended. + type_map : List[str] + The name of each type of atoms to be extended. + des_with_stat : DescriptorBlock, Optional + The descriptor block has additional statistics of types from newly provided `type_map`. + If None, the default statistics will be used. + Otherwise, the statistics provided in this DescriptorBlock will be used. 
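
    Notes
    -----
    As a concrete illustration of the indexing rule above: with an `old_type_map`
    of length 3 and a newly provided `type_map` of length 2, index `1` selects the
    old statistics of the second original type, while index `-2` selects the new
    statistics of the first newly added type.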
+ + """ + if des_with_stat is not None: + extend_davg = des_with_stat["davg"] + extend_dstd = des_with_stat["dstd"] + else: + extend_shape = [len(type_map), *list(des["davg"].shape[1:])] + extend_davg = paddle.zeros(extend_shape, dtype=des["davg"].dtype).to( + device=des["davg"].place + ) + extend_dstd = paddle.ones(extend_shape, dtype=des["dstd"].dtype).to( + device=des["dstd"].place + ) + des["davg"] = paddle.concat([des["davg"], extend_davg], axis=0) + des["dstd"] = paddle.concat([des["dstd"], extend_dstd], axis=0) diff --git a/deepmd/pd/model/descriptor/dpa1.py b/deepmd/pd/model/descriptor/dpa1.py new file mode 100644 index 0000000000..c0d50fbf8b --- /dev/null +++ b/deepmd/pd/model/descriptor/dpa1.py @@ -0,0 +1,645 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.network.mlp import ( + NetworkCollection, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNet, + TypeEmbedNetConsistent, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + RESERVED_PRECISON_DICT, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_pair_exclude_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + extend_descrpt_stat, +) +from .se_atten import ( + DescrptBlockSeAtten, + NeighborGatedAttention, +) + + +@BaseDescriptor.register("dpa1") +@BaseDescriptor.register("se_atten") +class DescrptDPA1(BaseDescriptor, paddle.nn.Layer): + r"""Attention-based descriptor which is proposed in the pretrainable DPA-1[1] model. + + This descriptor, :math:`\mathcal{D}^i \in \mathbb{R}^{M \times M_{<}}`, is given by + + .. math:: + \mathcal{D}^i = \frac{1}{N_c^2}(\hat{\mathcal{G}}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \hat{\mathcal{G}}^i_<, + + where :math:`\hat{\mathcal{G}}^i` represents the embedding matrix:math:`\mathcal{G}^i` + after additional self-attention mechanism and :math:`\mathcal{R}^i` is defined by the full case in the se_e2_a descriptor. + Note that we obtain :math:`\mathcal{G}^i` using the type embedding method by default in this descriptor. + + To perform the self-attention mechanism, the queries :math:`\mathcal{Q}^{i,l} \in \mathbb{R}^{N_c\times d_k}`, + keys :math:`\mathcal{K}^{i,l} \in \mathbb{R}^{N_c\times d_k}`, + and values :math:`\mathcal{V}^{i,l} \in \mathbb{R}^{N_c\times d_v}` are first obtained: + + .. math:: + \left(\mathcal{Q}^{i,l}\right)_{j}=Q_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), + + .. math:: + \left(\mathcal{K}^{i,l}\right)_{j}=K_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), + + .. math:: + \left(\mathcal{V}^{i,l}\right)_{j}=V_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), + + where :math:`Q_{l}`, :math:`K_{l}`, :math:`V_{l}` represent three trainable linear transformations + that output the queries and keys of dimension :math:`d_k` and values of dimension :math:`d_v`, and :math:`l` + is the index of the attention layer. + The input embedding matrix to the attention layers, denoted by :math:`\mathcal{G}^{i,0}`, + is chosen as the two-body embedding matrix. + + Then the scaled dot-product attention method is adopted: + + .. 
math:: + A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})=\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right)\mathcal{V}^{i,l}, + + where :math:`\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) \in \mathbb{R}^{N_c\times N_c}` is attention weights. + In the original attention method, + one typically has :math:`\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}\right)=\mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right)`, + with :math:`\sqrt{d_{k}}` being the normalization temperature. + This is slightly modified to incorporate the angular information: + + .. math:: + \varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) = \mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right) \odot \hat{\mathcal{R}}^{i}(\hat{\mathcal{R}}^{i})^{T}, + + where :math:`\hat{\mathcal{R}}^{i} \in \mathbb{R}^{N_c\times 3}` denotes normalized relative coordinates, + :math:`\hat{\mathcal{R}}^{i}_{j} = \frac{\boldsymbol{r}_{ij}}{\lVert \boldsymbol{r}_{ij} \lVert}` + and :math:`\odot` means element-wise multiplication. + + Then layer normalization is added in a residual way to finally obtain the self-attention local embedding matrix + :math:`\hat{\mathcal{G}}^{i} = \mathcal{G}^{i,L_a}` after :math:`L_a` attention layers:[^1] + + .. math:: + \mathcal{G}^{i,l} = \mathcal{G}^{i,l-1} + \mathrm{LayerNorm}(A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})). + + Parameters + ---------- + rcut: float + The cut-off radius :math:`r_c` + rcut_smth: float + From where the environment matrix should be smoothed :math:`r_s` + sel : list[int], int + list[int]: sel[i] specifies the maxmum number of type i atoms in the cut-off radius + int: the total maxmum number of atoms in the cut-off radius + ntypes : int + Number of element types + neuron : list[int] + Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` + axis_neuron: int + Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix) + tebd_dim: int + Dimension of the type embedding + tebd_input_mode: str + The input mode of the type embedding. Supported modes are ["concat", "strip"]. + - "concat": Concatenate the type embedding with the smoothed radial information as the union input for the embedding network. + - "strip": Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. + resnet_dt: bool + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + trainable: bool + If the weights of this descriptors are trainable. + trainable_ln: bool + Whether to use trainable shift and scale weights in layer normalization. + ln_eps: float, Optional + The epsilon value for layer normalization. + type_one_side: bool + If 'False', type embeddings of both neighbor and central atoms are considered. + If 'True', only type embeddings of neighbor atoms are considered. + Default is 'False'. + attn: int + Hidden dimension of the attention vectors + attn_layer: int + Number of attention layers + attn_dotr: bool + If dot the angular gate to the attention weights + attn_mask: bool + (Only support False to keep consistent with other backend references.) + (Not used in this version. True option is not implemented.) 
+ If mask the diagonal of attention weights + exclude_types : list[list[int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection: float + Protection parameter to prevent division by zero errors during environment matrix calculations. + set_davg_zero: bool + Set the shift of embedding net input to zero. + activation_function: str + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + precision: str + The precision of the embedding net parameters. Supported options are |PRECISION| + scaling_factor: float + The scaling factor of normalization in calculations of attention weights. + If `temperature` is None, the scaling of attention weights is (N_dim * scaling_factor)**0.5 + normalize: bool + Whether to normalize the hidden vectors in attention weights calculation. + temperature: float + If not None, the scaling of attention weights is `temperature` itself. + smooth_type_embedding: bool + Whether to use smooth process in attention weights calculation. + concat_output_tebd: bool + Whether to concat type embedding at the output of the descriptor. + stripped_type_embedding: bool, Optional + (Deprecated, kept only for compatibility.) + Whether to strip the type embedding into a separate embedding network. + Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'. + Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'. + The default value is `None`, which means the `tebd_input_mode` setting will be used instead. + seed: int, Optional + Random seed for parameter initialization. + use_econf_tebd: bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. + spin + (Only support None to keep consistent with other backend references.) + (Not used in this version. Not-none option is not implemented.) + The old implementation of deepspin. + + Limitations + ----------- + The currently implementation will not support the following deprecated features + 1. spin is not None + 2. attn_mask == True + + References + ---------- + .. [1] Duo Zhang, Hangrui Bi, Fu-Zhi Dai, Wanrun Jiang, Linfeng Zhang, and Han Wang. 2022. + DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular Simulation. + arXiv preprint arXiv:2208.08236. 
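
    Examples
    --------
    A minimal construction with illustrative hyper-parameters (the values below are
    placeholders for demonstration, not recommended defaults):

    >>> from deepmd.pd.model.descriptor import DescrptDPA1
    >>> descrpt = DescrptDPA1(
    ...     rcut=6.0,
    ...     rcut_smth=0.5,
    ...     sel=120,
    ...     ntypes=2,
    ...     type_map=["O", "H"],
    ... )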
+ """ + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list = [25, 50, 100], + axis_neuron: int = 16, + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + set_davg_zero: bool = True, + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + scaling_factor: int = 1.0, + normalize=True, + temperature=None, + concat_output_tebd: bool = True, + trainable: bool = True, + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + smooth_type_embedding: bool = True, + type_one_side: bool = False, + stripped_type_embedding: Optional[bool] = None, + seed: Optional[Union[int, list[int]]] = None, + use_econf_tebd: bool = False, + use_tebd_bias: bool = False, + type_map: Optional[list[str]] = None, + # not implemented + spin=None, + type: Optional[str] = None, + ): + super().__init__() + # Ensure compatibility with the deprecated stripped_type_embedding option. + if stripped_type_embedding is not None: + # Use the user-set stripped_type_embedding parameter first + tebd_input_mode = "strip" if stripped_type_embedding else "concat" + if spin is not None: + raise NotImplementedError("old implementation of spin is not supported.") + if attn_mask: + raise NotImplementedError( + "old implementation of attn_mask is not supported." + ) + # to keep consistent with default value in this backends + if ln_eps is None: + ln_eps = 1e-5 + + del type, spin, attn_mask + self.se_atten = DescrptBlockSeAtten( + rcut, + rcut_smth, + sel, + ntypes, + neuron=neuron, + axis_neuron=axis_neuron, + tebd_dim=tebd_dim, + tebd_input_mode=tebd_input_mode, + set_davg_zero=set_davg_zero, + attn=attn, + attn_layer=attn_layer, + attn_dotr=attn_dotr, + attn_mask=False, + activation_function=activation_function, + precision=precision, + resnet_dt=resnet_dt, + scaling_factor=scaling_factor, + normalize=normalize, + temperature=temperature, + smooth=smooth_type_embedding, + type_one_side=type_one_side, + exclude_types=exclude_types, + env_protection=env_protection, + trainable_ln=trainable_ln, + ln_eps=ln_eps, + seed=child_seed(seed, 1), + ) + self.use_econf_tebd = use_econf_tebd + self.use_tebd_bias = use_tebd_bias + self.type_map = type_map + self.type_embedding = TypeEmbedNet( + ntypes, + tebd_dim, + precision=precision, + seed=child_seed(seed, 2), + use_econf_tebd=use_econf_tebd, + use_tebd_bias=use_tebd_bias, + type_map=type_map, + ) + self.tebd_dim = tebd_dim + self.concat_output_tebd = concat_output_tebd + self.trainable = trainable + # set trainable + for param in self.parameters(): + param.stop_gradient = not trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.se_atten.get_rcut() + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.se_atten.get_rcut_smth() + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return self.se_atten.get_nsel() + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.se_atten.get_sel() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.se_atten.get_ntypes() + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return 
self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + ret = self.se_atten.get_dim_out() + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + return self.se_atten.dim_emb + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return self.se_atten.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return self.se_atten.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return self.se_atten.need_sorted_nlist_for_lower() + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.se_atten.get_env_protection() + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For DPA1 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in both type_embedding and se_atten + if shared_level == 0: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + self.se_atten.share_params(base_class.se_atten, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding + elif shared_level == 1: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + return self.se_atten.compute_input_stats(merged, path) + + def set_stat_mean_and_stddev( + self, + mean: paddle.Tensor, + stddev: paddle.Tensor, + ) -> None: + """Update mean and stddev for descriptor.""" + self.se_atten.mean = mean + self.se_atten.stddev = stddev + + def get_stat_mean_and_stddev(self) -> tuple[paddle.Tensor, paddle.Tensor]: + """Get mean and stddev for descriptor.""" + return self.se_atten.mean, self.se_atten.stddev + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + obj = self.se_atten + obj.ntypes = len(type_map) + self.type_map = type_map + self.type_embedding.change_type_map(type_map=type_map) + obj.reinit_exclude(map_pair_exclude_types(obj.exclude_types, remap_index)) + if has_new_type: + # the avg and std of new types need to be updated + extend_descrpt_stat( + obj, + type_map, + des_with_stat=model_with_new_type_stat.se_atten + if model_with_new_type_stat is not None + else None, + ) + obj["davg"] = obj["davg"][remap_index] + obj["dstd"] = obj["dstd"][remap_index] + + def serialize(self) -> dict: + obj = self.se_atten + data = { + "@class": "Descriptor", + "type": "dpa1", + "@version": 2, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "ntypes": obj.ntypes, + "neuron": obj.neuron, + "axis_neuron": obj.axis_neuron, + "tebd_dim": obj.tebd_dim, + "tebd_input_mode": obj.tebd_input_mode, + "set_davg_zero": obj.set_davg_zero, + "attn": obj.attn_dim, + "attn_layer": obj.attn_layer, + "attn_dotr": obj.attn_dotr, + "attn_mask": False, + "activation_function": obj.activation_function, + "resnet_dt": obj.resnet_dt, + "scaling_factor": obj.scaling_factor, + "normalize": obj.normalize, + "temperature": obj.temperature, + "trainable_ln": obj.trainable_ln, + "ln_eps": obj.ln_eps, + "smooth_type_embedding": obj.smooth, + "type_one_side": obj.type_one_side, + "concat_output_tebd": self.concat_output_tebd, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + # make deterministic + "precision": RESERVED_PRECISON_DICT[obj.prec], + "embeddings": obj.filter_layers.serialize(), + "attention_layers": obj.dpa1_attention.serialize(), + "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(), + "type_embedding": self.type_embedding.embedding.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": obj["davg"].numpy(), + "dstd": obj["dstd"].numpy(), + }, + "trainable": self.trainable, + "spin": None, + } + if obj.tebd_input_mode in ["strip"]: + data.update({"embeddings_strip": obj.filter_layers_strip.serialize()}) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptDPA1": + data = data.copy() + check_version_compatibility(data.pop("@version"), 2, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + type_embedding = data.pop("type_embedding") + attention_layers = data.pop("attention_layers") + env_mat = data.pop("env_mat") + tebd_input_mode = data["tebd_input_mode"] + if 
tebd_input_mode in ["strip"]: + embeddings_strip = data.pop("embeddings_strip") + else: + embeddings_strip = None + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + obj = cls(**data) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.se_atten.prec).to(device=env.DEVICE) + + obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize( + type_embedding + ) + obj.se_atten["davg"] = t_cvt(variables["davg"]) + obj.se_atten["dstd"] = t_cvt(variables["dstd"]) + obj.se_atten.filter_layers = NetworkCollection.deserialize(embeddings) + if tebd_input_mode in ["strip"]: + obj.se_atten.filter_layers_strip = NetworkCollection.deserialize( + embeddings_strip + ) + obj.se_atten.dpa1_attention = NeighborGatedAttention.deserialize( + attention_layers + ) + return obj + + def forward( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + extended_coord + The extended coordinates of atoms. shape: nf x (nallx3) + extended_atype + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + del mapping + nframes, nloc, nnei = nlist.shape + nall = extended_coord.reshape([nframes, -1]).shape[1] // 3 + g1_ext = self.type_embedding(extended_atype) + g1_inp = g1_ext[:, :nloc, :] + g1, g2, h2, rot_mat, sw = self.se_atten( + nlist, + extended_coord, + extended_atype, + g1_ext, + mapping=None, + ) + if self.concat_output_tebd: + g1 = paddle.concat([g1, g1_inp], axis=-1) + + return g1, rot_mat, g2, h2, sw + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + min_nbor_dist, sel = UpdateSel().update_one_sel( + train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], True + ) + local_jdata_cpy["sel"] = sel[0] + return local_jdata_cpy, min_nbor_dist diff --git a/deepmd/pd/model/descriptor/dpa2.py b/deepmd/pd/model/descriptor/dpa2.py new file mode 100644 index 0000000000..8fbffe2d90 --- /dev/null +++ b/deepmd/pd/model/descriptor/dpa2.py @@ -0,0 +1,858 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.descriptor.dpa2 import ( + RepformerArgs, + RepinitArgs, +) +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.network.mlp import ( + Identity, + MLPLayer, + NetworkCollection, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNet, + TypeEmbedNetConsistent, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.pd.utils.nlist import ( + build_multiple_neighbor_list, + get_multiple_nlist_key, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_pair_exclude_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + extend_descrpt_stat, +) +from .repformer_layer import ( + RepformerLayer, +) +from .repformers import ( + DescrptBlockRepformers, +) +from .se_atten import ( + DescrptBlockSeAtten, +) +from .se_t_tebd import ( + DescrptBlockSeTTebd, +) + + +@BaseDescriptor.register("dpa2") +class DescrptDPA2(BaseDescriptor, paddle.nn.Layer): + def __init__( + self, + ntypes: int, + # args for repinit + repinit: Union[RepinitArgs, dict], + # args for repformer + repformer: Union[RepformerArgs, dict], + # kwargs for descriptor + concat_output_tebd: bool = True, + precision: str = "float64", + smooth: bool = True, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + add_tebd_to_repinit_out: bool = False, + use_econf_tebd: bool = False, + use_tebd_bias: bool = False, + type_map: Optional[list[str]] = None, + ): + r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492. + + Parameters + ---------- + repinit : Union[RepinitArgs, dict] + The arguments used to initialize the repinit block, see docstr in `RepinitArgs` for details information. + repformer : Union[RepformerArgs, dict] + The arguments used to initialize the repformer block, see docstr in `RepformerArgs` for details information. + concat_output_tebd : bool, optional + Whether to concat type embedding at the output of the descriptor. + precision : str, optional + The precision of the embedding net parameters. + smooth : bool, optional + Whether to use smoothness in processes such as attention weights calculation. 
+ exclude_types : list[list[int]], optional + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float, optional + Protection parameter to prevent division by zero errors during environment matrix calculations. + For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. + trainable : bool, optional + If the parameters are trainable. + seed : int, optional + Random seed for parameter initialization. + add_tebd_to_repinit_out : bool, optional + Whether to add type embedding to the output representation from repinit before inputting it into repformer. + use_econf_tebd : bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map : list[str], Optional + A list of strings. Give the name to each type of atoms. + + Returns + ------- + descriptor: paddle.Tensor + the descriptor of shape nb x nloc x g1_dim. + invariant single-atom representation. + g2: paddle.Tensor + invariant pair-atom representation. + h2: paddle.Tensor + equivariant pair-atom representation. + rot_mat: paddle.Tensor + rotation matrix for equivariant fittings + sw: paddle.Tensor + The switch function for decaying inverse distance. + + """ + super().__init__() + + def init_subclass_params(sub_data, sub_class): + if isinstance(sub_data, dict): + return sub_class(**sub_data) + elif isinstance(sub_data, sub_class): + return sub_data + else: + raise ValueError( + f"Input args must be a {sub_class.__name__} class or a dict!" + ) + + self.repinit_args = init_subclass_params(repinit, RepinitArgs) + self.repformer_args = init_subclass_params(repformer, RepformerArgs) + + self.repinit = DescrptBlockSeAtten( + self.repinit_args.rcut, + self.repinit_args.rcut_smth, + self.repinit_args.nsel, + ntypes, + attn_layer=0, + neuron=self.repinit_args.neuron, + axis_neuron=self.repinit_args.axis_neuron, + tebd_dim=self.repinit_args.tebd_dim, + tebd_input_mode=self.repinit_args.tebd_input_mode, + set_davg_zero=self.repinit_args.set_davg_zero, + exclude_types=exclude_types, + env_protection=env_protection, + activation_function=self.repinit_args.activation_function, + precision=precision, + resnet_dt=self.repinit_args.resnet_dt, + smooth=smooth, + type_one_side=self.repinit_args.type_one_side, + seed=child_seed(seed, 0), + ) + self.use_three_body = self.repinit_args.use_three_body + if self.use_three_body: + self.repinit_three_body = DescrptBlockSeTTebd( + self.repinit_args.three_body_rcut, + self.repinit_args.three_body_rcut_smth, + self.repinit_args.three_body_sel, + ntypes, + neuron=self.repinit_args.three_body_neuron, + tebd_dim=self.repinit_args.tebd_dim, + tebd_input_mode=self.repinit_args.tebd_input_mode, + set_davg_zero=self.repinit_args.set_davg_zero, + exclude_types=exclude_types, + env_protection=env_protection, + activation_function=self.repinit_args.activation_function, + precision=precision, + resnet_dt=self.repinit_args.resnet_dt, + smooth=smooth, + seed=child_seed(seed, 5), + ) + else: + self.repinit_three_body = None + self.repformers = DescrptBlockRepformers( + self.repformer_args.rcut, + self.repformer_args.rcut_smth, + self.repformer_args.nsel, + ntypes, + nlayers=self.repformer_args.nlayers, + g1_dim=self.repformer_args.g1_dim, + g2_dim=self.repformer_args.g2_dim, + 
axis_neuron=self.repformer_args.axis_neuron, + direct_dist=self.repformer_args.direct_dist, + update_g1_has_conv=self.repformer_args.update_g1_has_conv, + update_g1_has_drrd=self.repformer_args.update_g1_has_drrd, + update_g1_has_grrg=self.repformer_args.update_g1_has_grrg, + update_g1_has_attn=self.repformer_args.update_g1_has_attn, + update_g2_has_g1g1=self.repformer_args.update_g2_has_g1g1, + update_g2_has_attn=self.repformer_args.update_g2_has_attn, + update_h2=self.repformer_args.update_h2, + attn1_hidden=self.repformer_args.attn1_hidden, + attn1_nhead=self.repformer_args.attn1_nhead, + attn2_hidden=self.repformer_args.attn2_hidden, + attn2_nhead=self.repformer_args.attn2_nhead, + attn2_has_gate=self.repformer_args.attn2_has_gate, + activation_function=self.repformer_args.activation_function, + update_style=self.repformer_args.update_style, + update_residual=self.repformer_args.update_residual, + update_residual_init=self.repformer_args.update_residual_init, + set_davg_zero=self.repformer_args.set_davg_zero, + smooth=smooth, + exclude_types=exclude_types, + env_protection=env_protection, + precision=precision, + trainable_ln=self.repformer_args.trainable_ln, + ln_eps=self.repformer_args.ln_eps, + use_sqrt_nnei=self.repformer_args.use_sqrt_nnei, + g1_out_conv=self.repformer_args.g1_out_conv, + g1_out_mlp=self.repformer_args.g1_out_mlp, + seed=child_seed(seed, 1), + ) + self.rcsl_list = [ + (self.repformers.get_rcut(), self.repformers.get_nsel()), + (self.repinit.get_rcut(), self.repinit.get_nsel()), + ] + if self.use_three_body: + self.rcsl_list.append( + (self.repinit_three_body.get_rcut(), self.repinit_three_body.get_nsel()) + ) + self.rcsl_list.sort() + for ii in range(1, len(self.rcsl_list)): + assert ( + self.rcsl_list[ii - 1][1] <= self.rcsl_list[ii][1] + ), "rcut and sel are not in the same order" + self.rcut_list = [ii[0] for ii in self.rcsl_list] + self.nsel_list = [ii[1] for ii in self.rcsl_list] + self.use_econf_tebd = use_econf_tebd + self.use_tebd_bias = use_tebd_bias + self.type_map = type_map + self.type_embedding = TypeEmbedNet( + ntypes, + self.repinit_args.tebd_dim, + precision=precision, + seed=child_seed(seed, 2), + use_econf_tebd=self.use_econf_tebd, + use_tebd_bias=use_tebd_bias, + type_map=type_map, + ) + self.concat_output_tebd = concat_output_tebd + self.precision = precision + self.smooth = smooth + self.exclude_types = exclude_types + self.env_protection = env_protection + self.trainable = trainable + self.add_tebd_to_repinit_out = add_tebd_to_repinit_out + + self.repinit_out_dim = self.repinit.dim_out + if self.repinit_args.use_three_body: + assert self.repinit_three_body is not None + self.repinit_out_dim += self.repinit_three_body.dim_out + + if self.repinit_out_dim == self.repformers.dim_in: + self.g1_shape_tranform = Identity() + else: + self.g1_shape_tranform = MLPLayer( + self.repinit_out_dim, + self.repformers.dim_in, + bias=False, + precision=precision, + init="glorot", + seed=child_seed(seed, 3), + ) + self.tebd_transform = None + if self.add_tebd_to_repinit_out: + self.tebd_transform = MLPLayer( + self.repinit_args.tebd_dim, + self.repformers.dim_in, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + assert self.repinit.rcut > self.repformers.rcut + assert self.repinit.sel[0] > self.repformers.sel[0] + + self.tebd_dim = self.repinit_args.tebd_dim + self.rcut = self.repinit.get_rcut() + self.rcut_smth = self.repinit.get_rcut_smth() + self.ntypes = ntypes + self.sel = self.repinit.sel + # set trainable + for param in 
self.parameters(): + param.stop_gradient = not trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension of this descriptor.""" + ret = self.repformers.dim_out + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + """Returns the embedding dimension of this descriptor.""" + return self.repformers.dim_emb + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return any( + [self.repinit.has_message_passing(), self.repformers.has_message_passing()] + ) + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + # the env_protection of repinit is the same as that of the repformer + return self.repinit.get_env_protection() + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" 
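        # Descriptive note added for clarity (not part of the upstream patch):
        # the levels handled below share progressively fewer blocks:
        # 0 shares type_embedding, repinit, g1_shape_tranform and repformers;
        # 1 shares type_embedding and repinit; 2 shares type_embedding,
        # g1_shape_tranform and repformers; 3 shares type_embedding only.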
+ # For DPA2 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in type_embedding, repinit and repformers + if shared_level == 0: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + self.repinit.share_params(base_class.repinit, 0, resume=resume) + self._sub_layers["g1_shape_tranform"] = base_class._sub_layers[ + "g1_shape_tranform" + ] + self.repformers.share_params(base_class.repformers, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding and repinit + elif shared_level == 1: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + self.repinit.share_params(base_class.repinit, 0, resume=resume) + # shared_level: 2 + # share all parameters in type_embedding and repformers + elif shared_level == 2: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + self._sub_layers["g1_shape_tranform"] = base_class._sub_layers[ + "g1_shape_tranform" + ] + self.repformers.share_params(base_class.repformers, 0, resume=resume) + # shared_level: 3 + # share all parameters in type_embedding + elif shared_level == 3: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + # Other shared levels + else: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + self.type_map = type_map + self.type_embedding.change_type_map(type_map=type_map) + self.exclude_types = map_pair_exclude_types(self.exclude_types, remap_index) + self.ntypes = len(type_map) + repinit = self.repinit + repformers = self.repformers + repinit_three_body = self.repinit_three_body + if has_new_type: + # the avg and std of new types need to be updated + extend_descrpt_stat( + repinit, + type_map, + des_with_stat=model_with_new_type_stat.repinit + if model_with_new_type_stat is not None + else None, + ) + extend_descrpt_stat( + repformers, + type_map, + des_with_stat=model_with_new_type_stat.repformers + if model_with_new_type_stat is not None + else None, + ) + if self.use_three_body: + extend_descrpt_stat( + repinit_three_body, + type_map, + des_with_stat=model_with_new_type_stat.repinit_three_body + if model_with_new_type_stat is not None + else None, + ) + repinit.ntypes = self.ntypes + repformers.ntypes = self.ntypes + repinit.reinit_exclude(self.exclude_types) + repformers.reinit_exclude(self.exclude_types) + repinit["davg"] = repinit["davg"][remap_index] + repinit["dstd"] = repinit["dstd"][remap_index] + repformers["davg"] = repformers["davg"][remap_index] + repformers["dstd"] = repformers["dstd"][remap_index] + if self.use_three_body: + repinit_three_body.ntypes = self.ntypes + repinit_three_body.reinit_exclude(self.exclude_types) + repinit_three_body["davg"] = repinit_three_body["davg"][remap_index] + repinit_three_body["dstd"] = repinit_three_body["dstd"][remap_index] + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + """Returns the embedding dimension g2.""" + return self.get_dim_emb() + + def 
compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + descrpt_list = [self.repinit, self.repformers] + if self.use_three_body: + descrpt_list.append(self.repinit_three_body) + for ii, descrpt in enumerate(descrpt_list): + descrpt.compute_input_stats(merged, path) + + def set_stat_mean_and_stddev( + self, + mean: list[paddle.Tensor], + stddev: list[paddle.Tensor], + ) -> None: + """Update mean and stddev for descriptor.""" + descrpt_list = [self.repinit, self.repformers] + if self.use_three_body: + descrpt_list.append(self.repinit_three_body) + for ii, descrpt in enumerate(descrpt_list): + descrpt.mean = mean[ii] + descrpt.stddev = stddev[ii] + + def get_stat_mean_and_stddev( + self, + ) -> tuple[list[paddle.Tensor], list[paddle.Tensor]]: + """Get mean and stddev for descriptor.""" + mean_list = [self.repinit.mean, self.repformers.mean] + stddev_list = [ + self.repinit.stddev, + self.repformers.stddev, + ] + if self.use_three_body: + mean_list.append(self.repinit_three_body.mean) + stddev_list.append(self.repinit_three_body.stddev) + return mean_list, stddev_list + + def serialize(self) -> dict: + repinit = self.repinit + repformers = self.repformers + repinit_three_body = self.repinit_three_body + data = { + "@class": "Descriptor", + "type": "dpa2", + "@version": 3, + "ntypes": self.ntypes, + "repinit_args": self.repinit_args.serialize(), + "repformer_args": self.repformer_args.serialize(), + "concat_output_tebd": self.concat_output_tebd, + "precision": self.precision, + "smooth": self.smooth, + "exclude_types": self.exclude_types, + "env_protection": self.env_protection, + "trainable": self.trainable, + "add_tebd_to_repinit_out": self.add_tebd_to_repinit_out, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + "type_embedding": self.type_embedding.embedding.serialize(), + "g1_shape_tranform": self.g1_shape_tranform.serialize(), + } + if self.add_tebd_to_repinit_out: + data.update( + { + "tebd_transform": self.tebd_transform.serialize(), + } + ) + repinit_variable = { + "embeddings": repinit.filter_layers.serialize(), + "env_mat": DPEnvMat(repinit.rcut, repinit.rcut_smth).serialize(), + "@variables": { + "davg": to_numpy_array(repinit["davg"]), + "dstd": to_numpy_array(repinit["dstd"]), + }, + } + if repinit.tebd_input_mode in ["strip"]: + repinit_variable.update( + {"embeddings_strip": repinit.filter_layers_strip.serialize()} + ) + repformers_variable = { + "g2_embd": repformers.g2_embd.serialize(), + "repformer_layers": [layer.serialize() for layer in repformers.layers], + "env_mat": DPEnvMat(repformers.rcut, repformers.rcut_smth).serialize(), + "@variables": { + "davg": to_numpy_array(repformers["davg"]), + "dstd": to_numpy_array(repformers["dstd"]), + }, + } + data.update( + { + "repinit_variable": 
repinit_variable, + "repformers_variable": repformers_variable, + } + ) + if self.use_three_body: + repinit_three_body_variable = { + "embeddings": repinit_three_body.filter_layers.serialize(), + "env_mat": DPEnvMat( + repinit_three_body.rcut, repinit_three_body.rcut_smth + ).serialize(), + "@variables": { + "davg": to_numpy_array(repinit_three_body["davg"]), + "dstd": to_numpy_array(repinit_three_body["dstd"]), + }, + } + if repinit_three_body.tebd_input_mode in ["strip"]: + repinit_three_body_variable.update( + { + "embeddings_strip": repinit_three_body.filter_layers_strip.serialize() + } + ) + data.update( + { + "repinit_three_body_variable": repinit_three_body_variable, + } + ) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptDPA2": + data = data.copy() + version = data.pop("@version") + check_version_compatibility(version, 3, 1) + data.pop("@class") + data.pop("type") + repinit_variable = data.pop("repinit_variable").copy() + repformers_variable = data.pop("repformers_variable").copy() + repinit_three_body_variable = ( + data.pop("repinit_three_body_variable").copy() + if "repinit_three_body_variable" in data + else None + ) + type_embedding = data.pop("type_embedding") + g1_shape_tranform = data.pop("g1_shape_tranform") + tebd_transform = data.pop("tebd_transform", None) + add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"] + if version < 3: + # compat with old version + data["repformer_args"]["use_sqrt_nnei"] = False + data["repformer_args"]["g1_out_conv"] = False + data["repformer_args"]["g1_out_mlp"] = False + data["repinit"] = RepinitArgs(**data.pop("repinit_args")) + data["repformer"] = RepformerArgs(**data.pop("repformer_args")) + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + obj = cls(**data) + obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize( + type_embedding + ) + if add_tebd_to_repinit_out: + assert isinstance(tebd_transform, dict) + obj.tebd_transform = MLPLayer.deserialize(tebd_transform) + if obj.repinit.dim_out != obj.repformers.dim_in: + obj.g1_shape_tranform = MLPLayer.deserialize(g1_shape_tranform) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.repinit.prec, place=env.DEVICE) + + # deserialize repinit + statistic_repinit = repinit_variable.pop("@variables") + env_mat = repinit_variable.pop("env_mat") + tebd_input_mode = data["repinit"].tebd_input_mode + obj.repinit.filter_layers = NetworkCollection.deserialize( + repinit_variable.pop("embeddings") + ) + if tebd_input_mode in ["strip"]: + obj.repinit.filter_layers_strip = NetworkCollection.deserialize( + repinit_variable.pop("embeddings_strip") + ) + obj.repinit["davg"] = t_cvt(statistic_repinit["davg"]) + obj.repinit["dstd"] = t_cvt(statistic_repinit["dstd"]) + + if data["repinit"].use_three_body: + # deserialize repinit_three_body + statistic_repinit_three_body = repinit_three_body_variable.pop("@variables") + env_mat = repinit_three_body_variable.pop("env_mat") + tebd_input_mode = data["repinit"].tebd_input_mode + obj.repinit_three_body.filter_layers = NetworkCollection.deserialize( + repinit_three_body_variable.pop("embeddings") + ) + if tebd_input_mode in ["strip"]: + obj.repinit_three_body.filter_layers_strip = ( + NetworkCollection.deserialize( + repinit_three_body_variable.pop("embeddings_strip") + ) + ) + obj.repinit_three_body["davg"] = t_cvt(statistic_repinit_three_body["davg"]) + obj.repinit_three_body["dstd"] = t_cvt(statistic_repinit_three_body["dstd"]) + + # deserialize repformers + 
statistic_repformers = repformers_variable.pop("@variables") + env_mat = repformers_variable.pop("env_mat") + repformer_layers = repformers_variable.pop("repformer_layers") + obj.repformers.g2_embd = MLPLayer.deserialize( + repformers_variable.pop("g2_embd") + ) + obj.repformers["davg"] = t_cvt(statistic_repformers["davg"]) + obj.repformers["dstd"] = t_cvt(statistic_repformers["dstd"]) + obj.repformers.layers = paddle.nn.LayerList( + [RepformerLayer.deserialize(layer) for layer in repformer_layers] + ) + return obj + + def forward( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + extended_coord + The extended coordinates of atoms. shape: nf x (nallx3) + extended_atype + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, mapps extended region index to local region. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + use_three_body = self.use_three_body + nframes, nloc, nnei = nlist.shape + nall = extended_coord.reshape([nframes, -1]).shape[1] // 3 + # nlists + nlist_dict = build_multiple_neighbor_list( + extended_coord, + nlist, + self.rcut_list, + self.nsel_list, + ) + # repinit + g1_ext = self.type_embedding(extended_atype) + g1_inp = g1_ext[:, :nloc, :] + g1, _, _, _, _ = self.repinit( + nlist_dict[ + get_multiple_nlist_key(self.repinit.get_rcut(), self.repinit.get_nsel()) + ], + extended_coord, + extended_atype, + g1_ext, + mapping, + ) + if use_three_body: + assert self.repinit_three_body is not None + g1_three_body, __, __, __, __ = self.repinit_three_body( + nlist_dict[ + get_multiple_nlist_key( + self.repinit_three_body.get_rcut(), + self.repinit_three_body.get_nsel(), + ) + ], + extended_coord, + extended_atype, + g1_ext, + mapping, + ) + g1 = paddle.concat([g1, g1_three_body], axis=-1) + # linear to change shape + g1 = self.g1_shape_tranform(g1) + if self.add_tebd_to_repinit_out: + assert self.tebd_transform is not None + g1 = g1 + self.tebd_transform(g1_inp) + # mapping g1 + if comm_dict is None: + assert mapping is not None + mapping_ext = ( + mapping.reshape([nframes, nall]) + .unsqueeze(-1) + .expand([-1, -1, g1.shape[-1]]) + ) + g1_ext = decomp.take_along_axis(g1, mapping_ext, 1) + g1 = g1_ext + # repformer + g1, g2, h2, rot_mat, sw = self.repformers( + nlist_dict[ + get_multiple_nlist_key( + self.repformers.get_rcut(), self.repformers.get_nsel() + ) + ], + extended_coord, + extended_atype, + g1, + mapping, + comm_dict, + ) + if self.concat_output_tebd: + g1 = paddle.concat([g1, g1_inp], axis=-1) + return g1, rot_mat, g2, h2, sw + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + update_sel = UpdateSel() + min_nbor_dist, repinit_sel = update_sel.update_one_sel( + train_data, + type_map, + local_jdata_cpy["repinit"]["rcut"], + local_jdata_cpy["repinit"]["nsel"], + True, + ) + local_jdata_cpy["repinit"]["nsel"] = repinit_sel[0] + min_nbor_dist, repformer_sel = update_sel.update_one_sel( + train_data, + type_map, + local_jdata_cpy["repformer"]["rcut"], + local_jdata_cpy["repformer"]["nsel"], + True, + ) + local_jdata_cpy["repformer"]["nsel"] = repformer_sel[0] + return local_jdata_cpy, min_nbor_dist diff --git a/deepmd/pd/model/descriptor/env_mat.py b/deepmd/pd/model/descriptor/env_mat.py new file mode 100644 index 0000000000..3a9daec1e8 --- /dev/null +++ b/deepmd/pd/model/descriptor/env_mat.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.pd.utils import ( + decomp, +) +from deepmd.pd.utils.preprocess import ( + compute_smooth_weight, +) + + +def _make_env_mat( + nlist, + coord, + rcut: float, + ruct_smth: float, + radial_only: bool = False, + protection: float = 0.0, +): + """Make smooth environment matrix.""" + bsz, natoms, nnei = nlist.shape + coord = coord.reshape([bsz, -1, 3]) + nall = coord.shape[1] + mask = nlist >= 0 + # nlist = nlist * mask ## this impl will contribute nans in Hessian calculation. + nlist = paddle.where(mask, nlist, nall - 1) + coord_l = coord[:, :natoms].reshape([bsz, -1, 1, 3]) + index = nlist.reshape([bsz, -1]).unsqueeze(-1).expand([-1, -1, 3]) + # coord_r = paddle.take_along_axis(coord, axis=1, indices=index) + coord_r = decomp.take_along_axis(coord, axis=1, indices=index) + coord_r = coord_r.reshape([bsz, natoms, nnei, 3]) + diff = coord_r - coord_l + # length = paddle.linalg.norm(diff, axis=-1, keepdim=True) + length = decomp.norm(diff, axis=-1, keepdim=True) + # for index 0 nloc atom + length = length + (~mask.unsqueeze(-1)).astype(length.dtype) + t0 = 1 / (length + protection) + t1 = diff / (length + protection) ** 2 + weight = compute_smooth_weight(length, ruct_smth, rcut) + weight = weight * mask.unsqueeze(-1).astype(weight.dtype) + if radial_only: + env_mat = t0 * weight + else: + env_mat = paddle.concat([t0.astype(t1.dtype), t1], axis=-1) * weight + return env_mat, diff * mask.unsqueeze(-1).astype(diff.dtype), weight + + +def prod_env_mat( + extended_coord, + nlist, + atype, + mean, + stddev, + rcut: float, + rcut_smth: float, + radial_only: bool = False, + protection: float = 0.0, +): + """Generate smooth environment matrix from atom coordinates and other context. + + Args: + - extended_coord: Copied atom coordinates with shape [nframes, nall*3]. + - atype: Atom types with shape [nframes, nloc]. + - mean: Average value of descriptor per element type with shape [len(sec), nnei, 4 or 1]. + - stddev: Standard deviation of descriptor per element type with shape [len(sec), nnei, 4 or 1]. + - rcut: Cut-off radius. + - rcut_smth: Smooth hyper-parameter for pair force & energy. + - radial_only: Whether to return a full description or a radial-only descriptor. + - protection: Protection parameter to prevent division by zero errors during calculations. 
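+    - nlist: Neighbor list with shape [nframes, nloc, nnei].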
+ + Returns + ------- + - env_mat: Shape is [nframes, natoms[1]*nnei*4]. + """ + _env_mat_se_a, diff, switch = _make_env_mat( + nlist, + extended_coord, + rcut, + rcut_smth, + radial_only, + protection=protection, + ) # shape [n_atom, dim, 4 or 1] + t_avg = mean[atype] # [n_atom, dim, 4 or 1] + t_std = stddev[atype] # [n_atom, dim, 4 or 1] + env_mat_se_a = (_env_mat_se_a - t_avg) / t_std + return env_mat_se_a, diff, switch diff --git a/deepmd/pd/model/descriptor/gaussian_lcc.py b/deepmd/pd/model/descriptor/gaussian_lcc.py new file mode 100644 index 0000000000..038198dfac --- /dev/null +++ b/deepmd/pd/model/descriptor/gaussian_lcc.py @@ -0,0 +1,325 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +import paddle +import paddle.nn as nn + +from deepmd.pd.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pd.model.network.network import ( + Evoformer3bEncoder, + GaussianEmbedding, + TypeEmbedNet, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.utils.path import ( + DPPath, +) + + +class DescrptGaussianLcc(paddle.nn.Layer, BaseDescriptor): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + num_pair: int, + embed_dim: int = 768, + kernel_num: int = 128, + pair_embed_dim: int = 64, + num_block: int = 1, + layer_num: int = 12, + attn_head: int = 48, + pair_hidden_dim: int = 16, + ffn_embedding_dim: int = 768, + dropout: float = 0.0, + droppath_prob: float = 0.1, + pair_dropout: float = 0.25, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + pre_ln: bool = True, + do_tag_embedding: bool = False, + tag_ener_pref: bool = False, + atomic_sum_gbf: bool = False, + pre_add_seq: bool = True, + tri_update: bool = True, + **kwargs, + ): + """Construct a descriptor of Gaussian Based Local Cluster. + + Args: + - rcut: Cut-off radius. + - rcut_smth: Smooth hyper-parameter for pair force & energy. **Not used in this descriptor**. + - sel: For each element type, how many atoms is selected as neighbors. + - ntypes: Number of atom types. + - num_pair: Number of atom type pairs. Default is 2 * ntypes. + - kernel_num: Number of gaussian kernels. + - embed_dim: Dimension of atomic representation. + - pair_embed_dim: Dimension of pair representation. + - num_block: Number of evoformer blocks. + - layer_num: Number of attention layers. + - attn_head: Number of attention heads. + - pair_hidden_dim: Hidden dimension of pair representation during attention process. + - ffn_embedding_dim: Dimension during feed forward network. + - dropout: Dropout probability of atomic representation. + - droppath_prob: If not zero, it will use drop paths (Stochastic Depth) per sample and ignore `dropout`. + - pair_dropout: Dropout probability of pair representation during triangular update. + - attention_dropout: Dropout probability during attetion process. + - activation_dropout: Dropout probability of pair feed forward network. + - pre_ln: Do previous layer norm or not. + - do_tag_embedding: Add tag embedding to atomic and pair representations. (`tags`, `tags2`, `tags3` must exist) + - atomic_sum_gbf: Add sum of gaussian outputs to atomic representation or not. + - pre_add_seq: Add output of other descriptor (if has) to the atomic representation before attention. 
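+        - tri_update: Whether to use the triangular update of the pair representation in the Evoformer encoder.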
+ """ + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.embed_dim = embed_dim + self.num_pair = num_pair + self.kernel_num = kernel_num + self.pair_embed_dim = pair_embed_dim + self.num_block = num_block + self.layer_num = layer_num + self.attention_heads = attn_head + self.pair_hidden_dim = pair_hidden_dim + self.ffn_embedding_dim = ffn_embedding_dim + self.dropout = dropout + self.droppath_prob = droppath_prob + self.pair_dropout = pair_dropout + self.attention_dropout = attention_dropout + self.activation_dropout = activation_dropout + self.pre_ln = pre_ln + self.do_tag_embedding = do_tag_embedding + self.tag_ener_pref = tag_ener_pref + self.atomic_sum_gbf = atomic_sum_gbf + self.local_cluster = True + self.pre_add_seq = pre_add_seq + self.tri_update = tri_update + + if isinstance(sel, int): + sel = [sel] + + self.ntypes = ntypes + self.sec = paddle.to_tensor(sel) # pylint: disable=no-explicit-dtype,no-explicit-device + self.nnei = sum(sel) + + if self.do_tag_embedding: + self.tag_encoder = nn.Embedding(3, self.embed_dim) + self.tag_encoder2 = nn.Embedding(2, self.embed_dim) + self.tag_type_embedding = TypeEmbedNet(10, pair_embed_dim) + self.edge_type_embedding = nn.Embedding( + (ntypes + 1) * (ntypes + 1), + pair_embed_dim, + padding_idx=(ntypes + 1) * (ntypes + 1) - 1, + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ) + self.gaussian_encoder = GaussianEmbedding( + rcut, + kernel_num, + num_pair, + embed_dim, + pair_embed_dim, + sel, + ntypes, + atomic_sum_gbf, + ) + self.backbone = Evoformer3bEncoder( + self.nnei, + layer_num=self.layer_num, + attn_head=self.attention_heads, + atomic_dim=self.embed_dim, + pair_dim=self.pair_embed_dim, + pair_hidden_dim=self.pair_hidden_dim, + ffn_embedding_dim=self.ffn_embedding_dim, + dropout=self.dropout, + droppath_prob=self.droppath_prob, + pair_dropout=self.pair_dropout, + attention_dropout=self.attention_dropout, + activation_dropout=self.activation_dropout, + pre_ln=self.pre_ln, + tri_update=self.tri_update, + ) + + @property + def dim_out(self): + """Returns the output dimension of atomic representation.""" + return self.embed_dim + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.embed_dim + + @property + def dim_emb(self): + """Returns the output dimension of pair representation.""" + return self.pair_embed_dim + + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): + """Update mean and stddev for descriptor elements.""" + pass + + def forward( + self, + extended_coord, + nlist, + atype, + nlist_type, + nlist_loc=None, + atype_tebd=None, + nlist_tebd=None, + seq_input=None, + ): + """Calculate the atomic and pair representations of this descriptor. + + Args: + - extended_coord: Copied atom coordinates with shape [nframes, nall, 3]. + - nlist: Neighbor list with shape [nframes, nloc, nnei]. + - atype: Atom type with shape [nframes, nloc]. + - nlist_type: Atom type of neighbors with shape [nframes, nloc, nnei]. + - nlist_loc: Local index of neighbor list with shape [nframes, nloc, nnei]. + - atype_tebd: Atomic type embedding with shape [nframes, nloc, tebd_dim]. + - nlist_tebd: Type embeddings of neighbor with shape [nframes, nloc, nnei, tebd_dim]. + - seq_input: The sequential input from other descriptor with + shape [nframes, nloc, tebd_dim] or [nframes * nloc, 1 + nnei, tebd_dim] + + Returns + ------- + - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron]. 
+ - ret: environment matrix with shape [nframes, nloc, self.neei, out_size] + """ + nframes, nloc = nlist.shape[:2] + nall = extended_coord.shape[1] + nlist2 = paddle.concat( + [ + paddle.arange(0, nloc) + .to(device=nlist.place) # pylint: disable=no-explicit-dtype + .reshape([1, nloc, 1]) + .expand([nframes, -1, -1]), + nlist, + ], + axis=-1, + ) + nlist_loc2 = paddle.concat( + [ + paddle.arange(0, nloc) + .to(device=nlist_loc.place) # pylint: disable=no-explicit-dtype + .reshape([1, nloc, 1]) + .expand([nframes, -1, -1]), + nlist_loc, + ], + axis=-1, + ) + nlist_type2 = paddle.concat( + [atype.reshape([nframes, nloc, 1]), nlist_type], axis=-1 + ) + nnei2_mask = nlist2 != -1 + padding_mask = nlist2 == -1 + nlist2 = nlist2 * nnei2_mask + nlist_loc2 = nlist_loc2 * nnei2_mask + + # nframes x nloc x (1 + nnei2) x (1 + nnei2) + pair_mask = nnei2_mask.unsqueeze(-1) * nnei2_mask.unsqueeze(-2) + # nframes x nloc x (1 + nnei2) x (1 + nnei2) x head + attn_mask = paddle.zeros( + [nframes, nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads], + dtype=extended_coord.dtype, + ).to(device=nlist.place) + attn_mask.masked_fill_(padding_mask.unsqueeze(2).unsqueeze(-1), float("-inf")) + # (nframes x nloc) x head x (1 + nnei2) x (1 + nnei2) + attn_mask = ( + attn_mask.reshape( + [nframes * nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads] + ) + .transpose([0, 3, 1, 2]) + .contiguous() + ) + + # Atomic feature + # [(nframes x nloc) x (1 + nnei2) x tebd_dim] + atom_feature = decomp.take_along_axis( + atype_tebd, + axis=1, + indices=nlist_loc2.reshape([nframes, -1]) + .unsqueeze(-1) + .expand([-1, -1, self.embed_dim]), + ).reshape([nframes * nloc, 1 + self.nnei, self.embed_dim]) + if self.pre_add_seq and seq_input is not None: + first_dim = seq_input.shape[0] + if first_dim == nframes * nloc: + atom_feature += seq_input + elif first_dim == nframes: + atom_feature_seq = decomp.take_along_axis( + seq_input, + axis=1, + indices=nlist_loc2.reshape([nframes, -1]) + .unsqueeze(-1) + .expand([-1, -1, self.embed_dim]), + ).reshape([nframes * nloc, 1 + self.nnei, self.embed_dim]) + atom_feature += atom_feature_seq + else: + raise RuntimeError + atom_feature = atom_feature * nnei2_mask.reshape( + [nframes * nloc, 1 + self.nnei, 1] + ) + + # Pair feature + # [(nframes x nloc) x (1 + nnei2)] + nlist_type2_reshape = nlist_type2.reshape([nframes * nloc, 1 + self.nnei]) + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2)] + edge_type = nlist_type2_reshape.unsqueeze(-1) * ( + self.ntypes + 1 + ) + nlist_type2_reshape.unsqueeze(-2) + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] + edge_feature = self.edge_type_embedding(edge_type) + + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 2] + edge_type_2dim = paddle.concat( + [ + nlist_type2_reshape.reshape( + [nframes * nloc, 1 + self.nnei, 1, 1] + ).expand([-1, -1, 1 + self.nnei, -1]), + nlist_type2_reshape.reshape( + [nframes * nloc, 1, 1 + self.nnei, 1] + ).expand([-1, 1 + self.nnei, -1, -1]) + + self.ntypes, + ], + axis=-1, + ) + # [(nframes x nloc) x (1 + nnei2) x 3] + coord_selected = decomp.take_along_axis( + extended_coord.unsqueeze(1) + .expand([-1, nloc, -1, -1]) + .reshape([nframes * nloc, nall, 3]), + axis=1, + indices=nlist2.reshape([nframes * nloc, 1 + self.nnei, 1]).expand( + [-1, -1, 3] + ), + ) + + # Update pair features (or and atomic features) with gbf features + # delta_pos: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 3]. 
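+        # The Gaussian embedding mixes distance information from the selected
+        # coordinates into the atomic and pair features; its pair output is then
+        # used as the attention bias of the Evoformer backbone below.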
+ atomic_feature, pair_feature, delta_pos = self.gaussian_encoder( + coord_selected, atom_feature, edge_type_2dim, edge_feature + ) + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] + attn_bias = pair_feature + + # output: [(nframes x nloc) x (1 + nnei2) x tebd_dim] + # pair: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] + output, pair = self.backbone( + atomic_feature, + pair=attn_bias, + attn_mask=attn_mask, + pair_mask=pair_mask, + atom_mask=nnei2_mask.reshape([nframes * nloc, 1 + self.nnei]), + ) + + return output, pair, delta_pos, None diff --git a/deepmd/pd/model/descriptor/hybrid.py b/deepmd/pd/model/descriptor/hybrid.py new file mode 100644 index 0000000000..dcacf0eb37 --- /dev/null +++ b/deepmd/pd/model/descriptor/hybrid.py @@ -0,0 +1,356 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import math +from typing import ( + Any, + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.pd.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pd.utils.nlist import ( + nlist_distinguish_types, +) +from deepmd.pd.utils.utils import ( + to_paddle_tensor, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@BaseDescriptor.register("hybrid") +class DescrptHybrid(BaseDescriptor, paddle.nn.Layer): + """Concate a list of descriptors to form a new descriptor. + + Parameters + ---------- + list : list : List[Union[BaseDescriptor, Dict[str, Any]]] + Build a descriptor from the concatenation of the list of descriptors. + The descriptor can be either an object or a dictionary. + """ + + nlist_cut_idx: list[paddle.Tensor] + + def __init__( + self, + list: list[Union[BaseDescriptor, dict[str, Any]]], + **kwargs, + ) -> None: + super().__init__() + # warning: list is conflict with built-in list + descrpt_list = list + if descrpt_list == [] or descrpt_list is None: + raise RuntimeError( + "cannot build descriptor from an empty list of descriptors." + ) + formatted_descript_list: list[BaseDescriptor] = [] + for ii in descrpt_list: + if isinstance(ii, BaseDescriptor): + formatted_descript_list.append(ii) + elif isinstance(ii, dict): + formatted_descript_list.append( + # pass other arguments (e.g. 
ntypes) to the descriptor + BaseDescriptor(**ii, **kwargs) + ) + else: + raise NotImplementedError + self.descrpt_list = paddle.nn.LayerList(formatted_descript_list) + self.numb_descrpt = len(self.descrpt_list) + for ii in range(1, self.numb_descrpt): + assert ( + self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() + ), f"number of atom types in {ii}th descrptor does not match others" + # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type + self.nlist_cut_idx: list[paddle.Tensor] = [] + if self.mixed_types() and not all( + descrpt.mixed_types() for descrpt in self.descrpt_list + ): + self.sel_no_mixed_types = np.max( + [ + descrpt.get_sel() + for descrpt in self.descrpt_list + if not descrpt.mixed_types() + ], + axis=0, + ).tolist() + else: + self.sel_no_mixed_types = None + for ii in range(self.numb_descrpt): + if self.mixed_types() == self.descrpt_list[ii].mixed_types(): + hybrid_sel = self.get_sel() + else: + assert self.sel_no_mixed_types is not None + hybrid_sel = self.sel_no_mixed_types + sub_sel = self.descrpt_list[ii].get_sel() + start_idx = np.cumsum(np.pad(hybrid_sel, (1, 0), "constant"))[:-1] + end_idx = start_idx + np.array(sub_sel) + cut_idx = np.concatenate( + [range(ss, ee) for ss, ee in zip(start_idx, end_idx)] + ).astype(np.int64) + self.nlist_cut_idx.append(to_paddle_tensor(cut_idx)) + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + # do not use numpy here - jit is not happy + return max([descrpt.get_rcut() for descrpt in self.descrpt_list]) + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + # may not be a good idea... + # Note: Using the minimum rcut_smth might not be appropriate in all scenarios. Consider using a different approach or provide detailed documentation on why the minimum value is chosen. + return min([descrpt.get_rcut_smth() for descrpt in self.descrpt_list]) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + if self.mixed_types(): + return [ + np.max( + [descrpt.get_nsel() for descrpt in self.descrpt_list], axis=0 + ).item() + ] + else: + return np.max( + [descrpt.get_sel() for descrpt in self.descrpt_list], axis=0 + ).tolist() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.descrpt_list[0].get_ntypes() + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.descrpt_list[0].get_type_map() + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return sum([descrpt.get_dim_out() for descrpt in self.descrpt_list]) + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return sum([descrpt.get_dim_emb() for descrpt in self.descrpt_list]) + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return any(descrpt.mixed_types() for descrpt in self.descrpt_list) + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return any(descrpt.has_message_passing() for descrpt in self.descrpt_list) + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix. 
All descriptors should be the same.""" + all_protection = [descrpt.get_env_protection() for descrpt in self.descrpt_list] + same_as_0 = [math.isclose(ii, all_protection[0]) for ii in all_protection] + if not all(same_as_0): + raise ValueError( + "Hybrid descriptor requires the same environment matrix protection for all descriptors. Found differing values." + ) + return all_protection[0] + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + if shared_level == 0: + for ii, des in enumerate(self.descrpt_list): + self.descrpt_list[ii].share_params( + base_class.descrpt_list[ii], shared_level, resume=resume + ) + else: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + for ii, descrpt in enumerate(self.descrpt_list): + descrpt.change_type_map( + type_map=type_map, + model_with_new_type_stat=model_with_new_type_stat.descrpt_list[ii] + if model_with_new_type_stat is not None + else None, + ) + + def compute_input_stats(self, merged: list[dict], path: Optional[DPPath] = None): + """Update mean and stddev for descriptor elements.""" + for descrpt in self.descrpt_list: + descrpt.compute_input_stats(merged, path) + + def set_stat_mean_and_stddev( + self, + mean: list[Union[paddle.Tensor, list[paddle.Tensor]]], + stddev: list[Union[paddle.Tensor, list[paddle.Tensor]]], + ) -> None: + """Update mean and stddev for descriptor.""" + for ii, descrpt in enumerate(self.descrpt_list): + descrpt.set_stat_mean_and_stddev(mean[ii], stddev[ii]) + + def get_stat_mean_and_stddev( + self, + ) -> tuple[ + list[Union[paddle.Tensor, list[paddle.Tensor]]], + list[Union[paddle.Tensor, list[paddle.Tensor]]], + ]: + """Get mean and stddev for descriptor.""" + mean_list = [] + stddev_list = [] + for ii, descrpt in enumerate(self.descrpt_list): + mean_item, stddev_item = descrpt.get_stat_mean_and_stddev() + mean_list.append(mean_item) + stddev_list.append(stddev_item) + return mean_list, stddev_list + + def forward( + self, + coord_ext: paddle.Tensor, + atype_ext: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3. This descriptor returns None + g2 + The rotationally invariant pair-partical representation. 
+ this descriptor returns None + h2 + The rotationally equivariant pair-partical representation. + this descriptor returns None + sw + The smooth switch function. this descriptor returns None + """ + out_descriptor = [] + out_gr = [] + out_g2: Optional[paddle.Tensor] = None + out_h2: Optional[paddle.Tensor] = None + out_sw: Optional[paddle.Tensor] = None + if self.sel_no_mixed_types is not None: + nl_distinguish_types = nlist_distinguish_types( + nlist, + atype_ext, + self.sel_no_mixed_types, + ) + else: + nl_distinguish_types = None + # make jit happy + # for descrpt, nci in zip(self.descrpt_list, self.nlist_cut_idx): + for ii, descrpt in enumerate(self.descrpt_list): + # cut the nlist to the correct length + if self.mixed_types() == descrpt.mixed_types(): + nl = nlist[:, :, self.nlist_cut_idx[ii].to(atype_ext.place)] + else: + # mixed_types is True, but descrpt.mixed_types is False + assert nl_distinguish_types is not None + nl = nl_distinguish_types[ + :, :, self.nlist_cut_idx[ii].to(atype_ext.place) + ] + odescriptor, gr, g2, h2, sw = descrpt(coord_ext, atype_ext, nl, mapping) + out_descriptor.append(odescriptor) + if gr is not None: + out_gr.append(gr) + out_descriptor = paddle.concat(out_descriptor, axis=-1) + out_gr = paddle.concat(out_gr, axis=-2) if out_gr else None + return out_descriptor, out_gr, out_g2, out_h2, out_sw + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + new_list = [] + min_nbor_dist = None + for sub_jdata in local_jdata["list"]: + new_sub_jdata, min_nbor_dist_ = BaseDescriptor.update_sel( + train_data, type_map, sub_jdata + ) + if min_nbor_dist_ is not None: + min_nbor_dist = min_nbor_dist_ + new_list.append(new_sub_jdata) + local_jdata_cpy["list"] = new_list + return local_jdata_cpy, min_nbor_dist + + def serialize(self) -> dict: + return { + "@class": "Descriptor", + "type": "hybrid", + "@version": 1, + "list": [descrpt.serialize() for descrpt in self.descrpt_list], + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptHybrid": + data = data.copy() + class_name = data.pop("@class") + assert class_name == "Descriptor" + class_type = data.pop("type") + assert class_type == "hybrid" + check_version_compatibility(data.pop("@version"), 1, 1) + obj = cls( + list=[BaseDescriptor.deserialize(ii) for ii in data["list"]], + ) + return obj diff --git a/deepmd/pd/model/descriptor/repformer_layer.py b/deepmd/pd/model/descriptor/repformer_layer.py new file mode 100644 index 0000000000..91413badbe --- /dev/null +++ b/deepmd/pd/model/descriptor/repformer_layer.py @@ -0,0 +1,1477 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, + Union, +) + +import paddle +import paddle.nn as nn + +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.network.init import ( + constant_, + normal_, +) +from deepmd.pd.model.network.layernorm import ( + LayerNorm, +) +from deepmd.pd.model.network.mlp import ( + MLPLayer, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from 
deepmd.pd.utils.env import ( + PRECISION_DICT, +) +from deepmd.pd.utils.utils import ( + ActivationFn, + get_generator, + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +def get_residual( + _dim: int, + _scale: float, + _mode: str = "norm", + trainable: bool = True, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, +) -> paddle.Tensor: + r""" + Get residual tensor for one update vector. + + Parameters + ---------- + _dim : int + The dimension of the update vector. + _scale + The initial scale of the residual tensor. See `_mode` for details. + _mode + The mode of residual initialization for the residual tensor. + - "norm" (default): init residual using normal with `_scale` std. + - "const": init residual using element-wise constants of `_scale`. + trainable + Whether the residual tensor is trainable. + precision + The precision of the residual tensor. + seed : int, optional + Random seed for parameter initialization. + """ + random_generator = get_generator(seed) + residual = paddle.create_parameter( + [_dim], + dtype=PRECISION_DICT[precision], + default_initializer=nn.initializer.Constant(0), + ).to(device=env.DEVICE) + residual.stop_gradient = not trainable + if _mode == "norm": + normal_(residual.data, std=_scale, generator=random_generator) + elif _mode == "const": + constant_(residual.data, val=_scale) + else: + raise RuntimeError(f"Unsupported initialization mode '{_mode}'!") + return residual + + +# common ops +def _make_nei_g1( + g1_ext: paddle.Tensor, + nlist: paddle.Tensor, +) -> paddle.Tensor: + """ + Make neighbor-wise atomic invariant rep. + + Parameters + ---------- + g1_ext + Extended atomic invariant rep, with shape nb x nall x ng1. + nlist + Neighbor list, with shape nb x nloc x nnei. + + Returns + ------- + gg1: paddle.Tensor + Neighbor-wise atomic invariant rep, with shape nb x nloc x nnei x ng1. + + """ + # nlist: nb x nloc x nnei + nb, nloc, nnei = nlist.shape + # g1_ext: nb x nall x ng1 + ng1 = g1_ext.shape[-1] + # index: nb x (nloc x nnei) x ng1 + index = nlist.reshape([nb, nloc * nnei]).unsqueeze(-1).expand([-1, -1, ng1]) + # gg1 : nb x (nloc x nnei) x ng1 + gg1 = decomp.take_along_axis(g1_ext, axis=1, indices=index) + # gg1 : nb x nloc x nnei x ng1 + gg1 = gg1.reshape([nb, nloc, nnei, ng1]) + return gg1 + + +def _apply_nlist_mask( + gg: paddle.Tensor, + nlist_mask: paddle.Tensor, +) -> paddle.Tensor: + """ + Apply nlist mask to neighbor-wise rep tensors. + + Parameters + ---------- + gg + Neighbor-wise rep tensors, with shape nf x nloc x nnei x d. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nf x nloc x nnei. + """ + # gg: nf x nloc x nnei x d + # msk: nf x nloc x nnei + return gg.masked_fill(~nlist_mask.unsqueeze(-1), 0.0) + + +def _apply_switch(gg: paddle.Tensor, sw: paddle.Tensor) -> paddle.Tensor: + """ + Apply switch function to neighbor-wise rep tensors. + + Parameters + ---------- + gg + Neighbor-wise rep tensors, with shape nf x nloc x nnei x d. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nf x nloc x nnei. 
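+
+    Returns
+    -------
+    paddle.Tensor
+        The switch-weighted rep tensors, with the same shape as ``gg``.
+
+    Examples
+    --------
+    A minimal shape-only sketch (the tensor values here are placeholders):
+
+    >>> gg = paddle.ones([1, 2, 3, 4])
+    >>> sw = paddle.full([1, 2, 3], 0.5)
+    >>> _apply_switch(gg, sw).shape
+    [1, 2, 3, 4]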
+ """ + # gg: nf x nloc x nnei x d + # sw: nf x nloc x nnei + return gg * sw.unsqueeze(-1) + + +class Atten2Map(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + hidden_dim: int, + head_num: int, + has_gate: bool = False, # apply gate to attn map + smooth: bool = True, + attnw_shift: float = 20.0, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ): + """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" + super().__init__() + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.head_num = head_num + self.mapqk = MLPLayer( + input_dim, + hidden_dim * 2 * head_num, + bias=False, + precision=precision, + seed=seed, + ) + self.has_gate = has_gate + self.smooth = smooth + self.attnw_shift = attnw_shift + self.precision = precision + + def forward( + self, + g2: paddle.Tensor, # nb x nloc x nnei x ng2 + h2: paddle.Tensor, # nb x nloc x nnei x 3 + nlist_mask: paddle.Tensor, # nb x nloc x nnei + sw: paddle.Tensor, # nb x nloc x nnei + ) -> paddle.Tensor: + ( + nb, + nloc, + nnei, + _, + ) = g2.shape + nd, nh = self.hidden_dim, self.head_num + # nb x nloc x nnei x nd x (nh x 2) + g2qk = self.mapqk(g2).reshape([nb, nloc, nnei, nd, nh * 2]) + # nb x nloc x (nh x 2) x nnei x nd + g2qk = paddle.transpose(g2qk, (0, 1, 4, 2, 3)) + # nb x nloc x nh x nnei x nd + g2q, g2k = paddle.split(g2qk, decomp.sec(g2qk.shape[2], nh), axis=2) + # g2q = paddle.nn.functional.normalize(g2q, axis=-1) + # g2k = paddle.nn.functional.normalize(g2k, axis=-1) + # nb x nloc x nh x nnei x nnei + attnw = paddle.matmul(g2q, paddle.transpose(g2k, [0, 1, 2, 4, 3])) / nd**0.5 + if self.has_gate: + gate = paddle.matmul(h2, paddle.transpose(h2, [0, 1, 3, 2])).unsqueeze(-3) + attnw = attnw * gate + # mask the attenmap, nb x nloc x 1 x 1 x nnei + attnw_mask = ~nlist_mask.unsqueeze(2).unsqueeze(2) + # mask the attenmap, nb x nloc x 1 x nnei x 1 + attnw_mask_c = ~nlist_mask.unsqueeze(2).unsqueeze(-1) + if self.smooth: + attnw = (attnw + self.attnw_shift) * sw[:, :, None, :, None] * sw[ + :, :, None, None, : + ] - self.attnw_shift + else: + attnw = attnw.masked_fill( + attnw_mask, + float("-inf"), + ) + attnw = paddle.nn.functional.softmax(attnw, axis=-1) + attnw = attnw.masked_fill( + attnw_mask, + 0.0, + ) + # nb x nloc x nh x nnei x nnei + attnw = attnw.masked_fill( + attnw_mask_c, + 0.0, + ) + if self.smooth: + attnw = attnw * sw[:, :, None, :, None] * sw[:, :, None, None, :] + # nb x nloc x nnei x nnei + h2h2t = paddle.matmul(h2, paddle.transpose(h2, [0, 1, 3, 2])) / 3.0**0.5 + # nb x nloc x nh x nnei x nnei + ret = attnw * h2h2t[:, :, None, :, :] + # ret = paddle.nn.functional.softmax(g2qk, axis=-1) + # nb x nloc x nnei x nnei x nh + ret = paddle.transpose(ret, (0, 1, 3, 4, 2)) + return ret + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + return { + "@class": "Atten2Map", + "@version": 1, + "input_dim": self.input_dim, + "hidden_dim": self.hidden_dim, + "head_num": self.head_num, + "has_gate": self.has_gate, + "smooth": self.smooth, + "attnw_shift": self.attnw_shift, + "precision": self.precision, + "mapqk": self.mapqk.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "Atten2Map": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. 
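+
+        Returns
+        -------
+        Atten2Map
+            The deserialized network.
+
+        Examples
+        --------
+        Round-trip sketch (``net`` is assumed to be an existing ``Atten2Map``):
+
+        >>> restored = Atten2Map.deserialize(net.serialize())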
+ """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + mapqk = data.pop("mapqk") + obj = cls(**data) + obj.mapqk = MLPLayer.deserialize(mapqk) + return obj + + +class Atten2MultiHeadApply(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + head_num: int, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ): + super().__init__() + self.input_dim = input_dim + self.head_num = head_num + self.mapv = MLPLayer( + input_dim, + input_dim * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 0), + ) + self.head_map = MLPLayer( + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 1), + ) + self.precision = precision + + def forward( + self, + AA: paddle.Tensor, # nf x nloc x nnei x nnei x nh + g2: paddle.Tensor, # nf x nloc x nnei x ng2 + ) -> paddle.Tensor: + nf, nloc, nnei, ng2 = g2.shape + nh = self.head_num + # nf x nloc x nnei x ng2 x nh + g2v = self.mapv(g2).reshape([nf, nloc, nnei, ng2, nh]) + # nf x nloc x nh x nnei x ng2 + g2v = paddle.transpose(g2v, (0, 1, 4, 2, 3)) + # g2v = paddle.nn.functional.normalize(g2v, axis=-1) + # nf x nloc x nh x nnei x nnei + AA = paddle.transpose(AA, (0, 1, 4, 2, 3)) + # nf x nloc x nh x nnei x ng2 + ret = paddle.matmul(AA, g2v) + # nf x nloc x nnei x ng2 x nh + ret = paddle.transpose(ret, (0, 1, 3, 4, 2)).reshape( + [nf, nloc, nnei, (ng2 * nh)] + ) + # nf x nloc x nnei x ng2 + return self.head_map(ret) + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + return { + "@class": "Atten2MultiHeadApply", + "@version": 1, + "input_dim": self.input_dim, + "head_num": self.head_num, + "precision": self.precision, + "mapv": self.mapv.serialize(), + "head_map": self.head_map.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "Atten2MultiHeadApply": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + mapv = data.pop("mapv") + head_map = data.pop("head_map") + obj = cls(**data) + obj.mapv = MLPLayer.deserialize(mapv) + obj.head_map = MLPLayer.deserialize(head_map) + return obj + + +class Atten2EquiVarApply(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + head_num: int, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ): + super().__init__() + self.input_dim = input_dim + self.head_num = head_num + self.head_map = MLPLayer( + head_num, 1, bias=False, precision=precision, seed=seed + ) + self.precision = precision + + def forward( + self, + AA: paddle.Tensor, # nf x nloc x nnei x nnei x nh + h2: paddle.Tensor, # nf x nloc x nnei x 3 + ) -> paddle.Tensor: + nf, nloc, nnei, _ = h2.shape + nh = self.head_num + # nf x nloc x nh x nnei x nnei + AA = paddle.transpose(AA, (0, 1, 4, 2, 3)) + h2m = paddle.unsqueeze(h2, axis=2) + # nf x nloc x nh x nnei x 3 + h2m = paddle.tile(h2m, [1, 1, nh, 1, 1]) + # nf x nloc x nh x nnei x 3 + ret = paddle.matmul(AA, h2m) + # nf x nloc x nnei x 3 x nh + ret = paddle.transpose(ret, (0, 1, 3, 4, 2)).reshape([nf, nloc, nnei, 3, nh]) + # nf x nloc x nnei x 3 + return paddle.squeeze(self.head_map(ret), axis=-1) + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
+ """ + return { + "@class": "Atten2EquiVarApply", + "@version": 1, + "input_dim": self.input_dim, + "head_num": self.head_num, + "precision": self.precision, + "head_map": self.head_map.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "Atten2EquiVarApply": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + head_map = data.pop("head_map") + obj = cls(**data) + obj.head_map = MLPLayer.deserialize(head_map) + return obj + + +class LocalAtten(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + hidden_dim: int, + head_num: int, + smooth: bool = True, + attnw_shift: float = 20.0, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ): + super().__init__() + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.head_num = head_num + self.mapq = MLPLayer( + input_dim, + hidden_dim * 1 * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 0), + ) + self.mapkv = MLPLayer( + input_dim, + (hidden_dim + input_dim) * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 1), + ) + self.head_map = MLPLayer( + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 2), + ) + self.smooth = smooth + self.attnw_shift = attnw_shift + self.precision = precision + + def forward( + self, + g1: paddle.Tensor, # nb x nloc x ng1 + gg1: paddle.Tensor, # nb x nloc x nnei x ng1 + nlist_mask: paddle.Tensor, # nb x nloc x nnei + sw: paddle.Tensor, # nb x nloc x nnei + ) -> paddle.Tensor: + nb, nloc, nnei = nlist_mask.shape + ni, nd, nh = self.input_dim, self.hidden_dim, self.head_num + assert ni == g1.shape[-1] + assert ni == gg1.shape[-1] + # nb x nloc x nd x nh + g1q = self.mapq(g1).reshape([nb, nloc, nd, nh]) + # nb x nloc x nh x nd + g1q = paddle.transpose(g1q, (0, 1, 3, 2)) + # nb x nloc x nnei x (nd+ni) x nh + gg1kv = self.mapkv(gg1).reshape([nb, nloc, nnei, nd + ni, nh]) + gg1kv = paddle.transpose(gg1kv, (0, 1, 4, 2, 3)) + # nb x nloc x nh x nnei x nd, nb x nloc x nh x nnei x ng1 + gg1k, gg1v = paddle.split(gg1kv, [nd, ni], axis=-1) + + # nb x nloc x nh x 1 x nnei + attnw = ( + paddle.matmul(g1q.unsqueeze(-2), paddle.transpose(gg1k, [0, 1, 2, 4, 3])) + / nd**0.5 + ) + # nb x nloc x nh x nnei + attnw = attnw.squeeze(-2) + # mask the attenmap, nb x nloc x 1 x nnei + attnw_mask = ~nlist_mask.unsqueeze(-2) + # nb x nloc x nh x nnei + if self.smooth: + attnw = (attnw + self.attnw_shift) * sw.unsqueeze(-2) - self.attnw_shift + else: + attnw = attnw.masked_fill( + attnw_mask, + float("-inf"), + ) + attnw = paddle.nn.functional.softmax(attnw, axis=-1) + attnw = attnw.masked_fill( + attnw_mask, + 0.0, + ) + if self.smooth: + attnw = attnw * sw.unsqueeze(-2) + + # nb x nloc x nh x ng1 + ret = ( + paddle.matmul(attnw.unsqueeze(-2), gg1v) + .squeeze(-2) + .reshape([nb, nloc, nh * ni]) + ) + # nb x nloc x ng1 + ret = self.head_map(ret) + return ret + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
+ """ + return { + "@class": "LocalAtten", + "@version": 1, + "input_dim": self.input_dim, + "hidden_dim": self.hidden_dim, + "head_num": self.head_num, + "smooth": self.smooth, + "attnw_shift": self.attnw_shift, + "precision": self.precision, + "mapq": self.mapq.serialize(), + "mapkv": self.mapkv.serialize(), + "head_map": self.head_map.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "LocalAtten": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + mapq = data.pop("mapq") + mapkv = data.pop("mapkv") + head_map = data.pop("head_map") + obj = cls(**data) + obj.mapq = MLPLayer.deserialize(mapq) + obj.mapkv = MLPLayer.deserialize(mapkv) + obj.head_map = MLPLayer.deserialize(head_map) + return obj + + +class RepformerLayer(paddle.nn.Layer): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + g1_dim=128, + g2_dim=16, + axis_neuron: int = 4, + update_chnnl_2: bool = True, + update_g1_has_conv: bool = True, + update_g1_has_drrd: bool = True, + update_g1_has_grrg: bool = True, + update_g1_has_attn: bool = True, + update_g2_has_g1g1: bool = True, + update_g2_has_attn: bool = True, + update_h2: bool = False, + attn1_hidden: int = 64, + attn1_nhead: int = 4, + attn2_hidden: int = 16, + attn2_nhead: int = 4, + attn2_has_gate: bool = False, + activation_function: str = "tanh", + update_style: str = "res_avg", + update_residual: float = 0.001, + update_residual_init: str = "norm", + smooth: bool = True, + precision: str = "float64", + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + use_sqrt_nnei: bool = True, + g1_out_conv: bool = True, + g1_out_mlp: bool = True, + seed: Optional[Union[int, list[int]]] = None, + ): + super().__init__() + self.epsilon = 1e-4 # protection of 1./nnei + self.rcut = rcut + self.rcut_smth = rcut_smth + self.ntypes = ntypes + sel = [sel] if isinstance(sel, int) else sel + self.nnei = sum(sel) + assert len(sel) == 1 + self.sel = sel + self.sec = self.sel + self.axis_neuron = axis_neuron + self.activation_function = activation_function + self.act = ActivationFn(activation_function) + self.update_g1_has_grrg = update_g1_has_grrg + self.update_g1_has_drrd = update_g1_has_drrd + self.update_g1_has_conv = update_g1_has_conv + self.update_g1_has_attn = update_g1_has_attn + self.update_chnnl_2 = update_chnnl_2 + self.update_g2_has_g1g1 = update_g2_has_g1g1 if self.update_chnnl_2 else False + self.update_g2_has_attn = update_g2_has_attn if self.update_chnnl_2 else False + self.update_h2 = update_h2 if self.update_chnnl_2 else False + del update_g2_has_g1g1, update_g2_has_attn, update_h2 + self.attn1_hidden = attn1_hidden + self.attn1_nhead = attn1_nhead + self.attn2_hidden = attn2_hidden + self.attn2_nhead = attn2_nhead + self.attn2_has_gate = attn2_has_gate + self.update_style = update_style + self.update_residual = update_residual + self.update_residual_init = update_residual_init + self.smooth = smooth + self.g1_dim = g1_dim + self.g2_dim = g2_dim + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.precision = precision + self.seed = seed + self.use_sqrt_nnei = use_sqrt_nnei + self.g1_out_conv = g1_out_conv + self.g1_out_mlp = g1_out_mlp + + assert update_residual_init in [ + "norm", + "const", + ], "'update_residual_init' only support 'norm' or 'const'!" 
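+        # With the "res_residual" update style, every enabled update branch below
+        # registers its own trainable residual-scaling vector (see get_residual)
+        # in g1_residual / g2_residual / h2_residual; these lists are wrapped into
+        # ParameterLists and consumed by list_update_res_residual at update time.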
+ self.update_residual = update_residual + self.update_residual_init = update_residual_init + self.g1_residual = [] + self.g2_residual = [] + self.h2_residual = [] + + if self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 0), + ) + ) + + g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_neuron) + self.linear1 = MLPLayer( + g1_in_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 1), + ) + self.linear2 = None + self.proj_g1g2 = None + self.proj_g1g1g2 = None + self.attn2g_map = None + self.attn2_mh_apply = None + self.attn2_lm = None + self.attn2_ev_apply = None + self.loc_attn = None + + if self.update_chnnl_2: + self.linear2 = MLPLayer( + g2_dim, + g2_dim, + precision=precision, + seed=child_seed(seed, 2), + ) + if self.update_style == "res_residual": + self.g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 3), + ) + ) + if self.g1_out_mlp: + self.g1_self_mlp = MLPLayer( + g1_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 15), + ) + if self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 16), + ) + ) + else: + self.g1_self_mlp = None + if self.update_g1_has_conv: + if not self.g1_out_conv: + self.proj_g1g2 = MLPLayer( + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + else: + self.proj_g1g2 = MLPLayer( + g2_dim, + g1_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + if self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 17), + ) + ) + if self.update_g2_has_g1g1: + self.proj_g1g1g2 = MLPLayer( + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 5), + ) + if self.update_style == "res_residual": + self.g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 6), + ) + ) + if self.update_g2_has_attn or self.update_h2: + self.attn2g_map = Atten2Map( + g2_dim, + attn2_hidden, + attn2_nhead, + attn2_has_gate, + self.smooth, + precision=precision, + seed=child_seed(seed, 7), + ) + if self.update_g2_has_attn: + self.attn2_mh_apply = Atten2MultiHeadApply( + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) + ) + self.attn2_lm = LayerNorm( + g2_dim, + eps=ln_eps, + trainable=trainable_ln, + precision=precision, + seed=child_seed(seed, 9), + ) + if self.update_style == "res_residual": + self.g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 10), + ) + ) + + if self.update_h2: + self.attn2_ev_apply = Atten2EquiVarApply( + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) + ) + if self.update_style == "res_residual": + self.h2_residual.append( + get_residual( + 1, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 12), + ) + ) + if self.update_g1_has_attn: + self.loc_attn = LocalAtten( + g1_dim, + attn1_hidden, + attn1_nhead, + self.smooth, + precision=precision, + seed=child_seed(seed, 13), + ) + if 
self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 14), + ) + ) + + self.g1_residual = nn.ParameterList(self.g1_residual) + self.g2_residual = nn.ParameterList(self.g2_residual) + self.h2_residual = nn.ParameterList(self.h2_residual) + + def cal_1_dim(self, g1d: int, g2d: int, ax: int) -> int: + ret = g1d if not self.g1_out_mlp else 0 + if self.update_g1_has_grrg: + ret += g2d * ax + if self.update_g1_has_drrd: + ret += g1d * ax + if self.update_g1_has_conv and not self.g1_out_conv: + ret += g2d + return ret + + def _update_h2( + self, + h2: paddle.Tensor, + attn: paddle.Tensor, + ) -> paddle.Tensor: + """ + Calculate the attention weights update for pair-wise equivariant rep. + + Parameters + ---------- + h2 + Pair-wise equivariant rep tensors, with shape nf x nloc x nnei x 3. + attn + Attention weights from g2 attention, with shape nf x nloc x nnei x nnei x nh2. + """ + assert self.attn2_ev_apply is not None + # nf x nloc x nnei x nh2 + h2_1 = self.attn2_ev_apply(attn, h2) + return h2_1 + + def _update_g1_conv( + self, + gg1: paddle.Tensor, + g2: paddle.Tensor, + nlist_mask: paddle.Tensor, + sw: paddle.Tensor, + ) -> paddle.Tensor: + """ + Calculate the convolution update for atomic invariant rep. + + Parameters + ---------- + gg1 + Neighbor-wise atomic invariant rep, with shape nb x nloc x nnei x ng1. + g2 + Pair invariant rep, with shape nb x nloc x nnei x ng2. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + """ + assert self.proj_g1g2 is not None + nb, nloc, nnei, _ = g2.shape + ng1 = gg1.shape[-1] + ng2 = g2.shape[-1] + if not self.g1_out_conv: + # gg1 : nb x nloc x nnei x ng2 + gg1 = self.proj_g1g2(gg1).reshape([nb, nloc, nnei, ng2]) + else: + gg1 = gg1.reshape([nb, nloc, nnei, ng1]) + # nb x nloc x nnei x ng2/ng1 + gg1 = _apply_nlist_mask(gg1, nlist_mask) + if not self.smooth: + # normalized by number of neighbors, not smooth + # nb x nloc x 1 + # must use astype here to convert bool to float, otherwise there will be numerical difference from numpy + invnnei = 1.0 / ( + self.epsilon + paddle.sum(nlist_mask.astype(gg1.dtype), axis=-1) + ).unsqueeze(-1) + else: + gg1 = _apply_switch(gg1, sw) + invnnei = (1.0 / float(nnei)) * paddle.ones( + (nb, nloc, 1), dtype=gg1.dtype + ).to(device=gg1.place) + if not self.g1_out_conv: + # nb x nloc x ng2 + g1_11 = paddle.sum(g2 * gg1, axis=2) * invnnei + else: + g2 = self.proj_g1g2(g2).reshape([nb, nloc, nnei, ng1]) + # nb x nloc x ng1 + g1_11 = paddle.sum(g2 * gg1, axis=2) * invnnei + return g1_11 + + @staticmethod + def _cal_hg( + g2: paddle.Tensor, + h2: paddle.Tensor, + nlist_mask: paddle.Tensor, + sw: paddle.Tensor, + smooth: bool = True, + epsilon: float = 1e-4, + use_sqrt_nnei: bool = True, + ) -> paddle.Tensor: + """ + Calculate the transposed rotation matrix. + + Parameters + ---------- + g2 + Neighbor-wise/Pair-wise invariant rep tensors, with shape nb x nloc x nnei x ng2. + h2 + Neighbor-wise/Pair-wise equivariant rep tensors, with shape nb x nloc x nnei x 3. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. 
+ sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + smooth + Whether to use smoothness in processes such as attention weights calculation. + epsilon + Protection of 1./nnei. + + Returns + ------- + hg + The transposed rotation matrix, with shape nb x nloc x 3 x ng2. + """ + # g2: nb x nloc x nnei x ng2 + # h2: nb x nloc x nnei x 3 + # msk: nb x nloc x nnei + nb, nloc, nnei, _ = g2.shape + ng2 = g2.shape[-1] + # nb x nloc x nnei x ng2 + g2 = _apply_nlist_mask(g2, nlist_mask) + if not smooth: + # nb x nloc + # must use type_as here to convert bool to float, otherwise there will be numerical difference from numpy + if not use_sqrt_nnei: + invnnei = 1.0 / (epsilon + paddle.sum(nlist_mask.type_as(g2), axis=-1)) + else: + invnnei = 1.0 / ( + epsilon + paddle.sqrt(paddle.sum(nlist_mask.type_as(g2), axis=-1)) + ) + # nb x nloc x 1 x 1 + invnnei = invnnei.unsqueeze(-1).unsqueeze(-1) + else: + g2 = _apply_switch(g2, sw) + if not use_sqrt_nnei: + invnnei = (1.0 / float(nnei)) * paddle.ones( + (nb, nloc, 1, 1), dtype=g2.dtype + ).to(device=g2.place) + else: + invnnei = paddle.rsqrt( + float(nnei) + * paddle.ones((nb, nloc, 1, 1), dtype=g2.dtype).to(device=g2.place) + ) + # nb x nloc x 3 x ng2 + h2g2 = paddle.matmul(paddle.transpose(h2, [0, 1, 3, 2]), g2) * invnnei + return h2g2 + + @staticmethod + def _cal_grrg(h2g2: paddle.Tensor, axis_neuron: int) -> paddle.Tensor: + """ + Calculate the atomic invariant rep. + + Parameters + ---------- + h2g2 + The transposed rotation matrix, with shape nb x nloc x 3 x ng2. + axis_neuron + Size of the submatrix. + + Returns + ------- + grrg + Atomic invariant rep, with shape nb x nloc x (axis_neuron x ng2) + """ + # nb x nloc x 3 x ng2 + nb, nloc, _, ng2 = h2g2.shape + # nb x nloc x 3 x axis + h2g2m = paddle.split(h2g2, decomp.sec(h2g2.shape[-1], axis_neuron), axis=-1)[0] + # nb x nloc x axis x ng2 + g1_13 = paddle.matmul(paddle.transpose(h2g2m, [0, 1, 3, 2]), h2g2) / (3.0**1) + # nb x nloc x (axisxng2) + g1_13 = g1_13.reshape([nb, nloc, axis_neuron * ng2]) + return g1_13 + + def symmetrization_op( + self, + g2: paddle.Tensor, + h2: paddle.Tensor, + nlist_mask: paddle.Tensor, + sw: paddle.Tensor, + axis_neuron: int, + smooth: bool = True, + epsilon: float = 1e-4, + ) -> paddle.Tensor: + """ + Symmetrization operator to obtain atomic invariant rep. + + Parameters + ---------- + g2 + Neighbor-wise/Pair-wise invariant rep tensors, with shape nb x nloc x nnei x ng2. + h2 + Neighbor-wise/Pair-wise equivariant rep tensors, with shape nb x nloc x nnei x 3. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + axis_neuron + Size of the submatrix. + smooth + Whether to use smoothness in processes such as attention weights calculation. + epsilon + Protection of 1./nnei. 
+ + Returns + ------- + grrg + Atomic invariant rep, with shape nb x nloc x (axis_neuron x ng2) + """ + # g2: nb x nloc x nnei x ng2 + # h2: nb x nloc x nnei x 3 + # msk: nb x nloc x nnei + nb, nloc, nnei, _ = g2.shape + # nb x nloc x 3 x ng2 + h2g2 = self._cal_hg( + g2, + h2, + nlist_mask, + sw, + smooth=smooth, + epsilon=epsilon, + use_sqrt_nnei=self.use_sqrt_nnei, + ) + # nb x nloc x (axisxng2) + g1_13 = self._cal_grrg(h2g2, axis_neuron) + return g1_13 + + def _update_g2_g1g1( + self, + g1: paddle.Tensor, # nb x nloc x ng1 + gg1: paddle.Tensor, # nb x nloc x nnei x ng1 + nlist_mask: paddle.Tensor, # nb x nloc x nnei + sw: paddle.Tensor, # nb x nloc x nnei + ) -> paddle.Tensor: + """ + Update the g2 using element-wise dot g1_i * g1_j. + + Parameters + ---------- + g1 + Atomic invariant rep, with shape nb x nloc x ng1. + gg1 + Neighbor-wise atomic invariant rep, with shape nb x nloc x nnei x ng1. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + """ + ret = g1.unsqueeze(-2) * gg1 + # nb x nloc x nnei x ng1 + ret = _apply_nlist_mask(ret, nlist_mask) + if self.smooth: + ret = _apply_switch(ret, sw) + return ret + + def forward( + self, + g1_ext: paddle.Tensor, # nf x nall x ng1 + g2: paddle.Tensor, # nf x nloc x nnei x ng2 + h2: paddle.Tensor, # nf x nloc x nnei x 3 + nlist: paddle.Tensor, # nf x nloc x nnei + nlist_mask: paddle.Tensor, # nf x nloc x nnei + sw: paddle.Tensor, # switch func, nf x nloc x nnei + ): + """ + Parameters + ---------- + g1_ext : nf x nall x ng1 extended single-atom chanel + g2 : nf x nloc x nnei x ng2 pair-atom channel, invariant + h2 : nf x nloc x nnei x 3 pair-atom channel, equivariant + nlist : nf x nloc x nnei neighbor list (padded neis are set to 0) + nlist_mask : nf x nloc x nnei masks of the neighbor list. 
real nei 1 otherwise 0 + sw : nf x nloc x nnei switch function + + Returns + ------- + g1: nf x nloc x ng1 updated single-atom chanel + g2: nf x nloc x nnei x ng2 updated pair-atom channel, invariant + h2: nf x nloc x nnei x 3 updated pair-atom channel, equivariant + """ + cal_gg1 = ( + self.update_g1_has_drrd + or self.update_g1_has_conv + or self.update_g1_has_attn + or self.update_g2_has_g1g1 + ) + + nb, nloc, nnei, _ = g2.shape + nall = g1_ext.shape[1] + g1, _ = paddle.split(g1_ext, [nloc, nall - nloc], axis=1) + if paddle.in_dynamic_mode(): + assert [nb, nloc] == g1.shape[:2] + if paddle.in_dynamic_mode(): + assert [nb, nloc, nnei] == h2.shape[:3] + + g2_update: list[paddle.Tensor] = [g2] + h2_update: list[paddle.Tensor] = [h2] + g1_update: list[paddle.Tensor] = [g1] + g1_mlp: list[paddle.Tensor] = [g1] if not self.g1_out_mlp else [] + if self.g1_out_mlp: + if paddle.in_dynamic_mode(): + assert self.g1_self_mlp is not None + g1_self_mlp = self.act(self.g1_self_mlp(g1)) + g1_update.append(g1_self_mlp) + + if cal_gg1: + gg1 = _make_nei_g1(g1_ext, nlist) + else: + gg1 = None + + if self.update_chnnl_2: + # mlp(g2) + if paddle.in_dynamic_mode(): + assert self.linear2 is not None + # nb x nloc x nnei x ng2 + g2_1 = self.act(self.linear2(g2)) + g2_update.append(g2_1) + + if self.update_g2_has_g1g1: + # linear(g1_i * g1_j) + if paddle.in_dynamic_mode(): + assert gg1 is not None + if paddle.in_dynamic_mode(): + assert self.proj_g1g1g2 is not None + g2_update.append( + self.proj_g1g1g2(self._update_g2_g1g1(g1, gg1, nlist_mask, sw)) + ) + + if self.update_g2_has_attn or self.update_h2: + # gated_attention(g2, h2) + if paddle.in_dynamic_mode(): + assert self.attn2g_map is not None + # nb x nloc x nnei x nnei x nh + AAg = self.attn2g_map(g2, h2, nlist_mask, sw) + + if self.update_g2_has_attn: + if paddle.in_dynamic_mode(): + assert self.attn2_mh_apply is not None + if paddle.in_dynamic_mode(): + assert self.attn2_lm is not None + # nb x nloc x nnei x ng2 + g2_2 = self.attn2_mh_apply(AAg, g2) + g2_2 = self.attn2_lm(g2_2) + g2_update.append(g2_2) + + if self.update_h2: + # linear_head(attention_weights * h2) + h2_update.append(self._update_h2(h2, AAg)) + + if self.update_g1_has_conv: + if paddle.in_dynamic_mode(): + assert gg1 is not None + g1_conv = self._update_g1_conv(gg1, g2, nlist_mask, sw) + if not self.g1_out_conv: + g1_mlp.append(g1_conv) + else: + g1_update.append(g1_conv) + + if self.update_g1_has_grrg: + g1_mlp.append( + self.symmetrization_op( + g2, + h2, + nlist_mask, + sw, + self.axis_neuron, + smooth=self.smooth, + epsilon=self.epsilon, + ) + ) + + if self.update_g1_has_drrd: + if paddle.in_dynamic_mode(): + assert gg1 is not None + g1_mlp.append( + self.symmetrization_op( + gg1, + h2, + nlist_mask, + sw, + self.axis_neuron, + smooth=self.smooth, + epsilon=self.epsilon, + ) + ) + + # nb x nloc x [ng1+ng2+(axisxng2)+(axisxng1)] + # conv grrg drrd + g1_1 = self.act(self.linear1(paddle.concat(g1_mlp, axis=-1))) + g1_update.append(g1_1) + + if self.update_g1_has_attn: + assert gg1 is not None + assert self.loc_attn is not None + g1_update.append(self.loc_attn(g1, gg1, nlist_mask, sw)) + + # update + if self.update_chnnl_2: + g2_new = self.list_update(g2_update, "g2") + h2_new = self.list_update(h2_update, "h2") + else: + g2_new, h2_new = g2, h2 + g1_new = self.list_update(g1_update, "g1") + return g1_new, g2_new, h2_new + + def list_update_res_avg( + self, + update_list: list[paddle.Tensor], + ) -> paddle.Tensor: + nitem = len(update_list) + uu = update_list[0] + for ii in range(1, 
nitem): + uu = uu + update_list[ii] + return uu / (float(nitem) ** 0.5) + + def list_update_res_incr(self, update_list: list[paddle.Tensor]) -> paddle.Tensor: + nitem = len(update_list) + uu = update_list[0] + scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 + for ii in range(1, nitem): + uu = uu + scale * update_list[ii] + return uu + + def list_update_res_residual( + self, update_list: list[paddle.Tensor], update_name: str = "g1" + ) -> paddle.Tensor: + nitem = len(update_list) + uu = update_list[0] + # make jit happy + if update_name == "g1": + for ii, vv in enumerate(self.g1_residual): + uu = uu + vv * update_list[ii + 1] + elif update_name == "g2": + for ii, vv in enumerate(self.g2_residual): + uu = uu + vv * update_list[ii + 1] + elif update_name == "h2": + for ii, vv in enumerate(self.h2_residual): + uu = uu + vv * update_list[ii + 1] + else: + raise NotImplementedError + return uu + + def list_update( + self, update_list: list[paddle.Tensor], update_name: str = "g1" + ) -> paddle.Tensor: + if self.update_style == "res_avg": + return self.list_update_res_avg(update_list) + elif self.update_style == "res_incr": + return self.list_update_res_incr(update_list) + elif self.update_style == "res_residual": + return self.list_update_res_residual(update_list, update_name=update_name) + else: + raise RuntimeError(f"unknown update style {self.update_style}") + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + data = { + "@class": "RepformerLayer", + "@version": 1, + "rcut": self.rcut, + "rcut_smth": self.rcut_smth, + "sel": self.sel, + "ntypes": self.ntypes, + "g1_dim": self.g1_dim, + "g2_dim": self.g2_dim, + "axis_neuron": self.axis_neuron, + "update_chnnl_2": self.update_chnnl_2, + "update_g1_has_conv": self.update_g1_has_conv, + "update_g1_has_drrd": self.update_g1_has_drrd, + "update_g1_has_grrg": self.update_g1_has_grrg, + "update_g1_has_attn": self.update_g1_has_attn, + "update_g2_has_g1g1": self.update_g2_has_g1g1, + "update_g2_has_attn": self.update_g2_has_attn, + "update_h2": self.update_h2, + "attn1_hidden": self.attn1_hidden, + "attn1_nhead": self.attn1_nhead, + "attn2_hidden": self.attn2_hidden, + "attn2_nhead": self.attn2_nhead, + "attn2_has_gate": self.attn2_has_gate, + "activation_function": self.activation_function, + "update_style": self.update_style, + "smooth": self.smooth, + "precision": self.precision, + "trainable_ln": self.trainable_ln, + "use_sqrt_nnei": self.use_sqrt_nnei, + "g1_out_conv": self.g1_out_conv, + "g1_out_mlp": self.g1_out_mlp, + "ln_eps": self.ln_eps, + "linear1": self.linear1.serialize(), + } + if self.update_chnnl_2: + data.update( + { + "linear2": self.linear2.serialize(), + } + ) + if self.update_g1_has_conv: + data.update( + { + "proj_g1g2": self.proj_g1g2.serialize(), + } + ) + if self.update_g2_has_g1g1: + data.update( + { + "proj_g1g1g2": self.proj_g1g1g2.serialize(), + } + ) + if self.update_g2_has_attn or self.update_h2: + data.update( + { + "attn2g_map": self.attn2g_map.serialize(), + } + ) + if self.update_g2_has_attn: + data.update( + { + "attn2_mh_apply": self.attn2_mh_apply.serialize(), + "attn2_lm": self.attn2_lm.serialize(), + } + ) + + if self.update_h2: + data.update( + { + "attn2_ev_apply": self.attn2_ev_apply.serialize(), + } + ) + if self.update_g1_has_attn: + data.update( + { + "loc_attn": self.loc_attn.serialize(), + } + ) + if self.g1_out_mlp: + data.update( + { + "g1_self_mlp": self.g1_self_mlp.serialize(), + } + ) + if 
self.update_style == "res_residual": + data.update( + { + "g1_residual": [to_numpy_array(t) for t in self.g1_residual], + "g2_residual": [to_numpy_array(t) for t in self.g2_residual], + "h2_residual": [to_numpy_array(t) for t in self.h2_residual], + } + ) + return data + + @classmethod + def deserialize(cls, data: dict) -> "RepformerLayer": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + linear1 = data.pop("linear1") + update_chnnl_2 = data["update_chnnl_2"] + update_g1_has_conv = data["update_g1_has_conv"] + update_g2_has_g1g1 = data["update_g2_has_g1g1"] + update_g2_has_attn = data["update_g2_has_attn"] + update_h2 = data["update_h2"] + update_g1_has_attn = data["update_g1_has_attn"] + update_style = data["update_style"] + g1_out_mlp = data["g1_out_mlp"] + + linear2 = data.pop("linear2", None) + proj_g1g2 = data.pop("proj_g1g2", None) + proj_g1g1g2 = data.pop("proj_g1g1g2", None) + attn2g_map = data.pop("attn2g_map", None) + attn2_mh_apply = data.pop("attn2_mh_apply", None) + attn2_lm = data.pop("attn2_lm", None) + attn2_ev_apply = data.pop("attn2_ev_apply", None) + loc_attn = data.pop("loc_attn", None) + g1_self_mlp = data.pop("g1_self_mlp", None) + g1_residual = data.pop("g1_residual", []) + g2_residual = data.pop("g2_residual", []) + h2_residual = data.pop("h2_residual", []) + + obj = cls(**data) + obj.linear1 = MLPLayer.deserialize(linear1) + if update_chnnl_2: + assert isinstance(linear2, dict) + obj.linear2 = MLPLayer.deserialize(linear2) + if update_g1_has_conv: + assert isinstance(proj_g1g2, dict) + obj.proj_g1g2 = MLPLayer.deserialize(proj_g1g2) + if update_g2_has_g1g1: + assert isinstance(proj_g1g1g2, dict) + obj.proj_g1g1g2 = MLPLayer.deserialize(proj_g1g1g2) + if update_g2_has_attn or update_h2: + assert isinstance(attn2g_map, dict) + obj.attn2g_map = Atten2Map.deserialize(attn2g_map) + if update_g2_has_attn: + assert isinstance(attn2_mh_apply, dict) + assert isinstance(attn2_lm, dict) + obj.attn2_mh_apply = Atten2MultiHeadApply.deserialize(attn2_mh_apply) + obj.attn2_lm = LayerNorm.deserialize(attn2_lm) + if update_h2: + assert isinstance(attn2_ev_apply, dict) + obj.attn2_ev_apply = Atten2EquiVarApply.deserialize(attn2_ev_apply) + if update_g1_has_attn: + assert isinstance(loc_attn, dict) + obj.loc_attn = LocalAtten.deserialize(loc_attn) + if g1_out_mlp: + assert isinstance(g1_self_mlp, dict) + obj.g1_self_mlp = MLPLayer.deserialize(g1_self_mlp) + if update_style == "res_residual": + for ii, t in enumerate(obj.g1_residual): + t.data = to_paddle_tensor(g1_residual[ii]) + for ii, t in enumerate(obj.g2_residual): + t.data = to_paddle_tensor(g2_residual[ii]) + for ii, t in enumerate(obj.h2_residual): + t.data = to_paddle_tensor(h2_residual[ii]) + return obj diff --git a/deepmd/pd/model/descriptor/repformers.py b/deepmd/pd/model/descriptor/repformers.py new file mode 100644 index 0000000000..fbbfcc1216 --- /dev/null +++ b/deepmd/pd/model/descriptor/repformers.py @@ -0,0 +1,556 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor.descriptor import ( + DescriptorBlock, +) +from deepmd.pd.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pd.model.network.mlp import ( + MLPLayer, +) +from deepmd.pd.utils import ( + decomp, + env, +) 
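# --- Illustrative sketch (reviewer annotation, not part of the diff) ---
# The three update styles implemented by RepformerLayer.list_update above
# combine the original representation u_0 with its updates u_1..u_n as
#   res_avg:      u = (u_0 + u_1 + ... + u_n) / sqrt(n + 1)
#   res_incr:     u = u_0 + (u_1 + ... + u_n) / sqrt(n)
#   res_residual: u = u_0 + r_1 * u_1 + ... + r_n * u_n
# A minimal standalone version; `combine_updates`, `updates`, and
# `residual_weights` are hypothetical names, assuming a non-empty list of
# equally shaped paddle.Tensor and the learned residual weights r_i.
import paddle

def combine_updates(updates, style="res_avg", residual_weights=None):
    nitem = len(updates)
    if style == "res_avg":
        # average all items, including the original representation
        return paddle.add_n(updates) / (float(nitem) ** 0.5)
    if style == "res_incr":
        # keep the original representation, scale only the increments
        if nitem == 1:
            return updates[0]
        scale = 1.0 / (float(nitem - 1) ** 0.5)
        return updates[0] + scale * paddle.add_n(updates[1:])
    if style == "res_residual":
        # weight each increment by its learned residual coefficient
        out = updates[0]
        for r, u in zip(residual_weights, updates[1:]):
            out = out + r * u
        return out
    raise RuntimeError(f"unknown update style {style}")
# --- end of sketch ---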
+from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pd.utils.utils import ( + ActivationFn, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) + +from .repformer_layer import ( + RepformerLayer, +) + +# if not hasattr(paddle.ops.deepmd, "border_op"): + +# def border_op( +# argument0, +# argument1, +# argument2, +# argument3, +# argument4, +# argument5, +# argument6, +# argument7, +# argument8, +# ) -> paddle.Tensor: +# raise NotImplementedError( +# "border_op is not available since customized Paddle OP library is not built when freezing the model." +# ) + +# # Note: this hack cannot actually save a model that can be runned using LAMMPS. +# paddle.ops.deepmd.border_op = border_op + + +@DescriptorBlock.register("se_repformer") +@DescriptorBlock.register("se_uni") +class DescrptBlockRepformers(DescriptorBlock): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + nlayers: int = 3, + g1_dim=128, + g2_dim=16, + axis_neuron: int = 4, + direct_dist: bool = False, + update_g1_has_conv: bool = True, + update_g1_has_drrd: bool = True, + update_g1_has_grrg: bool = True, + update_g1_has_attn: bool = True, + update_g2_has_g1g1: bool = True, + update_g2_has_attn: bool = True, + update_h2: bool = False, + attn1_hidden: int = 64, + attn1_nhead: int = 4, + attn2_hidden: int = 16, + attn2_nhead: int = 4, + attn2_has_gate: bool = False, + activation_function: str = "tanh", + update_style: str = "res_avg", + update_residual: float = 0.001, + update_residual_init: str = "norm", + set_davg_zero: bool = True, + smooth: bool = True, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + precision: str = "float64", + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + seed: Optional[Union[int, list[int]]] = None, + use_sqrt_nnei: bool = True, + g1_out_conv: bool = True, + g1_out_mlp: bool = True, + ): + r""" + The repformer descriptor block. + + Parameters + ---------- + rcut : float + The cut-off radius. + rcut_smth : float + Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth. + sel : int + Maximally possible number of selected neighbors. + ntypes : int + Number of element types + nlayers : int, optional + Number of repformer layers. + g1_dim : int, optional + Dimension of the first graph convolution layer. + g2_dim : int, optional + Dimension of the second graph convolution layer. + axis_neuron : int, optional + Size of the submatrix of G (embedding matrix). + direct_dist : bool, optional + Whether to use direct distance information (1/r term) in the repformer block. + update_g1_has_conv : bool, optional + Whether to update the g1 rep with convolution term. + update_g1_has_drrd : bool, optional + Whether to update the g1 rep with the drrd term. + update_g1_has_grrg : bool, optional + Whether to update the g1 rep with the grrg term. + update_g1_has_attn : bool, optional + Whether to update the g1 rep with the localized self-attention. + update_g2_has_g1g1 : bool, optional + Whether to update the g2 rep with the g1xg1 term. + update_g2_has_attn : bool, optional + Whether to update the g2 rep with the gated self-attention. + update_h2 : bool, optional + Whether to update the h2 rep. + attn1_hidden : int, optional + The hidden dimension of localized self-attention to update the g1 rep. + attn1_nhead : int, optional + The number of heads in localized self-attention to update the g1 rep. 
+ attn2_hidden : int, optional + The hidden dimension of gated self-attention to update the g2 rep. + attn2_nhead : int, optional + The number of heads in gated self-attention to update the g2 rep. + attn2_has_gate : bool, optional + Whether to use gate in the gated self-attention to update the g2 rep. + activation_function : str, optional + The activation function in the embedding net. + update_style : str, optional + Style to update a representation. + Supported options are: + -'res_avg': Updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) + -'res_incr': Updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n) + -'res_residual': Updates a rep `u` with: u = u + (r1*u_1 + r2*u_2 + ... + r3*u_n) + where `r1`, `r2` ... `r3` are residual weights defined by `update_residual` + and `update_residual_init`. + update_residual : float, optional + When update using residual mode, the initial std of residual vector weights. + update_residual_init : str, optional + When update using residual mode, the initialization mode of residual vector weights. + set_davg_zero : bool, optional + Set the normalization average to zero. + precision : str, optional + The precision of the embedding net parameters. + smooth : bool, optional + Whether to use smoothness in processes such as attention weights calculation. + exclude_types : list[list[int]], optional + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float, optional + Protection parameter to prevent division by zero errors during environment matrix calculations. + For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. + trainable_ln : bool, optional + Whether to use trainable shift and scale weights in layer normalization. + use_sqrt_nnei : bool, optional + Whether to use the square root of the number of neighbors for symmetrization_op normalization instead of using the number of neighbors directly. + g1_out_conv : bool, optional + Whether to put the convolutional update of g1 separately outside the concatenated MLP update. + g1_out_mlp : bool, optional + Whether to put the self MLP update of g1 separately outside the concatenated MLP update. + ln_eps : float, optional + The epsilon value for layer normalization. + seed : int, optional + Random seed for parameter initialization. + """ + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.ntypes = ntypes + self.nlayers = nlayers + sel = [sel] if isinstance(sel, int) else sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 # use full descriptor. 
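# (Reviewer annotation, not part of the diff) ndescrpt = nnei * 4 because each
# selected neighbor contributes one 4-component environment-matrix row,
# roughly (s(r), s(r)*x/r, s(r)*y/r, s(r)*z/r); e.g. with sel = [120] the
# per-atom descriptor input has 120 * 4 = 480 entries.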
+ assert len(sel) == 1 + self.sel = sel + self.sec = self.sel + self.split_sel = self.sel + self.axis_neuron = axis_neuron + self.set_davg_zero = set_davg_zero + self.g1_dim = g1_dim + self.g2_dim = g2_dim + self.update_g1_has_conv = update_g1_has_conv + self.update_g1_has_drrd = update_g1_has_drrd + self.update_g1_has_grrg = update_g1_has_grrg + self.update_g1_has_attn = update_g1_has_attn + self.update_g2_has_g1g1 = update_g2_has_g1g1 + self.update_g2_has_attn = update_g2_has_attn + self.update_h2 = update_h2 + self.attn1_hidden = attn1_hidden + self.attn1_nhead = attn1_nhead + self.attn2_has_gate = attn2_has_gate + self.attn2_hidden = attn2_hidden + self.attn2_nhead = attn2_nhead + self.activation_function = activation_function + self.update_style = update_style + self.update_residual = update_residual + self.update_residual_init = update_residual_init + self.direct_dist = direct_dist + self.act = ActivationFn(activation_function) + self.smooth = smooth + self.use_sqrt_nnei = use_sqrt_nnei + self.g1_out_conv = g1_out_conv + self.g1_out_mlp = g1_out_mlp + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + self.env_protection = env_protection + self.precision = precision + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.epsilon = 1e-4 + self.seed = seed + + self.g2_embd = MLPLayer( + 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + ) + layers = [] + for ii in range(nlayers): + layers.append( + RepformerLayer( + self.rcut, + self.rcut_smth, + self.sel, + self.ntypes, + self.g1_dim, + self.g2_dim, + axis_neuron=self.axis_neuron, + update_chnnl_2=(ii != nlayers - 1), + update_g1_has_conv=self.update_g1_has_conv, + update_g1_has_drrd=self.update_g1_has_drrd, + update_g1_has_grrg=self.update_g1_has_grrg, + update_g1_has_attn=self.update_g1_has_attn, + update_g2_has_g1g1=self.update_g2_has_g1g1, + update_g2_has_attn=self.update_g2_has_attn, + update_h2=self.update_h2, + attn1_hidden=self.attn1_hidden, + attn1_nhead=self.attn1_nhead, + attn2_has_gate=self.attn2_has_gate, + attn2_hidden=self.attn2_hidden, + attn2_nhead=self.attn2_nhead, + activation_function=self.activation_function, + update_style=self.update_style, + update_residual=self.update_residual, + update_residual_init=self.update_residual_init, + smooth=self.smooth, + trainable_ln=self.trainable_ln, + ln_eps=self.ln_eps, + precision=precision, + use_sqrt_nnei=self.use_sqrt_nnei, + g1_out_conv=self.g1_out_conv, + g1_out_mlp=self.g1_out_mlp, + seed=child_seed(child_seed(seed, 1), ii), + ) + ) + self.layers = paddle.nn.LayerList(layers) + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = paddle.zeros(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + stddev = paddle.ones(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.stats = None + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def 
get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def get_dim_emb(self) -> int: + """Returns the embedding dimension g2.""" + return self.g2_dim + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.g1_dim + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.g1_dim + + @property + def dim_emb(self): + """Returns the embedding dimension g2.""" + return self.get_dim_emb() + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: paddle.Tensor, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + extended_atype_embd: Optional[paddle.Tensor] = None, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + if comm_dict is None: + assert mapping is not None + assert extended_atype_embd is not None + nframes, nloc, nnei = nlist.shape + nall = extended_coord.reshape([nframes, -1]).shape[1] // 3 + atype = extended_atype[:, :nloc] + # nb x nloc x nnei + exclude_mask = self.emask(nlist, extended_atype) + nlist = paddle.where(exclude_mask != 0, nlist, paddle.full_like(nlist, -1)) + # nb x nloc x nnei x 4, nb x nloc x nnei x 3, nb x nloc x nnei x 1 + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + nlist_mask = nlist != -1 + sw = paddle.squeeze(sw, -1) + # beyond the cutoff sw should be 0.0 + sw = sw.masked_fill(~nlist_mask, 0.0) + + # [nframes, nloc, tebd_dim] + if comm_dict is None: + if paddle.in_dynamic_mode(): + assert isinstance(extended_atype_embd, paddle.Tensor) # for jit + atype_embd = extended_atype_embd[:, :nloc, :] + if paddle.in_dynamic_mode(): + assert list(atype_embd.shape) == [nframes, nloc, self.g1_dim] + else: + atype_embd = extended_atype_embd + if paddle.in_dynamic_mode(): + assert isinstance(atype_embd, paddle.Tensor) # for jit + g1 = self.act(atype_embd) + # nb x nloc x nnei x 1, nb x nloc x nnei x 3 + if not self.direct_dist: + g2, h2 = paddle.split(dmatrix, [1, 3], axis=-1) + else: + # g2, h2 = paddle.linalg.norm(diff, axis=-1, keepdim=True), diff + g2, h2 = decomp.norm(diff, axis=-1, keepdim=True), diff + g2 = g2 / self.rcut + h2 = h2 / self.rcut + # nb x nloc x nnei x ng2 + g2 = 
self.act(self.g2_embd(g2)) + + # set all padding positions to index of 0 + # if the a neighbor is real or not is indicated by nlist_mask + nlist[nlist == -1] = 0 + # nb x nall x ng1 + if comm_dict is None: + assert mapping is not None + mapping = ( + mapping.reshape([nframes, nall]) + .unsqueeze(-1) + .expand([-1, -1, self.g1_dim]) + ) + for idx, ll in enumerate(self.layers): + # g1: nb x nloc x ng1 + # g1_ext: nb x nall x ng1 + if comm_dict is None: + assert mapping is not None + g1_ext = decomp.take_along_axis(g1, axis=1, indices=mapping) + else: + n_padding = nall - nloc + g1 = paddle.nn.functional.pad( + g1.squeeze(0), (0, 0, 0, n_padding), value=0.0 + ) + assert "send_list" in comm_dict + assert "send_proc" in comm_dict + assert "recv_proc" in comm_dict + assert "send_num" in comm_dict + assert "recv_num" in comm_dict + assert "communicator" in comm_dict + ret = paddle.ops.deepmd.border_op( + comm_dict["send_list"], + comm_dict["send_proc"], + comm_dict["recv_proc"], + comm_dict["send_num"], + comm_dict["recv_num"], + g1, + comm_dict["communicator"], + paddle.to_tensor(nloc), # pylint: disable=no-explicit-dtype,no-explicit-device + paddle.to_tensor(nall - nloc), # pylint: disable=no-explicit-dtype,no-explicit-device + ) + g1_ext = ret[0].unsqueeze(0) + g1, g2, h2 = ll.forward( + g1_ext, + g2, + h2, + nlist, + nlist_mask, + sw, + ) + + # nb x nloc x 3 x ng2 + h2g2 = RepformerLayer._cal_hg( + g2, + h2, + nlist_mask, + sw, + smooth=self.smooth, + epsilon=self.epsilon, + use_sqrt_nnei=self.use_sqrt_nnei, + ) + # (nb x nloc) x ng2 x 3 + rot_mat = paddle.transpose(h2g2, (0, 1, 3, 2)) + + return g1, g2, h2, rot_mat.reshape([nframes, nloc, self.dim_emb, 3]), sw + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." 
+ ) + return self.stats + + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + return True + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + return False diff --git a/deepmd/pd/model/descriptor/se_a.py b/deepmd/pd/model/descriptor/se_a.py new file mode 100644 index 0000000000..76ea32797f --- /dev/null +++ b/deepmd/pd/model/descriptor/se_a.py @@ -0,0 +1,680 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +from typing import ( + Callable, + ClassVar, + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor import ( + DescriptorBlock, + prod_env_mat, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +try: + from typing import ( + Final, + ) +except ImportError: + from paddle.jit import Final + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.pd.model.network.mlp import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) + +from .base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("se_e2_a") +@BaseDescriptor.register("se_a") +class DescrptSeA(BaseDescriptor, paddle.nn.Layer): + def __init__( + self, + rcut, + rcut_smth, + sel, + neuron=[25, 50, 100], + axis_neuron=16, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + type_one_side: bool = True, + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + ntypes: Optional[int] = None, # to be compat with input + type_map: Optional[list[str]] = None, + # not implemented + spin=None, + ): + del ntypes + if spin is not None: + raise NotImplementedError("old implementation of spin is not supported.") + super().__init__() + self.type_map = type_map + self.sea = DescrptBlockSeA( + rcut, + rcut_smth, + sel, + neuron=neuron, + axis_neuron=axis_neuron, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, + resnet_dt=resnet_dt, + exclude_types=exclude_types, + env_protection=env_protection, + type_one_side=type_one_side, + trainable=trainable, + seed=seed, + ) + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.sea.get_rcut() + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.sea.get_rcut_smth() + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return self.sea.get_nsel() + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sea.get_sel() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.sea.get_ntypes() + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return 
self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.sea.get_dim_out() + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return self.sea.get_dim_emb() + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return self.sea.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return self.sea.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return self.sea.need_sorted_nlist_for_lower() + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.sea.get_env_protection() + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For SeA descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in sea + if shared_level == 0: + self.sea.share_params(base_class.sea, 0, resume=resume) + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.sea.dim_out + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + raise NotImplementedError( + "Descriptor se_e2_a does not support changing for type related params!" + "This feature is currently not implemented because it would require additional work to support the non-mixed-types case. " + "We may consider adding this support in the future if there is a clear demand for it." + ) + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + return self.sea.compute_input_stats(merged, path) + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + """Update the type exclusions.""" + self.sea.reinit_exclude(exclude_types) + + def forward( + self, + coord_ext: paddle.Tensor, + atype_ext: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + this descriptor returns None + h2 + The rotationally equivariant pair-partical representation. + this descriptor returns None + sw + The smooth switch function. + + """ + return self.sea.forward(nlist, coord_ext, atype_ext, None, mapping) + + def set_stat_mean_and_stddev( + self, + mean: paddle.Tensor, + stddev: paddle.Tensor, + ) -> None: + """Update mean and stddev for descriptor.""" + self.sea.mean = mean + self.sea.stddev = stddev + + def get_stat_mean_and_stddev(self) -> tuple[paddle.Tensor, paddle.Tensor]: + """Get mean and stddev for descriptor.""" + return self.sea.mean, self.sea.stddev + + def serialize(self) -> dict: + obj = self.sea + return { + "@class": "Descriptor", + "type": "se_e2_a", + "@version": 2, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "neuron": obj.neuron, + "axis_neuron": obj.axis_neuron, + "resnet_dt": obj.resnet_dt, + "set_davg_zero": obj.set_davg_zero, + "activation_function": obj.activation_function, + # make deterministic + "precision": RESERVED_PRECISON_DICT[obj.prec], + "embeddings": obj.filter_layers.serialize(), + "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": obj["davg"].numpy(), + "dstd": obj["dstd"].numpy(), + }, + "type_map": self.type_map, + ## to be updated when the options are supported. + "trainable": True, + "type_one_side": obj.type_one_side, + "spin": None, + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeA": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("@class", None) + data.pop("type", None) + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + env_mat = data.pop("env_mat") + obj = cls(**data) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.sea.prec).to(device=env.DEVICE) + + obj.sea["davg"] = t_cvt(variables["davg"]) + obj.sea["dstd"] = t_cvt(variables["dstd"]) + obj.sea.filter_layers = NetworkCollection.deserialize(embeddings) + return obj + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + min_nbor_dist, local_jdata_cpy["sel"] = UpdateSel().update_one_sel( + train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], False + ) + return local_jdata_cpy, min_nbor_dist + + +@DescriptorBlock.register("se_e2_a") +class DescrptBlockSeA(DescriptorBlock): + ndescrpt: Final[int] + __constants__: ClassVar[list] = ["ndescrpt"] + + def __init__( + self, + rcut, + rcut_smth, + sel, + neuron=[25, 50, 100], + axis_neuron=16, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + type_one_side: bool = True, + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + **kwargs, + ): + """Construct an embedding net of type `se_a`. + + Args: + - rcut: Cut-off radius. + - rcut_smth: Smooth hyper-parameter for pair force & energy. + - sel: For each element type, how many atoms is selected as neighbors. + - filter_neuron: Number of neurons in each hidden layers of the embedding net. + - axis_neuron: Number of columns of the sub-matrix of the embedding matrix. + """ + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.neuron = neuron + self.filter_neuron = self.neuron + self.axis_neuron = axis_neuron + self.set_davg_zero = set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.env_protection = env_protection + self.ntypes = len(sel) + self.type_one_side = type_one_side + self.seed = seed + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + + self.sel = sel + # should be on CPU to avoid D2H, as it is used as slice index + self.sec = [0, *np.cumsum(self.sel).tolist()] + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = paddle.zeros(wanted_shape, dtype=self.prec).to(device=env.DEVICE) + stddev = paddle.ones(wanted_shape, dtype=self.prec).to(device=env.DEVICE) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + + ndim = 1 if self.type_one_side else 2 + filter_layers = NetworkCollection( + ndim=ndim, ntypes=len(sel), network_type="embedding_network" + ) + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=ndim) + ): + filter_layers[embedding_idx] = EmbeddingNet( + 1, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, ii), + ) + self.filter_layers = filter_layers + self.stats = None + # set trainable + for param in self.parameters(): + param.stop_gradient = not trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + 
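# --- Illustrative sketch (reviewer annotation, not part of the diff) ---
# self.sec above holds cumulative neighbor-count offsets, so the
# per-neighbor-type blocks of the flattened environment matrix can be sliced
# with it; sel values here are hypothetical.
import numpy as np

sel = [46, 92]                        # neighbors selected per type (example)
sec = [0, *np.cumsum(sel).tolist()]   # -> [0, 46, 138]
# neighbors of type ii then occupy dmatrix[:, sec[ii]:sec[ii + 1], :],
# which is how DescrptBlockSeA.forward indexes the embedding inputs below.
# --- end of sketch ---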
def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return self.neuron[-1] + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return False + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] * self.axis_neuron + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return 0 + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." 
+ ) + return self.stats + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: paddle.Tensor, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + extended_atype_embd: Optional[paddle.Tensor] = None, + mapping: Optional[paddle.Tensor] = None, + ): + """Calculate decoded embedding for each atom. + + Args: + - coord: Tell atom coordinates with shape [nframes, natoms[1]*3]. + - atype: Tell atom types with shape [nframes, natoms[1]]. + - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. + - box: Tell simulation box with shape [nframes, 9]. + + Returns + ------- + - `paddle.Tensor`: descriptor matrix with shape [nframes, natoms[0]*self.filter_neuron[-1]*self.axis_neuron]. + """ + del extended_atype_embd, mapping + nf = nlist.shape[0] + nloc = nlist.shape[1] + atype: paddle.Tensor = extended_atype[:, :nloc] + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + + assert self.filter_layers is not None + dmatrix = dmatrix.reshape([-1, self.nnei, 4]) + dmatrix = dmatrix.astype(self.prec) + nfnl = dmatrix.shape[0] + # pre-allocate a shape to pass jit + xyz_scatter = paddle.zeros( + [nfnl, 4, self.filter_neuron[-1]], + dtype=self.prec, + ).to(extended_coord.place) + # nfnl x nnei + exclude_mask = self.emask(nlist, extended_atype).reshape([nfnl, self.nnei]) + for embedding_idx, ll in enumerate(self.filter_layers.networks): + if self.type_one_side: + ii = embedding_idx + # paddle.jit is not happy with slice(None) + # ti_mask = paddle.ones(nfnl, dtype=paddle.bool, device=dmatrix.place) + # applying a mask seems to cause performance degradation + ti_mask = None + else: + # ti: center atom type, ii: neighbor type... 
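# (Reviewer annotation, not part of the diff) With type_one_side=False the
# embedding networks were registered over itertools.product(range(ntypes),
# repeat=2), so the flat embedding_idx decomposes as
#   ii = embedding_idx // ntypes  -> picks the neighbor-type slice sec[ii]:sec[ii + 1]
#   ti = embedding_idx % ntypes   -> picks center atoms of type ti via ti_mask
# as done in the statements that follow.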
+ ii = embedding_idx // self.ntypes + ti = embedding_idx % self.ntypes + ti_mask = atype.flatten() == ti + # nfnl x nt + if ti_mask is not None: + mm = exclude_mask[ti_mask, self.sec[ii] : self.sec[ii + 1]] + else: + mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]] + # nfnl x nt x 4 + if ti_mask is not None: + rr = dmatrix[ti_mask, self.sec[ii] : self.sec[ii + 1], :] + else: + rr = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :] + if rr.numel() > 0: + rr = rr * mm.unsqueeze(2).astype(rr.dtype) + ss = rr[:, :, :1] + # nfnl x nt x ng + gg = ll.forward(ss) + # nfnl x 4 x ng + gr = paddle.matmul(rr.transpose([0, 2, 1]), gg) + if ti_mask is not None: + xyz_scatter[ti_mask] += gr + else: + xyz_scatter += gr + + xyz_scatter /= self.nnei + xyz_scatter_1 = xyz_scatter.transpose([0, 2, 1]) + rot_mat: paddle.Tensor = xyz_scatter_1[:, :, 1:4] + xyz_scatter_2 = xyz_scatter[:, :, 0 : self.axis_neuron] + result = paddle.matmul( + xyz_scatter_1, xyz_scatter_2 + ) # shape is [nframes*nall, self.filter_neuron[-1], self.axis_neuron] + result = result.reshape([nf, nloc, self.filter_neuron[-1] * self.axis_neuron]) + rot_mat = rot_mat.reshape([nf, nloc] + list(rot_mat.shape[1:])) # noqa:RUF005 + return ( + result.astype(env.GLOBAL_PD_FLOAT_PRECISION), + rot_mat.astype(env.GLOBAL_PD_FLOAT_PRECISION), + None, + None, + sw, + ) + + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + return False + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + return False diff --git a/deepmd/pd/model/descriptor/se_atten.py b/deepmd/pd/model/descriptor/se_atten.py new file mode 100644 index 0000000000..2eaeb7f7a7 --- /dev/null +++ b/deepmd/pd/model/descriptor/se_atten.py @@ -0,0 +1,979 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle +import paddle.nn as nn +import paddle.nn.functional as paddle_func + +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor.descriptor import ( + DescriptorBlock, +) +from deepmd.pd.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pd.model.network.layernorm import ( + LayerNorm, +) +from deepmd.pd.model.network.mlp import ( + EmbeddingNet, + MLPLayer, + NetworkCollection, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@DescriptorBlock.register("se_atten") +class DescrptBlockSeAtten(DescriptorBlock): + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list = [25, 50, 100], + axis_neuron: int = 16, + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + set_davg_zero: bool = True, + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + activation_function="tanh", + precision: str = "float64", + resnet_dt: bool = False, + scaling_factor=1.0, + normalize=True, + temperature=None, + smooth: bool = True, + type_one_side: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + trainable_ln: bool = True, + 
ln_eps: Optional[float] = 1e-5, + seed: Optional[Union[int, list[int]]] = None, + type: Optional[str] = None, + ): + r"""Construct an embedding net of type `se_atten`. + + Parameters + ---------- + rcut : float + The cut-off radius :math:`r_c` + rcut_smth : float + From where the environment matrix should be smoothed :math:`r_s` + sel : list[int], int + list[int]: sel[i] specifies the maxmum number of type i atoms in the cut-off radius + int: the total maxmum number of atoms in the cut-off radius + ntypes : int + Number of element types + neuron : list[int] + Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` + axis_neuron : int + Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix) + tebd_dim : int + Dimension of the type embedding + tebd_input_mode : str + The input mode of the type embedding. Supported modes are ["concat", "strip"]. + - "concat": Concatenate the type embedding with the smoothed radial information as the union input for the embedding network. + - "strip": Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. + resnet_dt : bool + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + trainable_ln : bool + Whether to use trainable shift and scale weights in layer normalization. + ln_eps : float, Optional + The epsilon value for layer normalization. + type_one_side : bool + If 'False', type embeddings of both neighbor and central atoms are considered. + If 'True', only type embeddings of neighbor atoms are considered. + Default is 'False'. + attn : int + Hidden dimension of the attention vectors + attn_layer : int + Number of attention layers + attn_dotr : bool + If dot the angular gate to the attention weights + attn_mask : bool + (Only support False to keep consistent with other backend references.) + (Not used in this version.) + If mask the diagonal of attention weights + exclude_types : list[list[int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float + Protection parameter to prevent division by zero errors during environment matrix calculations. + set_davg_zero : bool + Set the shift of embedding net input to zero. + activation_function : str + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + precision : str + The precision of the embedding net parameters. Supported options are |PRECISION| + scaling_factor : float + The scaling factor of normalization in calculations of attention weights. + If `temperature` is None, the scaling of attention weights is (N_dim * scaling_factor)**0.5 + normalize : bool + Whether to normalize the hidden vectors in attention weights calculation. + temperature : float + If not None, the scaling of attention weights is `temperature` itself. + seed : int, Optional + Random seed for parameter initialization. 
+ """ + super().__init__() + del type + self.rcut = rcut + self.rcut_smth = rcut_smth + self.neuron = neuron + self.filter_neuron = self.neuron + self.axis_neuron = axis_neuron + self.tebd_dim = tebd_dim + self.tebd_input_mode = tebd_input_mode + self.set_davg_zero = set_davg_zero + self.attn_dim = attn + self.attn_layer = attn_layer + self.attn_dotr = attn_dotr + self.attn_mask = attn_mask + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.scaling_factor = scaling_factor + self.normalize = normalize + self.temperature = temperature + self.smooth = smooth + self.type_one_side = type_one_side + self.env_protection = env_protection + self.trainable_ln = trainable_ln + self.seed = seed + # to keep consistent with default value in this backends + if ln_eps is None: + ln_eps = 1e-5 + self.ln_eps = ln_eps + + if isinstance(sel, int): + sel = [sel] + + self.ntypes = ntypes + self.sel = sel + self.sec = self.sel + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + self.dpa1_attention = NeighborGatedAttention( + self.attn_layer, + self.nnei, + self.filter_neuron[-1], + self.attn_dim, + dotr=self.attn_dotr, + do_mask=self.attn_mask, + scaling_factor=self.scaling_factor, + normalize=self.normalize, + temperature=self.temperature, + trainable_ln=self.trainable_ln, + ln_eps=self.ln_eps, + smooth=self.smooth, + precision=self.precision, + seed=child_seed(self.seed, 0), + ) + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = paddle.zeros(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + stddev = paddle.ones(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.tebd_dim_input = self.tebd_dim if self.type_one_side else self.tebd_dim * 2 + if self.tebd_input_mode in ["concat"]: + self.embd_input_dim = 1 + self.tebd_dim_input + else: + self.embd_input_dim = 1 + + self.filter_layers_strip = None + filter_layers = NetworkCollection( + ndim=0, ntypes=self.ntypes, network_type="embedding_network" + ) + filter_layers[0] = EmbeddingNet( + self.embd_input_dim, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, 1), + ) + self.filter_layers = filter_layers + if self.tebd_input_mode in ["strip"]: + filter_layers_strip = NetworkCollection( + ndim=0, ntypes=self.ntypes, network_type="embedding_network" + ) + filter_layers_strip[0] = EmbeddingNet( + self.tebd_dim_input, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, 2), + ) + self.filter_layers_strip = filter_layers_strip + self.stats = None + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + 
return self.ntypes + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_emb(self) -> int: + """Returns the output dimension of embedding.""" + return self.filter_neuron[-1] + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] * self.axis_neuron + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.tebd_dim + + @property + def dim_emb(self): + """Returns the output dimension of embedding.""" + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." 
+ ) + return self.stats + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: paddle.Tensor, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + extended_atype_embd: Optional[paddle.Tensor] = None, + mapping: Optional[paddle.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + nlist + The neighbor list. shape: nf x nloc x nnei + extended_coord + The extended coordinates of atoms. shape: nf x (nallx3) + extended_atype + The extended aotm types. shape: nf x nall x nt + extended_atype_embd + The extended type embedding of atoms. shape: nf x nall + mapping + The index mapping, not required by this descriptor. + + Returns + ------- + result + The descriptor. shape: nf x nloc x (ng x axis_neuron) + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + del mapping + assert extended_atype_embd is not None + nframes, nloc, nnei = nlist.shape + atype = extended_atype[:, :nloc] + nb = nframes + nall = extended_coord.reshape([nb, -1, 3]).shape[1] + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + # nb x nloc x nnei + exclude_mask = self.emask(nlist, extended_atype) + nlist = paddle.where(exclude_mask != 0, nlist, paddle.full_like(nlist, -1)) + nlist_mask = nlist != -1 + nlist = paddle.where(nlist == -1, paddle.zeros_like(nlist), nlist) + sw = paddle.squeeze(sw, -1) + # nf x nloc x nt -> nf x nloc x nnei x nt + atype_tebd = extended_atype_embd[:, :nloc, :] + atype_tebd_nnei = atype_tebd.unsqueeze(2).expand([-1, -1, self.nnei, -1]) + # nf x nall x nt + nt = extended_atype_embd.shape[-1] + atype_tebd_ext = extended_atype_embd + # nb x (nloc x nnei) x nt + index = nlist.reshape([nb, nloc * nnei]).unsqueeze(-1).expand([-1, -1, nt]) + # nb x (nloc x nnei) x nt + # atype_tebd_nlist = paddle.take_along_axis(atype_tebd_ext, axis=1, index=index) + atype_tebd_nlist = decomp.take_along_axis(atype_tebd_ext, axis=1, indices=index) + # nb x nloc x nnei x nt + atype_tebd_nlist = atype_tebd_nlist.reshape([nb, nloc, nnei, nt]) + # beyond the cutoff sw should be 0.0 + sw = sw.masked_fill(~nlist_mask, 0.0) + # (nb x nloc) x nnei + exclude_mask = exclude_mask.reshape([nb * nloc, nnei]) + # nfnl x nnei x 4 + dmatrix = dmatrix.reshape([-1, self.nnei, 4]) + nfnl = dmatrix.shape[0] + # nfnl x nnei x 4 + rr = dmatrix + rr = rr * exclude_mask[:, :, None].astype(rr.dtype) + ss = rr[:, :, :1] + nlist_tebd = atype_tebd_nlist.reshape([nfnl, nnei, self.tebd_dim]) + atype_tebd = atype_tebd_nnei.reshape([nfnl, nnei, self.tebd_dim]) + if self.tebd_input_mode in ["concat"]: + if not self.type_one_side: + # nfnl x nnei x (1 + tebd_dim * 2) + ss = paddle.concat([ss, nlist_tebd, atype_tebd], axis=2) + else: + # nfnl x nnei x (1 + tebd_dim) + ss = paddle.concat([ss, nlist_tebd], axis=2) + # nfnl x nnei x ng + gg = self.filter_layers.networks[0](ss) + elif self.tebd_input_mode in ["strip"]: + # nfnl x nnei x ng + gg_s = self.filter_layers.networks[0](ss) + assert self.filter_layers_strip 
is not None + if not self.type_one_side: + # nfnl x nnei x (tebd_dim * 2) + tt = paddle.concat([nlist_tebd, atype_tebd], axis=2) + else: + # nfnl x nnei x tebd_dim + tt = nlist_tebd + # nfnl x nnei x ng + gg_t = self.filter_layers_strip.networks[0](tt) + if self.smooth: + gg_t = gg_t * sw.reshape([-1, self.nnei, 1]) + # nfnl x nnei x ng + gg = gg_s * gg_t + gg_s + else: + raise NotImplementedError + + # input_r = paddle.nn.functional.normalize( + # rr.reshape([-1, self.nnei, 4])[:, :, 1:4], axis=-1 + # ) + input_r = decomp.normalize(rr.reshape([-1, self.nnei, 4])[:, :, 1:4], axis=-1) + gg = self.dpa1_attention( + gg, nlist_mask, input_r=input_r, sw=sw + ) # shape is [nframes*nloc, self.neei, out_size] + # nfnl x 4 x ng + xyz_scatter = paddle.matmul(rr.transpose([0, 2, 1]), gg) + xyz_scatter = xyz_scatter / self.nnei + xyz_scatter_1 = xyz_scatter.transpose([0, 2, 1]) + rot_mat = xyz_scatter_1[:, :, 1:4] + xyz_scatter_2 = xyz_scatter[:, :, 0 : self.axis_neuron] + result = paddle.matmul( + xyz_scatter_1, xyz_scatter_2 + ) # shape is [nframes*nloc, self.filter_neuron[-1], self.axis_neuron] + return ( + result.reshape([nframes, nloc, self.filter_neuron[-1] * self.axis_neuron]), + gg.reshape([nframes, nloc, self.nnei, self.filter_neuron[-1]]), + dmatrix.reshape([nframes, nloc, self.nnei, 4])[..., 1:], + rot_mat.reshape([nframes, nloc, self.filter_neuron[-1], 3]), + sw, + ) + + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + return False + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + return False + + +class NeighborGatedAttention(nn.Layer): + def __init__( + self, + layer_num: int, + nnei: int, + embed_dim: int, + hidden_dim: int, + dotr: bool = False, + do_mask: bool = False, + scaling_factor: float = 1.0, + normalize: bool = True, + temperature: Optional[float] = None, + trainable_ln: bool = True, + ln_eps: float = 1e-5, + smooth: bool = True, + precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, list[int]]] = None, + ): + """Construct a neighbor-wise attention net.""" + super().__init__() + self.layer_num = layer_num + self.nnei = nnei + self.embed_dim = embed_dim + self.hidden_dim = hidden_dim + self.dotr = dotr + self.do_mask = do_mask + self.scaling_factor = scaling_factor + self.normalize = normalize + self.temperature = temperature + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.smooth = smooth + self.precision = precision + self.seed = seed + self.network_type = NeighborGatedAttentionLayer + attention_layers = [] + for i in range(self.layer_num): + attention_layers.append( + NeighborGatedAttentionLayer( + nnei, + embed_dim, + hidden_dim, + dotr=dotr, + do_mask=do_mask, + scaling_factor=scaling_factor, + normalize=normalize, + temperature=temperature, + trainable_ln=trainable_ln, + ln_eps=ln_eps, + smooth=smooth, + precision=precision, + seed=child_seed(seed, i), + ) + ) + self.attention_layers = nn.LayerList(attention_layers) + + def forward( + self, + input_G, + nei_mask, + input_r: Optional[paddle.Tensor] = None, + sw: Optional[paddle.Tensor] = None, + ): + """Compute the multi-layer gated self-attention. + + Parameters + ---------- + input_G + inputs with shape: (nf x nloc) x nnei x embed_dim. + nei_mask + neighbor mask, with paddings being 0. shape: (nf x nloc) x nnei. + input_r + normalized radial. shape: (nf x nloc) x nnei x 3. + sw + The smooth switch function. 
shape: nf x nloc x nnei + """ + out = input_G + for layer in self.attention_layers: + out = layer(out, nei_mask, input_r=input_r, sw=sw) + return out + + def __getitem__(self, key): + if isinstance(key, int): + return self.attention_layers[key] + else: + raise TypeError(key) + + def __setitem__(self, key, value): + if not isinstance(key, int): + raise TypeError(key) + if isinstance(value, self.network_type): + pass + elif isinstance(value, dict): + value = self.network_type.deserialize(value) + else: + raise TypeError(value) + self.attention_layers[key] = value + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + return { + "@class": "NeighborGatedAttention", + "@version": 1, + "layer_num": self.layer_num, + "nnei": self.nnei, + "embed_dim": self.embed_dim, + "hidden_dim": self.hidden_dim, + "dotr": self.dotr, + "do_mask": self.do_mask, + "scaling_factor": self.scaling_factor, + "normalize": self.normalize, + "temperature": self.temperature, + "trainable_ln": self.trainable_ln, + "ln_eps": self.ln_eps, + "precision": self.precision, + "attention_layers": [layer.serialize() for layer in self.attention_layers], + } + + @classmethod + def deserialize(cls, data: dict) -> "NeighborGatedAttention": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + attention_layers = data.pop("attention_layers") + obj = cls(**data) + for ii, network in enumerate(attention_layers): + obj[ii] = network + return obj + + +class NeighborGatedAttentionLayer(nn.Layer): + def __init__( + self, + nnei: int, + embed_dim: int, + hidden_dim: int, + dotr: bool = False, + do_mask: bool = False, + scaling_factor: float = 1.0, + normalize: bool = True, + temperature: Optional[float] = None, + smooth: bool = True, + trainable_ln: bool = True, + ln_eps: float = 1e-5, + precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, list[int]]] = None, + ): + """Construct a neighbor-wise attention layer.""" + super().__init__() + self.nnei = nnei + self.embed_dim = embed_dim + self.hidden_dim = hidden_dim + self.dotr = dotr + self.do_mask = do_mask + self.scaling_factor = scaling_factor + self.normalize = normalize + self.temperature = temperature + self.precision = precision + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.seed = seed + self.attention_layer = GatedAttentionLayer( + nnei, + embed_dim, + hidden_dim, + dotr=dotr, + do_mask=do_mask, + scaling_factor=scaling_factor, + normalize=normalize, + temperature=temperature, + smooth=smooth, + precision=precision, + seed=child_seed(seed, 0), + ) + self.attn_layer_norm = LayerNorm( + self.embed_dim, + eps=ln_eps, + trainable=trainable_ln, + precision=precision, + seed=child_seed(seed, 1), + ) + + def forward( + self, + x, + nei_mask, + input_r: Optional[paddle.Tensor] = None, + sw: Optional[paddle.Tensor] = None, + ): + residual = x + x, _ = self.attention_layer(x, nei_mask, input_r=input_r, sw=sw) + x = residual + x + x = self.attn_layer_norm(x) + return x + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
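# [Editor's note] Illustrative sketch, not part of the patch: a numpy stand-in
# for the two `tebd_input_mode` branches of `DescrptBlockSeAtten.forward`
# above. The shapes and the gating formula `gg = gg_s * gg_t + gg_s` follow
# the code in this diff; `toy_net` is a stand-in for the real `EmbeddingNet`
# layers, and the pair-exclusion mask is omitted for brevity.
import numpy as np

nfnl, nnei, tebd_dim, ng = 4, 6, 8, 16
rng = np.random.default_rng(0)
ss = rng.normal(size=(nfnl, nnei, 1))             # radial part s(r_ij)
nlist_tebd = rng.normal(size=(nfnl, nnei, tebd_dim))
atype_tebd = rng.normal(size=(nfnl, nnei, tebd_dim))

def toy_net(x, out_dim, seed):                    # stand-in for EmbeddingNet
    w = np.random.default_rng(seed).normal(size=(x.shape[-1], out_dim))
    return np.tanh(x @ w)

# "concat": type embeddings are concatenated to the radial input
gg_concat = toy_net(np.concatenate([ss, nlist_tebd, atype_tebd], axis=2), ng, 1)

# "strip": a separate net embeds the type information and gates the radial net
gg_s = toy_net(ss, ng, 1)
gg_t = toy_net(np.concatenate([nlist_tebd, atype_tebd], axis=2), ng, 2)
gg_strip = gg_s * gg_t + gg_s

print(gg_concat.shape, gg_strip.shape)            # (4, 6, 16) (4, 6, 16)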
+ """ + return { + "nnei": self.nnei, + "embed_dim": self.embed_dim, + "hidden_dim": self.hidden_dim, + "dotr": self.dotr, + "do_mask": self.do_mask, + "scaling_factor": self.scaling_factor, + "normalize": self.normalize, + "temperature": self.temperature, + "trainable_ln": self.trainable_ln, + "ln_eps": self.ln_eps, + "precision": self.precision, + "attention_layer": self.attention_layer.serialize(), + "attn_layer_norm": self.attn_layer_norm.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "NeighborGatedAttentionLayer": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + attention_layer = data.pop("attention_layer") + attn_layer_norm = data.pop("attn_layer_norm") + obj = cls(**data) + obj.attention_layer = GatedAttentionLayer.deserialize(attention_layer) + obj.attn_layer_norm = LayerNorm.deserialize(attn_layer_norm) + return obj + + +class GatedAttentionLayer(nn.Layer): + def __init__( + self, + nnei: int, + embed_dim: int, + hidden_dim: int, + num_heads: int = 1, + dotr: bool = False, + do_mask: bool = False, + scaling_factor: float = 1.0, + normalize: bool = True, + temperature: Optional[float] = None, + bias: bool = True, + smooth: bool = True, + precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, list[int]]] = None, + ): + """Construct a multi-head neighbor-wise attention net.""" + super().__init__() + assert hidden_dim % num_heads == 0, "hidden_dim must be divisible by num_heads" + self.nnei = nnei + self.embed_dim = embed_dim + self.hidden_dim = hidden_dim + self.num_heads = num_heads + self.head_dim = hidden_dim // num_heads + self.dotr = dotr + self.do_mask = do_mask + self.bias = bias + self.smooth = smooth + self.scaling_factor = scaling_factor + self.temperature = temperature + self.precision = precision + self.seed = seed + self.scaling = ( + (self.head_dim * scaling_factor) ** -0.5 + if temperature is None + else temperature + ) + self.normalize = normalize + self.in_proj = MLPLayer( + embed_dim, + hidden_dim * 3, + bias=bias, + use_timestep=False, + bavg=0.0, + stddev=1.0, + precision=precision, + seed=child_seed(seed, 0), + ) + self.out_proj = MLPLayer( + hidden_dim, + embed_dim, + bias=bias, + use_timestep=False, + bavg=0.0, + stddev=1.0, + precision=precision, + seed=child_seed(seed, 1), + ) + + def forward( + self, + query, + nei_mask, + input_r: Optional[paddle.Tensor] = None, + sw: Optional[paddle.Tensor] = None, + attnw_shift: float = 20.0, + ): + """Compute the multi-head gated self-attention. + + Parameters + ---------- + query + inputs with shape: (nf x nloc) x nnei x embed_dim. + nei_mask + neighbor mask, with paddings being 0. shape: (nf x nloc) x nnei. + input_r + normalized radial. shape: (nf x nloc) x nnei x 3. + sw + The smooth switch function. shape: (nf x nloc) x nnei + attnw_shift : float + The attention weight shift to preserve smoothness when doing padding before softmax. 
+ """ + q, k, v = self.in_proj(query).chunk(3, axis=-1) + + # Reshape for multi-head attention: (nf x nloc) x num_heads x nnei x head_dim + q = q.reshape([-1, self.nnei, self.num_heads, self.head_dim]).transpose( + [0, 2, 1, 3] + ) + k = k.reshape([-1, self.nnei, self.num_heads, self.head_dim]).transpose( + [0, 2, 1, 3] + ) + v = v.reshape([-1, self.nnei, self.num_heads, self.head_dim]).transpose( + [0, 2, 1, 3] + ) + + if self.normalize: + # q = paddle_func.normalize(q, axis=-1) + # k = paddle_func.normalize(k, axis=-1) + # v = paddle_func.normalize(v, axis=-1) + q = decomp.normalize(q, axis=-1) + k = decomp.normalize(k, axis=-1) + v = decomp.normalize(v, axis=-1) + + q = q * self.scaling + # (nf x nloc) x num_heads x head_dim x nnei + k = k.transpose([0, 1, 3, 2]) + + # Compute attention scores + # (nf x nloc) x num_heads x nnei x nnei + attn_weights = paddle.matmul(q, k) + # (nf x nloc) x nnei + nei_mask = nei_mask.reshape([-1, self.nnei]) + + if self.smooth: + assert sw is not None + # (nf x nloc) x 1 x nnei + sw = sw.reshape([-1, 1, self.nnei]) + attn_weights = (attn_weights + attnw_shift) * sw[:, :, :, None] * sw[ + :, :, None, : + ] - attnw_shift + else: + # (nf x nloc) x 1 x 1 x nnei + attn_weights = attn_weights.masked_fill( + ~nei_mask.unsqueeze(1).unsqueeze(1), float("-inf") + ) + + attn_weights = paddle_func.softmax(attn_weights, axis=-1) + attn_weights = attn_weights.masked_fill( + ~nei_mask.unsqueeze(1).unsqueeze(-1), 0.0 + ) + if self.smooth: + assert sw is not None + attn_weights = attn_weights * sw[:, :, :, None] * sw[:, :, None, :] + + if self.dotr: + # (nf x nloc) x nnei x 3 + assert input_r is not None, "input_r must be provided when dotr is True!" + # (nf x nloc) x 1 x nnei x nnei + angular_weight = paddle.matmul( + input_r, input_r.transpose([0, 2, 1]) + ).reshape([-1, 1, self.nnei, self.nnei]) + attn_weights = attn_weights * angular_weight + + # Apply attention to values + # (nf x nloc) x nnei x (num_heads x head_dim) + o = ( + paddle.matmul(attn_weights, v) + .transpose([0, 2, 1, 3]) + .reshape([-1, self.nnei, self.hidden_dim]) + ) + output = self.out_proj(o) + return output, attn_weights + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + return { + "nnei": self.nnei, + "embed_dim": self.embed_dim, + "hidden_dim": self.hidden_dim, + "num_heads": self.num_heads, + "dotr": self.dotr, + "do_mask": self.do_mask, + "scaling_factor": self.scaling_factor, + "normalize": self.normalize, + "temperature": self.temperature, + "bias": self.bias, + "smooth": self.smooth, + "precision": self.precision, + "in_proj": self.in_proj.serialize(), + "out_proj": self.out_proj.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "GatedAttentionLayer": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. 
+ """ + data = data.copy() + in_proj = data.pop("in_proj") + out_proj = data.pop("out_proj") + obj = cls(**data) + obj.in_proj = MLPLayer.deserialize(in_proj) + obj.out_proj = MLPLayer.deserialize(out_proj) + return obj diff --git a/deepmd/pd/model/descriptor/se_atten_v2.py b/deepmd/pd/model/descriptor/se_atten_v2.py new file mode 100644 index 0000000000..6a321114fb --- /dev/null +++ b/deepmd/pd/model/descriptor/se_atten_v2.py @@ -0,0 +1,275 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.pd.model.descriptor.dpa1 import ( + DescrptDPA1, +) +from deepmd.pd.model.network.mlp import ( + NetworkCollection, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNetConsistent, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + RESERVED_PRECISON_DICT, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .se_atten import ( + NeighborGatedAttention, +) + + +@BaseDescriptor.register("se_atten_v2") +class DescrptSeAttenV2(DescrptDPA1): + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list = [25, 50, 100], + axis_neuron: int = 16, + tebd_dim: int = 8, + set_davg_zero: bool = True, + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + scaling_factor: int = 1.0, + normalize=True, + temperature=None, + concat_output_tebd: bool = True, + trainable: bool = True, + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + type_one_side: bool = False, + stripped_type_embedding: Optional[bool] = None, + seed: Optional[Union[int, list[int]]] = None, + use_econf_tebd: bool = False, + use_tebd_bias: bool = False, + type_map: Optional[list[str]] = None, + # not implemented + spin=None, + type: Optional[str] = None, + ) -> None: + r"""Construct smooth version of embedding net of type `se_atten_v2`. + + Parameters + ---------- + rcut : float + The cut-off radius :math:`r_c` + rcut_smth : float + From where the environment matrix should be smoothed :math:`r_s` + sel : list[int], int + list[int]: sel[i] specifies the maxmum number of type i atoms in the cut-off radius + int: the total maxmum number of atoms in the cut-off radius + ntypes : int + Number of element types + neuron : list[int] + Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` + axis_neuron : int + Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix) + tebd_dim : int + Dimension of the type embedding + set_davg_zero : bool + Set the shift of embedding net input to zero. + attn : int + Hidden dimension of the attention vectors + attn_layer : int + Number of attention layers + attn_dotr : bool + If dot the angular gate to the attention weights + attn_mask : bool + (Only support False to keep consistent with other backend references.) + (Not used in this version.) + If mask the diagonal of attention weights + activation_function : str + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + precision : str + The precision of the embedding net parameters. 
Supported options are |PRECISION| + resnet_dt : bool + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + exclude_types : list[list[int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float + Protection parameter to prevent division by zero errors during environment matrix calculations. + scaling_factor : float + The scaling factor of normalization in calculations of attention weights. + If `temperature` is None, the scaling of attention weights is (N_dim * scaling_factor)**0.5 + normalize : bool + Whether to normalize the hidden vectors in attention weights calculation. + temperature : float + If not None, the scaling of attention weights is `temperature` itself. + concat_output_tebd : bool + Whether to concat type embedding at the output of the descriptor. + trainable : bool + If the weights of this descriptors are trainable. + trainable_ln : bool + Whether to use trainable shift and scale weights in layer normalization. + ln_eps : float, Optional + The epsilon value for layer normalization. + type_one_side : bool + If 'False', type embeddings of both neighbor and central atoms are considered. + If 'True', only type embeddings of neighbor atoms are considered. + Default is 'False'. + stripped_type_embedding : bool, Optional + (Deprecated, kept only for compatibility.) + Whether to strip the type embedding into a separate embedding network. + Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'. + Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'. + The default value is `None`, which means the `tebd_input_mode` setting will be used instead. + seed : int, Optional + Random seed for parameter initialization. + use_econf_tebd : bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map : list[str], Optional + A list of strings. Give the name to each type of atoms. + spin + (Only support None to keep consistent with other backend references.) + (Not used in this version. Not-none option is not implemented.) + The old implementation of deepspin. 
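# [Editor's note] Illustrative, untested usage sketch, not part of the patch.
# As the constructor body below shows, `se_atten_v2` is DPA-1 with the
# stripped type-embedding input mode and smooth type embedding enabled, so the
# two calls here are expected to build equivalent descriptors. The numeric
# arguments are arbitrary example values; positional order mirrors the call in
# this diff.
from deepmd.pd.model.descriptor.dpa1 import DescrptDPA1
from deepmd.pd.model.descriptor.se_atten_v2 import DescrptSeAttenV2

d_v2 = DescrptSeAttenV2(6.0, 0.5, 120, 2)
d_dpa1 = DescrptDPA1(
    6.0,
    0.5,
    120,
    2,
    tebd_input_mode="strip",
    smooth_type_embedding=True,
)
# d_v2 and d_dpa1 describe the same model variant.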
+ """ + DescrptDPA1.__init__( + self, + rcut, + rcut_smth, + sel, + ntypes, + neuron=neuron, + axis_neuron=axis_neuron, + tebd_dim=tebd_dim, + tebd_input_mode="strip", + set_davg_zero=set_davg_zero, + attn=attn, + attn_layer=attn_layer, + attn_dotr=attn_dotr, + attn_mask=attn_mask, + activation_function=activation_function, + precision=precision, + resnet_dt=resnet_dt, + exclude_types=exclude_types, + env_protection=env_protection, + scaling_factor=scaling_factor, + normalize=normalize, + temperature=temperature, + concat_output_tebd=concat_output_tebd, + trainable=trainable, + trainable_ln=trainable_ln, + ln_eps=ln_eps, + smooth_type_embedding=True, + type_one_side=type_one_side, + stripped_type_embedding=stripped_type_embedding, + seed=seed, + use_econf_tebd=use_econf_tebd, + use_tebd_bias=use_tebd_bias, + type_map=type_map, + # not implemented + spin=spin, + type=type, + ) + + def serialize(self) -> dict: + obj = self.se_atten + data = { + "@class": "Descriptor", + "type": "se_atten_v2", + "@version": 2, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "ntypes": obj.ntypes, + "neuron": obj.neuron, + "axis_neuron": obj.axis_neuron, + "tebd_dim": obj.tebd_dim, + "set_davg_zero": obj.set_davg_zero, + "attn": obj.attn_dim, + "attn_layer": obj.attn_layer, + "attn_dotr": obj.attn_dotr, + "attn_mask": False, + "activation_function": obj.activation_function, + "resnet_dt": obj.resnet_dt, + "scaling_factor": obj.scaling_factor, + "normalize": obj.normalize, + "temperature": obj.temperature, + "trainable_ln": obj.trainable_ln, + "ln_eps": obj.ln_eps, + "type_one_side": obj.type_one_side, + "concat_output_tebd": self.concat_output_tebd, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + # make deterministic + "precision": RESERVED_PRECISON_DICT[obj.prec], + "embeddings": obj.filter_layers.serialize(), + "embeddings_strip": obj.filter_layers_strip.serialize(), + "attention_layers": obj.dpa1_attention.serialize(), + "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(), + "type_embedding": self.type_embedding.embedding.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": obj["davg"].numpy(), + "dstd": obj["dstd"].numpy(), + }, + "trainable": self.trainable, + "spin": None, + } + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeAttenV2": + data = data.copy() + check_version_compatibility(data.pop("@version"), 2, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + type_embedding = data.pop("type_embedding") + attention_layers = data.pop("attention_layers") + data.pop("env_mat") + embeddings_strip = data.pop("embeddings_strip") + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + obj = cls(**data) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.se_atten.prec).to(device=env.DEVICE) + + obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize( + type_embedding + ) + obj.se_atten["davg"] = t_cvt(variables["davg"]) + obj.se_atten["dstd"] = t_cvt(variables["dstd"]) + obj.se_atten.filter_layers = NetworkCollection.deserialize(embeddings) + obj.se_atten.filter_layers_strip = NetworkCollection.deserialize( + embeddings_strip + ) + obj.se_atten.dpa1_attention = NeighborGatedAttention.deserialize( + attention_layers + ) + return obj diff --git a/deepmd/pd/model/descriptor/se_r.py 
b/deepmd/pd/model/descriptor/se_r.py new file mode 100644 index 0000000000..871b37fd40 --- /dev/null +++ b/deepmd/pd/model/descriptor/se_r.py @@ -0,0 +1,485 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor import ( + prod_env_mat, +) +from deepmd.pd.model.network.mlp import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("se_e2_r") +@BaseDescriptor.register("se_r") +class DescrptSeR(BaseDescriptor, paddle.nn.Layer): + def __init__( + self, + rcut, + rcut_smth, + sel, + neuron=[25, 50, 100], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, + **kwargs, + ): + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.neuron = neuron + self.filter_neuron = self.neuron + self.set_davg_zero = set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.exclude_types = exclude_types + self.ntypes = len(sel) + self.type_map = type_map + self.seed = seed + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + self.env_protection = env_protection + + self.sel = sel + self.sec = paddle.to_tensor( + np.append([0], np.cumsum(self.sel)), dtype="int64" + ).to(device=env.DEVICE) + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 1 + + wanted_shape = (self.ntypes, self.nnei, 1) + mean = paddle.zeros(wanted_shape, dtype=self.prec).to(device=env.DEVICE) + stddev = paddle.ones(wanted_shape, dtype=self.prec).to(device=env.DEVICE) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.filter_layers_old = None + self.filter_layers = None + + filter_layers = NetworkCollection( + ndim=1, ntypes=len(sel), network_type="embedding_network" + ) + # TODO: ndim=2 if type_one_side=False + for ii in range(self.ntypes): + filter_layers[(ii,)] = EmbeddingNet( + 1, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, ii), + ) + self.filter_layers = filter_layers + self.stats = None + # set trainable + for param in self.parameters(): + param.stop_gradient = not trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + 
return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.neuron[-1] + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + raise NotImplementedError + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return 0 + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return False + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return False + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return False + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For SeR descriptors, the user-defined share-level + # shared_level: 0 + if shared_level == 0: + # link buffers + if hasattr(self, "mean") and not resume: + # in case of change params during resume + base_env = EnvMatStatSe(base_class) + base_env.stats = base_class.stats + for kk in base_class.get_stats(): + base_env.stats[kk] += self.get_stats()[kk] + mean, stddev = base_env() + if not base_class.set_davg_zero: + paddle.assign( + paddle.to_tensor(mean).to(device=env.DEVICE), base_class.mean + ) # pylint: disable=no-explicit-dtype + paddle.assign( + paddle.to_tensor(stddev).to(device=env.DEVICE), base_class.stddev + ) # pylint: disable=no-explicit-dtype + self.mean = base_class.mean + self.stddev = base_class.stddev + # self.set_state_dict(base_class.state_dict()) # this does not work, because it only inits the model + # the following will successfully link all the params except buffers + for item in self._sub_layers: + self._sub_layers[item] = base_class._sub_layers[item] + # Other shared levels + else: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + raise NotImplementedError( + "Descriptor se_e2_r does not support changing for type related params!" 
+ "This feature is currently not implemented because it would require additional work to support the non-mixed-types case. " + "We may consider adding this support in the future if there is a clear demand for it." + ) + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + coord_ext: paddle.Tensor, + atype_ext: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + this descriptor returns None + h2 + The rotationally equivariant pair-partical representation. + this descriptor returns None + sw + The smooth switch function. 
+ + """ + del mapping + nf = nlist.shape[0] + nloc = nlist.shape[1] + atype = atype_ext[:, :nloc] + dmatrix, diff, sw = prod_env_mat( + coord_ext, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + True, + protection=self.env_protection, + ) + + assert self.filter_layers is not None + dmatrix = dmatrix.reshape([-1, self.nnei, 1]) + dmatrix = dmatrix.astype(self.prec) + nfnl = dmatrix.shape[0] + # pre-allocate a shape to pass jit + xyz_scatter = paddle.zeros( + [nfnl, 1, self.filter_neuron[-1]], dtype=self.prec + ).to(device=coord_ext.place) + + # nfnl x nnei + exclude_mask = self.emask(nlist, atype_ext).reshape([nfnl, self.nnei]) + for ii, ll in enumerate(self.filter_layers.networks): + # nfnl x nt + mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]] + # nfnl x nt x 1 + ss = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :] + ss = ss * mm[:, :, None].astype(ss.dtype) + # nfnl x nt x ng + gg = ll.forward(ss) + gg = paddle.mean(gg, axis=1).unsqueeze(1) + xyz_scatter += gg * (self.sel[ii] / self.nnei) + + res_rescale = 1.0 / 5.0 + result = xyz_scatter * res_rescale + result = result.reshape([nf, nloc, self.filter_neuron[-1]]) + return ( + result.astype(env.GLOBAL_PD_FLOAT_PRECISION), + None, + None, + None, + sw, + ) + + def set_stat_mean_and_stddev( + self, + mean: paddle.Tensor, + stddev: paddle.Tensor, + ) -> None: + """Update mean and stddev for descriptor.""" + self.mean = mean + self.stddev = stddev + + def get_stat_mean_and_stddev(self) -> tuple[paddle.Tensor, paddle.Tensor]: + """Get mean and stddev for descriptor.""" + return self.mean, self.stddev + + def serialize(self) -> dict: + return { + "@class": "Descriptor", + "type": "se_r", + "@version": 2, + "rcut": self.rcut, + "rcut_smth": self.rcut_smth, + "sel": self.sel, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "set_davg_zero": self.set_davg_zero, + "activation_function": self.activation_function, + # make deterministic + "precision": RESERVED_PRECISON_DICT[self.prec], + "embeddings": self.filter_layers.serialize(), + "env_mat": DPEnvMat(self.rcut, self.rcut_smth).serialize(), + "exclude_types": self.exclude_types, + "env_protection": self.env_protection, + "@variables": { + "davg": self["davg"].numpy(), + "dstd": self["dstd"].numpy(), + }, + "type_map": self.type_map, + ## to be updated when the options are supported. + "trainable": True, + "type_one_side": True, + "spin": None, + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeR": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 2, 1) + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + env_mat = data.pop("env_mat") + obj = cls(**data) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.prec).to(device=env.DEVICE) + + obj["davg"] = t_cvt(variables["davg"]) + obj["dstd"] = t_cvt(variables["dstd"]) + obj.filter_layers = NetworkCollection.deserialize(embeddings) + return obj + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
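# [Editor's note] Illustrative sketch, not part of the patch: a numpy version
# of the per-type reduction in `DescrptSeR.forward` above. Each type block of
# the radial environment matrix is embedded, averaged over its neighbours,
# weighted by sel[i] / nnei and accumulated, then rescaled by 1/5. `toy_net`
# stands in for the per-type `EmbeddingNet`; the exclusion mask is omitted.
import numpy as np

nfnl, ng = 3, 8
sel = [4, 2]                      # neighbours kept per type
sec = np.cumsum([0] + sel)        # slice boundaries: [0, 4, 6]
nnei = sum(sel)
rng = np.random.default_rng(0)
dmatrix = rng.normal(size=(nfnl, nnei, 1))   # radial-only environment matrix

def toy_net(x, seed):
    w = np.random.default_rng(seed).normal(size=(1, ng))
    return np.tanh(x @ w)

xyz_scatter = np.zeros((nfnl, 1, ng))
for ii in range(len(sel)):
    ss = dmatrix[:, sec[ii]:sec[ii + 1], :]          # nfnl x sel[ii] x 1
    gg = toy_net(ss, ii)                             # nfnl x sel[ii] x ng
    gg = gg.mean(axis=1, keepdims=True)              # nfnl x 1 x ng
    xyz_scatter += gg * (sel[ii] / nnei)

result = xyz_scatter * (1.0 / 5.0)                   # res_rescale
print(result.reshape(nfnl, ng).shape)                # (3, 8)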
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + min_nbor_dist, local_jdata_cpy["sel"] = UpdateSel().update_one_sel( + train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], False + ) + return local_jdata_cpy, min_nbor_dist diff --git a/deepmd/pd/model/descriptor/se_t.py b/deepmd/pd/model/descriptor/se_t.py new file mode 100644 index 0000000000..3236a052e5 --- /dev/null +++ b/deepmd/pd/model/descriptor/se_t.py @@ -0,0 +1,733 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +from typing import ( + Callable, + ClassVar, + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor import ( + DescriptorBlock, + prod_env_mat, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +try: + from typing import ( + Final, + ) +except ImportError: + from paddle.jit import Final + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.pd.model.network.mlp import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) + +from .base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("se_e3") +@BaseDescriptor.register("se_at") +@BaseDescriptor.register("se_a_3be") +class DescrptSeT(BaseDescriptor, paddle.nn.Layer): + r"""DeepPot-SE constructed from all information (both angular and radial) of atomic + configurations. + + The embedding takes angles between two neighboring atoms as input. + + Parameters + ---------- + rcut : float + The cut-off radius + rcut_smth : float + From where the environment matrix should be smoothed + sel : list[int] + sel[i] specifies the maxmum number of type i atoms in the cut-off radius + neuron : list[int] + Number of neurons in each hidden layers of the embedding net + resnet_dt : bool + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + set_davg_zero : bool + Set the shift of embedding net input to zero. + activation_function : str + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + env_protection : float + Protection parameter to prevent division by zero errors during environment matrix calculations. + exclude_types : List[List[int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + precision : str + The precision of the embedding net parameters. Supported options are |PRECISION| + trainable : bool + If the weights of embedding net are trainable. + seed : int, Optional + Random seed for initializing the network parameters. + type_map: List[str], Optional + A list of strings. Give the name to each type of atoms. 
+ """ + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: list[int], + neuron: list[int] = [24, 48, 96], + resnet_dt: bool = False, + set_davg_zero: bool = False, + activation_function: str = "tanh", + env_protection: float = 0.0, + exclude_types: list[tuple[int, int]] = [], + precision: str = "float64", + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, + ntypes: Optional[int] = None, # to be compat with input + # not implemented + spin=None, + ): + del ntypes + if spin is not None: + raise NotImplementedError("old implementation of spin is not supported.") + super().__init__() + self.type_map = type_map + self.seat = DescrptBlockSeT( + rcut, + rcut_smth, + sel, + neuron=neuron, + resnet_dt=resnet_dt, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + env_protection=env_protection, + exclude_types=exclude_types, + precision=precision, + trainable=trainable, + seed=seed, + ) + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.seat.get_rcut() + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.seat.get_rcut_smth() + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return self.seat.get_nsel() + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.seat.get_sel() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.seat.get_ntypes() + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.seat.get_dim_out() + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return self.seat.get_dim_emb() + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return self.seat.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return self.seat.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return self.seat.need_sorted_nlist_for_lower() + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.seat.get_env_protection() + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" 
+ # For SeT descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in sea + if shared_level == 0: + self.seat.share_params(base_class.seat, 0, resume=resume) + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.seat.dim_out + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + raise NotImplementedError( + "Descriptor se_e3 does not support changing for type related params!" + "This feature is currently not implemented because it would require additional work to support the non-mixed-types case. " + "We may consider adding this support in the future if there is a clear demand for it." + ) + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + return self.seat.compute_input_stats(merged, path) + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + """Update the type exclusions.""" + self.seat.reinit_exclude(exclude_types) + + def forward( + self, + coord_ext: paddle.Tensor, + atype_ext: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x ng + gr + The rotationally equivariant and permutationally invariant single particle + representation. + This descriptor returns None. + g2 + The rotationally invariant pair-partical representation. + This descriptor returns None. + h2 + The rotationally equivariant pair-partical representation. + This descriptor returns None. + sw + The smooth switch function. 
+ + """ + return self.seat.forward(nlist, coord_ext, atype_ext, None, mapping) + + def set_stat_mean_and_stddev( + self, + mean: paddle.Tensor, + stddev: paddle.Tensor, + ) -> None: + """Update mean and stddev for descriptor.""" + self.seat.mean = mean + self.seat.stddev = stddev + + def get_stat_mean_and_stddev(self) -> tuple[paddle.Tensor, paddle.Tensor]: + """Get mean and stddev for descriptor.""" + return self.seat.mean, self.seat.stddev + + def serialize(self) -> dict: + obj = self.seat + return { + "@class": "Descriptor", + "type": "se_e3", + "@version": 2, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "neuron": obj.neuron, + "resnet_dt": obj.resnet_dt, + "set_davg_zero": obj.set_davg_zero, + "activation_function": obj.activation_function, + "precision": RESERVED_PRECISON_DICT[obj.prec], + "embeddings": obj.filter_layers.serialize(), + "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "type_map": self.type_map, + "@variables": { + "davg": obj["davg"].numpy(), + "dstd": obj["dstd"].numpy(), + }, + "trainable": obj.trainable, + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeT": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("@class", None) + data.pop("type", None) + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + env_mat = data.pop("env_mat") + obj = cls(**data) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.seat.prec).to(device=env.DEVICE) + + obj.seat["davg"] = t_cvt(variables["davg"]) + obj.seat["dstd"] = t_cvt(variables["dstd"]) + obj.seat.filter_layers = NetworkCollection.deserialize(embeddings) + return obj + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + min_nbor_dist, local_jdata_cpy["sel"] = UpdateSel().update_one_sel( + train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], False + ) + return local_jdata_cpy, min_nbor_dist + + +@DescriptorBlock.register("se_e3") +class DescrptBlockSeT(DescriptorBlock): + ndescrpt: Final[int] + __constants__: ClassVar[list] = ["ndescrpt"] + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: list[int], + neuron: list[int] = [24, 48, 96], + resnet_dt: bool = False, + set_davg_zero: bool = False, + activation_function: str = "tanh", + env_protection: float = 0.0, + exclude_types: list[tuple[int, int]] = [], + precision: str = "float64", + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + ): + r"""Construct an embedding net of type `se_e3`. + + The embedding takes angles between two neighboring atoms as input. 
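# [Editor's note] Illustrative sketch, not part of the patch: the `@variables`
# handling used by `serialize`/`deserialize` above. The running statistics
# (davg/dstd) are exported as numpy arrays and converted back to paddle
# tensors of the descriptor precision on load; the `t_cvt` helper in this diff
# additionally moves them to `env.DEVICE`.
import numpy as np
import paddle

davg = paddle.zeros([2, 6, 4], dtype="float64")      # buffer inside the model
variables = {"davg": davg.numpy()}                    # what serialize() stores

restored = paddle.to_tensor(variables["davg"], dtype="float64")  # roughly t_cvt
assert np.allclose(restored.numpy(), variables["davg"])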
+ + Parameters + ---------- + rcut : float + The cut-off radius + rcut_smth : float + From where the environment matrix should be smoothed + sel : list[int] + sel[i] specifies the maxmum number of type i atoms in the cut-off radius + neuron : list[int] + Number of neurons in each hidden layers of the embedding net + resnet_dt : bool + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + set_davg_zero : bool + Set the shift of embedding net input to zero. + activation_function : str + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + env_protection : float + Protection parameter to prevent division by zero errors during environment matrix calculations. + exclude_types : List[List[int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + precision : str + The precision of the embedding net parameters. Supported options are |PRECISION| + trainable : bool + If the weights of embedding net are trainable. + seed : int, Optional + Random seed for initializing the network parameters. + """ + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.neuron = neuron + self.filter_neuron = self.neuron + self.set_davg_zero = set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.env_protection = env_protection + self.ntypes = len(sel) + self.seed = seed + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + + self.sel = sel + # should be on CPU to avoid D2H, as it is used as slice index + self.sec = [0, *np.cumsum(self.sel).tolist()] + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = paddle.zeros(wanted_shape, dtype=self.prec).to(device=env.DEVICE) + stddev = paddle.ones(wanted_shape, dtype=self.prec).to(device=env.DEVICE) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + + ndim = 2 + filter_layers = NetworkCollection( + ndim=ndim, ntypes=len(sel), network_type="embedding_network" + ) + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=ndim) + ): + filter_layers[embedding_idx] = EmbeddingNet( + 1, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, ii), + ) + self.filter_layers = filter_layers + self.stats = None + # set trainable + self.trainable = trainable + for param in self.parameters(): + param.stop_gradient = not trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return self.neuron[-1] + + def get_dim_in(self) -> int: + """Returns 
the input dimension.""" + return self.dim_in + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return False + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return 0 + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: paddle.Tensor, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + extended_atype_embd: Optional[paddle.Tensor] = None, + mapping: Optional[paddle.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + nlist + The neighbor list. shape: nf x nloc x nnei + extended_coord + The extended coordinates of atoms. shape: nf x (nallx3) + extended_atype + The extended aotm types. 
shape: nf x nall x nt + extended_atype_embd + The extended type embedding of atoms. shape: nf x nall + mapping + The index mapping, not required by this descriptor. + + Returns + ------- + result + The descriptor. shape: nf x nloc x ng + gr + The rotationally equivariant and permutationally invariant single particle + representation. + This descriptor returns None. + g2 + The rotationally invariant pair-partical representation. + This descriptor returns None. + h2 + The rotationally equivariant pair-partical representation. + This descriptor returns None. + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + del extended_atype_embd, mapping + nf = nlist.shape[0] + nloc = nlist.shape[1] + atype = extended_atype[:, :nloc] + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + dmatrix = dmatrix.reshape([-1, self.nnei, 4]) + dmatrix = dmatrix.astype(self.prec) + nfnl = dmatrix.shape[0] + # pre-allocate a shape to pass jit + result = paddle.zeros( + [nfnl, self.filter_neuron[-1]], + dtype=self.prec, + ).to(device=extended_coord.place) + # nfnl x nnei + exclude_mask = self.emask(nlist, extended_atype).reshape([nfnl, self.nnei]) + for embedding_idx, ll in enumerate(self.filter_layers.networks): + ti = embedding_idx % self.ntypes + nei_type_j = self.sel[ti] + tj = embedding_idx // self.ntypes + nei_type_i = self.sel[tj] + if ti <= tj: + # avoid repeat calculation + # nfnl x nt_i x 3 + rr_i = dmatrix[:, self.sec[ti] : self.sec[ti + 1], 1:] + mm_i = exclude_mask[:, self.sec[ti] : self.sec[ti + 1]] + rr_i = rr_i * mm_i[:, :, None].astype(rr_i.dtype) + # nfnl x nt_j x 3 + rr_j = dmatrix[:, self.sec[tj] : self.sec[tj + 1], 1:] + mm_j = exclude_mask[:, self.sec[tj] : self.sec[tj + 1]] + rr_j = rr_j * mm_j[:, :, None].astype(rr_j.dtype) + # nfnl x nt_i x nt_j + env_ij = paddle.einsum("ijm,ikm->ijk", rr_i, rr_j) + # nfnl x nt_i x nt_j x 1 + env_ij_reshape = env_ij.unsqueeze(-1) + # nfnl x nt_i x nt_j x ng + gg = ll.forward(env_ij_reshape) + # nfnl x nt_i x nt_j x ng + res_ij = paddle.einsum("ijk,ijkm->im", env_ij, gg) + res_ij = res_ij * (1.0 / float(nei_type_i) / float(nei_type_j)) + result += res_ij + # xyz_scatter /= (self.nnei * self.nnei) + result = result.reshape([nf, nloc, self.filter_neuron[-1]]) + return ( + result.astype(env.GLOBAL_PD_FLOAT_PRECISION), + None, + None, + None, + sw, + ) + + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + return False + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + return False diff --git a/deepmd/pd/model/descriptor/se_t_tebd.py b/deepmd/pd/model/descriptor/se_t_tebd.py new file mode 100644 index 0000000000..606056437c --- /dev/null +++ b/deepmd/pd/model/descriptor/se_t_tebd.py @@ -0,0 +1,876 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor import ( + DescriptorBlock, +) +from deepmd.pd.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pd.model.network.mlp import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNet, + TypeEmbedNetConsistent, +) +from deepmd.pd.utils import ( + decomp, + env, +) 
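# [Editor's note] Illustrative sketch, not part of the patch: a numpy version
# of the three-body contraction in `DescrptBlockSeT.forward` above for a
# single (ti, tj) type pair. The angular information enters through the dot
# products of the weighted relative coordinates of two neighbours; `toy_net`
# stands in for the pairwise `EmbeddingNet`, and the ti <= tj symmetry trick
# and the exclusion mask are omitted.
import numpy as np

nfnl, nt_i, nt_j, ng = 3, 4, 5, 8
rng = np.random.default_rng(0)
rr_i = rng.normal(size=(nfnl, nt_i, 3))   # weighted relative coords, type ti
rr_j = rng.normal(size=(nfnl, nt_j, 3))   # weighted relative coords, type tj

def toy_net(x, seed=1):
    w = np.random.default_rng(seed).normal(size=(1, ng))
    return np.tanh(x @ w)

# nfnl x nt_i x nt_j: angular term ~ r_ij . r_ik
env_ij = np.einsum("ijm,ikm->ijk", rr_i, rr_j)
# nfnl x nt_i x nt_j x ng: embedding of the angular term
gg = toy_net(env_ij[..., None])
# nfnl x ng: contract neighbours of both types and normalise
res_ij = np.einsum("ijk,ijkm->im", env_ij, gg) / (nt_i * nt_j)
print(res_ij.shape)  # (3, 8)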
+from deepmd.pd.utils.env import ( + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_pair_exclude_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + extend_descrpt_stat, +) + + +@BaseDescriptor.register("se_e3_tebd") +class DescrptSeTTebd(BaseDescriptor, paddle.nn.Layer): + r"""Construct an embedding net that takes angles between two neighboring atoms and type embeddings as input. + + Parameters + ---------- + rcut + The cut-off radius + rcut_smth + From where the environment matrix should be smoothed + sel : Union[List[int], int] + list[int]: sel[i] specifies the maxmum number of type i atoms in the cut-off radius + int: the total maxmum number of atoms in the cut-off radius + ntypes : int + Number of element types + neuron : list[int] + Number of neurons in each hidden layers of the embedding net + tebd_dim : int + Dimension of the type embedding + tebd_input_mode : str + The input mode of the type embedding. Supported modes are ["concat", "strip"]. + - "concat": Concatenate the type embedding with the smoothed angular information as the union input for the embedding network. + - "strip": Use a separated embedding network for the type embedding and combine the output with the angular embedding network output. + resnet_dt + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + set_davg_zero + Set the shift of embedding net input to zero. + activation_function + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + env_protection: float + Protection parameter to prevent division by zero errors during environment matrix calculations. + exclude_types : List[Tuple[int, int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + trainable + If the weights of embedding net are trainable. + seed + Random seed for initializing the network parameters. + type_map: List[str], Optional + A list of strings. Give the name to each type of atoms. + concat_output_tebd: bool + Whether to concat type embedding at the output of the descriptor. + use_econf_tebd: bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + smooth: bool + Whether to use smooth process in calculation. 
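+
+    Notes
+    -----
+    A minimal construction sketch (the argument values below are illustrative
+    placeholders, not recommended settings):
+
+    >>> descrpt = DescrptSeTTebd(
+    ...     rcut=6.0,
+    ...     rcut_smth=0.5,
+    ...     sel=[40],
+    ...     ntypes=2,
+    ...     neuron=[2, 4, 8],
+    ...     tebd_dim=8,
+    ... )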
+ + """ + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list = [2, 4, 8], + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + resnet_dt: bool = False, + set_davg_zero: bool = True, + activation_function: str = "tanh", + env_protection: float = 0.0, + exclude_types: list[tuple[int, int]] = [], + precision: str = "float64", + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, + concat_output_tebd: bool = True, + use_econf_tebd: bool = False, + use_tebd_bias=False, + smooth: bool = True, + ): + super().__init__() + self.se_ttebd = DescrptBlockSeTTebd( + rcut, + rcut_smth, + sel, + ntypes, + neuron=neuron, + tebd_dim=tebd_dim, + tebd_input_mode=tebd_input_mode, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, + resnet_dt=resnet_dt, + exclude_types=exclude_types, + env_protection=env_protection, + smooth=smooth, + seed=child_seed(seed, 1), + ) + self.use_econf_tebd = use_econf_tebd + self.type_map = type_map + self.smooth = smooth + self.type_embedding = TypeEmbedNet( + ntypes, + tebd_dim, + precision=precision, + seed=child_seed(seed, 2), + use_econf_tebd=use_econf_tebd, + type_map=type_map, + use_tebd_bias=use_tebd_bias, + ) + self.tebd_dim = tebd_dim + self.concat_output_tebd = concat_output_tebd + self.trainable = trainable + # set trainable + for param in self.parameters(): + param.stop_gradient = not trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.se_ttebd.get_rcut() + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.se_ttebd.get_rcut_smth() + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return self.se_ttebd.get_nsel() + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.se_ttebd.get_sel() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.se_ttebd.get_ntypes() + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + ret = self.se_ttebd.get_dim_out() + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + return self.se_ttebd.dim_emb + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. 
+ + """ + return self.se_ttebd.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return self.se_ttebd.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return self.se_ttebd.need_sorted_nlist_for_lower() + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.se_ttebd.get_env_protection() + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For DPA1 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in both type_embedding and se_ttebd + if shared_level == 0: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + self.se_ttebd.share_params(base_class.se_ttebd, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding + elif shared_level == 1: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + return self.se_ttebd.compute_input_stats(merged, path) + + def set_stat_mean_and_stddev( + self, + mean: paddle.Tensor, + stddev: paddle.Tensor, + ) -> None: + """Update mean and stddev for descriptor.""" + self.se_ttebd.mean = mean + self.se_ttebd.stddev = stddev + + def get_stat_mean_and_stddev(self) -> tuple[paddle.Tensor, paddle.Tensor]: + """Get mean and stddev for descriptor.""" + return self.se_ttebd.mean, self.se_ttebd.stddev + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" 
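+        # remap_index gives, for every type in the new type_map, its index in the
+        # old type_map; has_new_type flags freshly introduced types whose
+        # davg/dstd statistics still have to be created via extend_descrpt_stat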
+ remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + obj = self.se_ttebd + obj.ntypes = len(type_map) + self.type_map = type_map + self.type_embedding.change_type_map(type_map=type_map) + obj.reinit_exclude(map_pair_exclude_types(obj.exclude_types, remap_index)) + if has_new_type: + # the avg and std of new types need to be updated + extend_descrpt_stat( + obj, + type_map, + des_with_stat=model_with_new_type_stat.se_ttebd + if model_with_new_type_stat is not None + else None, + ) + obj["davg"] = obj["davg"][remap_index] + obj["dstd"] = obj["dstd"][remap_index] + + def serialize(self) -> dict: + obj = self.se_ttebd + data = { + "@class": "Descriptor", + "type": "se_e3_tebd", + "@version": 1, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "ntypes": obj.ntypes, + "neuron": obj.neuron, + "tebd_dim": obj.tebd_dim, + "tebd_input_mode": obj.tebd_input_mode, + "set_davg_zero": obj.set_davg_zero, + "activation_function": obj.activation_function, + "resnet_dt": obj.resnet_dt, + "concat_output_tebd": self.concat_output_tebd, + "use_econf_tebd": self.use_econf_tebd, + "type_map": self.type_map, + # make deterministic + "precision": RESERVED_PRECISON_DICT[obj.prec], + "embeddings": obj.filter_layers.serialize(), + "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(), + "type_embedding": self.type_embedding.embedding.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "smooth": self.smooth, + "@variables": { + "davg": obj["davg"].numpy(), + "dstd": obj["dstd"].numpy(), + }, + "trainable": self.trainable, + } + if obj.tebd_input_mode in ["strip"]: + data.update({"embeddings_strip": obj.filter_layers_strip.serialize()}) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeTTebd": + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + type_embedding = data.pop("type_embedding") + env_mat = data.pop("env_mat") + tebd_input_mode = data["tebd_input_mode"] + if tebd_input_mode in ["strip"]: + embeddings_strip = data.pop("embeddings_strip") + else: + embeddings_strip = None + obj = cls(**data) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.se_ttebd.prec).to(device=env.DEVICE) + + obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize( + type_embedding + ) + obj.se_ttebd["davg"] = t_cvt(variables["davg"]) + obj.se_ttebd["dstd"] = t_cvt(variables["dstd"]) + obj.se_ttebd.filter_layers = NetworkCollection.deserialize(embeddings) + if tebd_input_mode in ["strip"]: + obj.se_ttebd.filter_layers_strip = NetworkCollection.deserialize( + embeddings_strip + ) + return obj + + def forward( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + extended_coord + The extended coordinates of atoms. shape: nf x (nallx3) + extended_atype + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. 
shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + del mapping + nframes, nloc, nnei = nlist.shape + nall = extended_coord.reshape([nframes, -1]).shape[1] // 3 + g1_ext = self.type_embedding(extended_atype) + g1_inp = g1_ext[:, :nloc, :] + g1, g2, h2, rot_mat, sw = self.se_ttebd( + nlist, + extended_coord, + extended_atype, + g1_ext, + mapping=None, + ) + if self.concat_output_tebd: + g1 = paddle.concat([g1, g1_inp], axis=-1) + + return g1, rot_mat, g2, h2, sw + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + min_nbor_dist, sel = UpdateSel().update_one_sel( + train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], True + ) + local_jdata_cpy["sel"] = sel[0] + return local_jdata_cpy, min_nbor_dist + + +@DescriptorBlock.register("se_ttebd") +class DescrptBlockSeTTebd(DescriptorBlock): + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list = [25, 50, 100], + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + set_davg_zero: bool = True, + activation_function="tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + smooth: bool = True, + seed: Optional[Union[int, list[int]]] = None, + ): + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.neuron = neuron + self.filter_neuron = self.neuron + self.tebd_dim = tebd_dim + self.tebd_input_mode = tebd_input_mode + self.set_davg_zero = set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.env_protection = env_protection + self.seed = seed + self.smooth = smooth + + if isinstance(sel, int): + sel = [sel] + + self.ntypes = ntypes + self.sel = sel + self.sec = self.sel + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = paddle.zeros(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + stddev = paddle.ones(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.tebd_dim_input = self.tebd_dim * 2 + if self.tebd_input_mode in ["concat"]: + self.embd_input_dim = 1 + self.tebd_dim_input + else: + self.embd_input_dim = 1 + + self.filter_layers_old = None + self.filter_layers = None + self.filter_layers_strip = None + filter_layers 
= NetworkCollection( + ndim=0, ntypes=self.ntypes, network_type="embedding_network" + ) + filter_layers[0] = EmbeddingNet( + self.embd_input_dim, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, 1), + ) + self.filter_layers = filter_layers + if self.tebd_input_mode in ["strip"]: + filter_layers_strip = NetworkCollection( + ndim=0, ntypes=self.ntypes, network_type="embedding_network" + ) + filter_layers_strip[0] = EmbeddingNet( + self.tebd_dim_input, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, 2), + ) + self.filter_layers_strip = filter_layers_strip + self.stats = None + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_emb(self) -> int: + """Returns the output dimension of embedding.""" + return self.filter_neuron[-1] + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.tebd_dim + + @property + def dim_emb(self): + """Returns the output dimension of embedding.""" + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. 
+ - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: paddle.Tensor, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + extended_atype_embd: Optional[paddle.Tensor] = None, + mapping: Optional[paddle.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + nlist + The neighbor list. shape: nf x nloc x nnei + extended_coord + The extended coordinates of atoms. shape: nf x (nallx3) + extended_atype + The extended aotm types. shape: nf x nall x nt + extended_atype_embd + The extended type embedding of atoms. shape: nf x nall + mapping + The index mapping, not required by this descriptor. + + Returns + ------- + result + The descriptor. shape: nf x nloc x (ng x axis_neuron) + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + sw + The smooth switch function. 
shape: nf x nloc x nnei + + """ + del mapping + assert extended_atype_embd is not None + nframes, nloc, nnei = nlist.shape + atype = extended_atype[:, :nloc] + nb = nframes + nall = extended_coord.reshape([nb, -1, 3]).shape[1] + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + # nb x nloc x nnei + exclude_mask = self.emask(nlist, extended_atype) + nlist = paddle.where(exclude_mask != 0, nlist, -1) + nlist_mask = nlist != -1 + nlist = paddle.where(nlist == -1, 0, nlist) + sw = paddle.squeeze(sw, -1) + # nf x nall x nt + nt = extended_atype_embd.shape[-1] + atype_tebd_ext = extended_atype_embd + # nb x (nloc x nnei) x nt + index = nlist.reshape([nb, nloc * nnei]).unsqueeze(-1).expand([-1, -1, nt]) + # nb x (nloc x nnei) x nt + # atype_tebd_nlist = paddle.take_along_axis(atype_tebd_ext, axis=1, index=index) + atype_tebd_nlist = decomp.take_along_axis(atype_tebd_ext, axis=1, indices=index) + # nb x nloc x nnei x nt + atype_tebd_nlist = atype_tebd_nlist.reshape([nb, nloc, nnei, nt]) + # beyond the cutoff sw should be 0.0 + sw = sw.masked_fill(~nlist_mask, 0.0) + # (nb x nloc) x nnei + exclude_mask = exclude_mask.reshape([nb * nloc, nnei]) + assert self.filter_layers is not None + # nfnl x nnei x 4 + dmatrix = dmatrix.reshape([-1, self.nnei, 4]) + nfnl = dmatrix.shape[0] + # nfnl x nnei x 4 + rr = dmatrix + rr = rr * exclude_mask[:, :, None].astype(rr.dtype) + + # nfnl x nt_i x 3 + rr_i = rr[:, :, 1:] + # nfnl x nt_j x 3 + rr_j = rr[:, :, 1:] + # nfnl x nt_i x nt_j + # env_ij = paddle.einsum("ijm,ikm->ijk", rr_i, rr_j) + env_ij = ( + # ij1m x i1km -> ijkm -> ijk + rr_i.unsqueeze(2) * rr_j.unsqueeze(1) + ).sum(-1) + # nfnl x nt_i x nt_j x 1 + ss = env_ij.unsqueeze(-1) + + # nfnl x nnei x tebd_dim + nlist_tebd = atype_tebd_nlist.reshape([nfnl, nnei, self.tebd_dim]) + + # nfnl x nt_i x nt_j x tebd_dim + nlist_tebd_i = nlist_tebd.unsqueeze(2).expand([-1, -1, self.nnei, -1]) + nlist_tebd_j = nlist_tebd.unsqueeze(1).expand([-1, self.nnei, -1, -1]) + + if self.tebd_input_mode in ["concat"]: + # nfnl x nt_i x nt_j x (1 + tebd_dim * 2) + ss = paddle.concat([ss, nlist_tebd_i, nlist_tebd_j], axis=-1) + # nfnl x nt_i x nt_j x ng + gg = self.filter_layers.networks[0](ss) + elif self.tebd_input_mode in ["strip"]: + # nfnl x nt_i x nt_j x ng + gg_s = self.filter_layers.networks[0](ss) + assert self.filter_layers_strip is not None + # nfnl x nt_i x nt_j x (tebd_dim * 2) + tt = paddle.concat([nlist_tebd_i, nlist_tebd_j], axis=-1) + # nfnl x nt_i x nt_j x ng + gg_t = self.filter_layers_strip.networks[0](tt) + if self.smooth: + gg_t = ( + gg_t + * sw.reshape([nfnl, self.nnei, 1, 1]) + * sw.reshape([nfnl, 1, self.nnei, 1]) + ) + # nfnl x nt_i x nt_j x ng + gg = gg_s * gg_t + gg_s + else: + raise NotImplementedError + + # nfnl x ng + # res_ij = paddle.einsum("ijk,ijkm->im", env_ij, gg) + res_ij = ( + # ijk1 x ijkm -> ijkm -> im + env_ij.unsqueeze(-1) * gg + ).sum([1, 2]) + res_ij = res_ij * (1.0 / float(self.nnei) / float(self.nnei)) + # nf x nl x ng + result = res_ij.reshape([nframes, nloc, self.filter_neuron[-1]]) + return ( + result.astype(env.GLOBAL_PD_FLOAT_PRECISION), + None, + None, + None, + sw, + ) + + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + return False + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + return False diff --git 
a/deepmd/pd/model/model/__init__.py b/deepmd/pd/model/model/__init__.py new file mode 100644 index 0000000000..86f09cdcb8 --- /dev/null +++ b/deepmd/pd/model/model/__init__.py @@ -0,0 +1,304 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""The model that takes the coordinates, cell and atom types as input +and predicts some property. The models are automatically generated from +atomic models by the `deepmd.dpmodel.make_model` method. + +The `make_model` method does the reduction, auto-differentiation and +communication of the atomic properties according to output variable +definition `deepmd.dpmodel.OutputVariableDef`. + +All models should be inherited from :class:`deepmd.pd.model.model.model.BaseModel`. +Models generated by `make_model` have already done it. +""" + +import copy +import json + +import numpy as np + +from deepmd.pd.model.atomic_model import ( + DPAtomicModel, + PairTabAtomicModel, +) +from deepmd.pd.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pd.model.task import ( + BaseFitting, +) +from deepmd.utils.spin import ( + Spin, +) + +from .dipole_model import ( + DipoleModel, +) +from .dos_model import ( + DOSModel, +) +from .dp_linear_model import ( + LinearEnergyModel, +) +from .dp_model import ( + DPModelCommon, +) +from .dp_zbl_model import ( + DPZBLModel, +) +from .ener_model import ( + EnergyModel, +) +from .frozen import ( + FrozenModel, +) +from .make_hessian_model import ( + make_hessian_model, +) +from .make_model import ( + make_model, +) +from .model import ( + BaseModel, +) +from .polar_model import ( + PolarModel, +) +from .property_model import ( + PropertyModel, +) +from .spin_model import ( + SpinEnergyModel, + SpinModel, +) + + +def _get_standard_model_components(model_params, ntypes): + # descriptor + model_params["descriptor"]["ntypes"] = ntypes + model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"]) + descriptor = BaseDescriptor(**model_params["descriptor"]) + # fitting + fitting_net = model_params.get("fitting_net", {}) + fitting_net["type"] = fitting_net.get("type", "ener") + fitting_net["ntypes"] = descriptor.get_ntypes() + fitting_net["type_map"] = copy.deepcopy(model_params["type_map"]) + fitting_net["mixed_types"] = descriptor.mixed_types() + if fitting_net["type"] in ["dipole", "polar"]: + fitting_net["embedding_width"] = descriptor.get_dim_emb() + fitting_net["dim_descrpt"] = descriptor.get_dim_out() + grad_force = "direct" not in fitting_net["type"] + if not grad_force: + fitting_net["out_dim"] = descriptor.get_dim_emb() + if "ener" in fitting_net["type"]: + fitting_net["return_energy"] = True + fitting = BaseFitting(**fitting_net) + return descriptor, fitting, fitting_net["type"] + + +def get_spin_model(model_params): + model_params = copy.deepcopy(model_params) + if not model_params["spin"]["use_spin"] or isinstance( + model_params["spin"]["use_spin"][0], int + ): + use_spin = np.full(len(model_params["type_map"]), False, dtype=bool) + use_spin[model_params["spin"]["use_spin"]] = True + model_params["spin"]["use_spin"] = use_spin.tolist() + # include virtual spin and placeholder types + model_params["type_map"] += [item + "_spin" for item in model_params["type_map"]] + spin = Spin( + use_spin=model_params["spin"]["use_spin"], + virtual_scale=model_params["spin"]["virtual_scale"], + ) + pair_exclude_types = spin.get_pair_exclude_types( + exclude_types=model_params.get("pair_exclude_types", None) + ) + model_params["pair_exclude_types"] = pair_exclude_types + # for descriptor data stat + 
model_params["descriptor"]["exclude_types"] = pair_exclude_types + atom_exclude_types = spin.get_atom_exclude_types( + exclude_types=model_params.get("atom_exclude_types", None) + ) + model_params["atom_exclude_types"] = atom_exclude_types + if ( + "env_protection" not in model_params["descriptor"] + or model_params["descriptor"]["env_protection"] == 0.0 + ): + model_params["descriptor"]["env_protection"] = 1e-6 + if model_params["descriptor"]["type"] in ["se_e2_a"]: + # only expand sel for se_e2_a + model_params["descriptor"]["sel"] += model_params["descriptor"]["sel"] + backbone_model = get_standard_model(model_params) + return SpinEnergyModel(backbone_model=backbone_model, spin=spin) + + +def get_linear_model(model_params): + model_params = copy.deepcopy(model_params) + weights = model_params.get("weights", "mean") + list_of_models = [] + ntypes = len(model_params["type_map"]) + for sub_model_params in model_params["models"]: + if "descriptor" in sub_model_params: + # descriptor + sub_model_params["descriptor"]["ntypes"] = ntypes + descriptor, fitting, _ = _get_standard_model_components( + sub_model_params, ntypes + ) + list_of_models.append( + DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"]) + ) + + else: # must be pairtab + assert ( + "type" in sub_model_params and sub_model_params["type"] == "pairtab" + ), "Sub-models in LinearEnergyModel must be a DPModel or a PairTable Model" + list_of_models.append( + PairTabAtomicModel( + sub_model_params["tab_file"], + sub_model_params["rcut"], + sub_model_params["sel"], + type_map=model_params["type_map"], + ) + ) + + atom_exclude_types = model_params.get("atom_exclude_types", []) + pair_exclude_types = model_params.get("pair_exclude_types", []) + return LinearEnergyModel( + models=list_of_models, + type_map=model_params["type_map"], + weights=weights, + atom_exclude_types=atom_exclude_types, + pair_exclude_types=pair_exclude_types, + ) + + +def get_zbl_model(model_params): + model_params = copy.deepcopy(model_params) + ntypes = len(model_params["type_map"]) + descriptor, fitting, _ = _get_standard_model_components(model_params, ntypes) + dp_model = DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"]) + # pairtab + filepath = model_params["use_srtab"] + pt_model = PairTabAtomicModel( + filepath, + model_params["descriptor"]["rcut"], + model_params["descriptor"]["sel"], + type_map=model_params["type_map"], + ) + + rmin = model_params["sw_rmin"] + rmax = model_params["sw_rmax"] + atom_exclude_types = model_params.get("atom_exclude_types", []) + pair_exclude_types = model_params.get("pair_exclude_types", []) + return DPZBLModel( + dp_model, + pt_model, + rmin, + rmax, + type_map=model_params["type_map"], + atom_exclude_types=atom_exclude_types, + pair_exclude_types=pair_exclude_types, + ) + + +def _can_be_converted_to_float(value): + try: + float(value) + return True + except (TypeError, ValueError): + # return false for any failure... 
+ return False + + +def _convert_preset_out_bias_to_array(preset_out_bias, type_map): + if preset_out_bias is not None: + for kk in preset_out_bias: + if len(preset_out_bias[kk]) != len(type_map): + raise ValueError( + "length of the preset_out_bias should be the same as the type_map" + ) + for jj in range(len(preset_out_bias[kk])): + if preset_out_bias[kk][jj] is not None: + if isinstance(preset_out_bias[kk][jj], list): + bb = preset_out_bias[kk][jj] + elif _can_be_converted_to_float(preset_out_bias[kk][jj]): + bb = [float(preset_out_bias[kk][jj])] + else: + raise ValueError( + f"unsupported type/value of the {jj}th element of " + f"preset_out_bias['{kk}'] " + f"{type(preset_out_bias[kk][jj])}" + ) + preset_out_bias[kk][jj] = np.array(bb) + return preset_out_bias + + +def get_standard_model(model_params): + model_params_old = model_params + model_params = copy.deepcopy(model_params) + ntypes = len(model_params["type_map"]) + descriptor, fitting, fitting_net_type = _get_standard_model_components( + model_params, ntypes + ) + atom_exclude_types = model_params.get("atom_exclude_types", []) + pair_exclude_types = model_params.get("pair_exclude_types", []) + preset_out_bias = model_params.get("preset_out_bias") + preset_out_bias = _convert_preset_out_bias_to_array( + preset_out_bias, model_params["type_map"] + ) + + if fitting_net_type == "dipole": + modelcls = DipoleModel + elif fitting_net_type == "polar": + modelcls = PolarModel + elif fitting_net_type == "dos": + modelcls = DOSModel + elif fitting_net_type in ["ener", "direct_force_ener"]: + modelcls = EnergyModel + elif fitting_net_type == "property": + modelcls = PropertyModel + else: + raise RuntimeError(f"Unknown fitting type: {fitting_net_type}") + + model = modelcls( + descriptor=descriptor, + fitting=fitting, + type_map=model_params["type_map"], + atom_exclude_types=atom_exclude_types, + pair_exclude_types=pair_exclude_types, + preset_out_bias=preset_out_bias, + ) + model.model_def_script = json.dumps(model_params_old) + return model + + +def get_model(model_params): + model_type = model_params.get("type", "standard") + if model_type == "standard": + if "spin" in model_params: + return get_spin_model(model_params) + elif "use_srtab" in model_params: + return get_zbl_model(model_params) + else: + return get_standard_model(model_params) + elif model_type == "linear_ener": + return get_linear_model(model_params) + else: + return BaseModel.get_class_by_type(model_type).get_model(model_params) + + +__all__ = [ + "BaseModel", + "get_model", + "DPModelCommon", + "EnergyModel", + "DipoleModel", + "PolarModel", + "DOSModel", + "FrozenModel", + "SpinModel", + "SpinEnergyModel", + "DPZBLModel", + "make_model", + "make_hessian_model", + "LinearEnergyModel", +] diff --git a/deepmd/pd/model/model/dipole_model.py b/deepmd/pd/model/model/dipole_model.py new file mode 100644 index 0000000000..1bbb315661 --- /dev/null +++ b/deepmd/pd/model/model/dipole_model.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.atomic_model import ( + DPDipoleAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPDOSModel_ = make_model(DPDipoleAtomicModel) + + +@BaseModel.register("dipole") +class DipoleModel(DPModelCommon, DPDOSModel_): + model_type = "dipole" + + def __init__( + self, + *args, + **kwargs, + ): + 
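+        # initialize the shared DPModelCommon mixin first, then the model class
+        # generated by make_model from the dipole atomic model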
DPModelCommon.__init__(self) + DPDOSModel_.__init__(self, *args, **kwargs) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "dipole": deepcopy(out_def_data["dipole"]), + "global_dipole": deepcopy(out_def_data["dipole_redu"]), + } + if self.do_grad_r("dipole"): + output_def["force"] = deepcopy(out_def_data["dipole_derv_r"]) + output_def["force"].squeeze(-2) + if self.do_grad_c("dipole"): + output_def["virial"] = deepcopy(out_def_data["dipole_derv_c_redu"]) + output_def["virial"].squeeze(-2) + output_def["atom_virial"] = deepcopy(out_def_data["dipole_derv_c"]) + output_def["atom_virial"].squeeze(-3) + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["dipole"] = model_ret["dipole"] + model_predict["global_dipole"] = model_ret["dipole_redu"] + if self.do_grad_r("dipole"): + model_predict["force"] = model_ret["dipole_derv_r"].squeeze(-2) + if self.do_grad_c("dipole"): + model_predict["virial"] = model_ret["dipole_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["dipole_derv_c"].squeeze( + -3 + ) + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + extra_nlist_sort=self.need_sorted_nlist_for_lower(), + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["dipole"] = model_ret["dipole"] + model_predict["global_dipole"] = model_ret["dipole_redu"] + if self.do_grad_r("dipole"): + model_predict["extended_force"] = model_ret["dipole_derv_r"].squeeze(-2) + if self.do_grad_c("dipole"): + model_predict["virial"] = model_ret["dipole_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["extended_virial"] = model_ret[ + "dipole_derv_c" + ].squeeze(-3) + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pd/model/model/dos_model.py b/deepmd/pd/model/model/dos_model.py new file mode 100644 index 0000000000..f2c75a7138 --- /dev/null +++ b/deepmd/pd/model/model/dos_model.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.atomic_model import ( + DPDOSAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPDOSModel_ = make_model(DPDOSAtomicModel) + + +@BaseModel.register("dos") +class DOSModel(DPModelCommon, DPDOSModel_): + model_type = "dos" + + def __init__( + self, + *args, + **kwargs, + ): + 
DPModelCommon.__init__(self) + DPDOSModel_.__init__(self, *args, **kwargs) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "atom_dos": deepcopy(out_def_data["dos"]), + "dos": deepcopy(out_def_data["dos_redu"]), + } + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_dos"] = model_ret["dos"] + model_predict["dos"] = model_ret["dos_redu"] + + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + def get_numb_dos(self) -> int: + """Get the number of DOS for DOSFittingNet.""" + return self.get_fitting_net().dim_out + + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + extra_nlist_sort=self.need_sorted_nlist_for_lower(), + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_dos"] = model_ret["dos"] + model_predict["dos"] = model_ret["dos_redu"] + + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pd/model/model/dp_linear_model.py b/deepmd/pd/model/model/dp_linear_model.py new file mode 100644 index 0000000000..48a6c8e74c --- /dev/null +++ b/deepmd/pd/model/model/dp_linear_model.py @@ -0,0 +1,165 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.atomic_model import ( + LinearEnergyAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPLinearModel_ = make_model(LinearEnergyAtomicModel) + + +@BaseModel.register("linear_ener") +class LinearEnergyModel(DPLinearModel_): + model_type = "ener" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "atom_energy": deepcopy(out_def_data["energy"]), + "energy": deepcopy(out_def_data["energy_redu"]), + } + if self.do_grad_r("energy"): + output_def["force"] = deepcopy(out_def_data["energy_derv_r"]) + output_def["force"].squeeze(-2) + if self.do_grad_c("energy"): + output_def["virial"] = deepcopy(out_def_data["energy_derv_c_redu"]) + output_def["virial"].squeeze(-2) + output_def["atom_virial"] = deepcopy(out_def_data["energy_derv_c"]) + output_def["atom_virial"].squeeze(-3) + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, 
+ fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze(-3) + else: + model_predict["force"] = model_ret["dforce"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + return model_predict + + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + extra_nlist_sort=self.need_sorted_nlist_for_lower(), + ) + + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["extended_virial"] = model_ret["energy_derv_c"].squeeze( + -3 + ) + else: + assert model_ret["dforce"] is not None + model_predict["dforce"] = model_ret["dforce"] + return model_predict + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
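+
+        For the linear energy model this walks over every sub-model: DP
+        sub-models are updated through ``DPModelCommon.update_sel``, while
+        pair-table sub-models (those carrying a ``tab_file``) are left unchanged.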
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + type_map = local_jdata_cpy["type_map"] + min_nbor_dist = None + for idx, sub_model in enumerate(local_jdata_cpy["models"]): + if "tab_file" not in sub_model: + sub_model, temp_min = DPModelCommon.update_sel( + train_data, type_map, local_jdata["models"][idx] + ) + if min_nbor_dist is None or temp_min <= min_nbor_dist: + min_nbor_dist = temp_min + return local_jdata_cpy, min_nbor_dist diff --git a/deepmd/pd/model/model/dp_model.py b/deepmd/pd/model/model/dp_model.py new file mode 100644 index 0000000000..1e1cee6826 --- /dev/null +++ b/deepmd/pd/model/model/dp_model.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +from deepmd.pd.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + + +class DPModelCommon: + """A base class to implement common methods for all the Models.""" + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["descriptor"], min_nbor_dist = BaseDescriptor.update_sel( + train_data, type_map, local_jdata["descriptor"] + ) + return local_jdata_cpy, min_nbor_dist + + def get_fitting_net(self): + """Get the fitting network.""" + return self.atomic_model.fitting_net + + def get_descriptor(self): + """Get the descriptor.""" + return self.atomic_model.descriptor diff --git a/deepmd/pd/model/model/dp_zbl_model.py b/deepmd/pd/model/model/dp_zbl_model.py new file mode 100644 index 0000000000..eb1b194ce4 --- /dev/null +++ b/deepmd/pd/model/model/dp_zbl_model.py @@ -0,0 +1,160 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.atomic_model import ( + DPZBLLinearEnergyAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPZBLModel_ = make_model(DPZBLLinearEnergyAtomicModel) + + +@BaseModel.register("zbl") +class DPZBLModel(DPZBLModel_): + model_type = "ener" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "atom_energy": deepcopy(out_def_data["energy"]), + "energy": deepcopy(out_def_data["energy_redu"]), + } + if self.do_grad_r("energy"): + output_def["force"] = deepcopy(out_def_data["energy_derv_r"]) + output_def["force"].squeeze(-2) + if self.do_grad_c("energy"): + output_def["virial"] = 
deepcopy(out_def_data["energy_derv_c_redu"]) + output_def["virial"].squeeze(-2) + output_def["atom_virial"] = deepcopy(out_def_data["energy_derv_c"]) + output_def["atom_virial"].squeeze(-3) + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze(-3) + else: + model_predict["force"] = model_ret["dforce"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + + return model_predict + + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + extra_nlist_sort=self.need_sorted_nlist_for_lower(), + ) + + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["extended_virial"] = model_ret["energy_derv_c"].squeeze( + -3 + ) + else: + assert model_ret["dforce"] is not None + model_predict["dforce"] = model_ret["dforce"] + return model_predict + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["dpmodel"], min_nbor_dist = DPModelCommon.update_sel( + train_data, type_map, local_jdata["dpmodel"] + ) + return local_jdata_cpy, min_nbor_dist diff --git a/deepmd/pd/model/model/ener_model.py b/deepmd/pd/model/model/ener_model.py new file mode 100644 index 0000000000..3f3db4a527 --- /dev/null +++ b/deepmd/pd/model/model/ener_model.py @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.atomic_model import ( + DPEnergyAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPEnergyModel_ = make_model(DPEnergyAtomicModel) + + +@BaseModel.register("ener") +class EnergyModel(DPModelCommon, DPEnergyModel_): + model_type = "ener" + + def __init__( + self, + *args, + **kwargs, + ): + DPModelCommon.__init__(self) + DPEnergyModel_.__init__(self, *args, **kwargs) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "atom_energy": deepcopy(out_def_data["energy"]), + "energy": deepcopy(out_def_data["energy_redu"]), + } + if self.do_grad_r("energy"): + output_def["force"] = deepcopy(out_def_data["energy_derv_r"]) + output_def["force"].squeeze(-2) + if self.do_grad_c("energy"): + output_def["virial"] = deepcopy(out_def_data["energy_derv_c_redu"]) + output_def["virial"].squeeze(-2) + output_def["atom_virial"] = deepcopy(out_def_data["energy_derv_c"]) + output_def["atom_virial"].squeeze(-3) + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze( + -3 + ) + else: + model_predict["force"] = model_ret["dforce"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + 
fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + comm_dict=comm_dict, + extra_nlist_sort=self.need_sorted_nlist_for_lower(), + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["extended_virial"] = model_ret[ + "energy_derv_c" + ].squeeze(-3) + else: + assert model_ret["dforce"] is not None + model_predict["dforce"] = model_ret["dforce"] + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pd/model/model/frozen.py b/deepmd/pd/model/model/frozen.py new file mode 100644 index 0000000000..f7bc0a4556 --- /dev/null +++ b/deepmd/pd/model/model/frozen.py @@ -0,0 +1,180 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +from typing import ( + Optional, +) + +import paddle + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + + +@BaseModel.register("frozen") +class FrozenModel(BaseModel): + """Load model from a frozen model, which cannot be trained. + + Parameters + ---------- + model_file : str + The path to the frozen model + """ + + def __init__(self, model_file: str, **kwargs): + super().__init__(**kwargs) + self.model_file = model_file + if model_file.endswith(".json"): + self.model = paddle.jit.load(model_file.split(".json")[0]) + else: + raise NotImplementedError("Only support .json file") + + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of developer implemented atomic models.""" + return self.model.fitting_output_def() + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.model.get_rcut() + + def get_type_map(self) -> list[str]: + """Get the type map.""" + return self.model.get_type_map() + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.model.get_sel() + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.model.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.model.get_dim_aparam() + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.model.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return self.model.is_aparam_nall() + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. 
+ + """ + return self.model.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return self.model.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the model needs sorted nlist when using `forward_lower`.""" + return self.model.need_sorted_nlist_for_lower() + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + return self.model.forward( + coord, + atype, + box=box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + def get_model_def_script(self) -> str: + """Get the model definition script.""" + # try to use the original script instead of "frozen model" + # Note: this cannot change the script of the parent model + # it may still try to load hard-coded filename, which might + # be a problem + return self.model.get_model_def_script() + + def get_min_nbor_dist(self) -> Optional[float]: + """Get the minimum neighbor distance.""" + return self.model.get_min_nbor_dist() + + def serialize(self) -> dict: + from deepmd.pd.model.model import ( + get_model, + ) + + # try to recover the original model + model_def_script = json.loads(self.get_model_def_script()) + model = get_model(model_def_script) + model.set_state_dict(self.model.state_dict()) + return model.serialize() + + @classmethod + def deserialize(cls, data: dict): + raise RuntimeError("Should not touch here.") + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.model.get_nnei() + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.model.get_nsel() + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statictics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + return local_jdata, None + + def model_output_type(self) -> str: + """Get the output type for the model.""" + return self.model.model_output_type() diff --git a/deepmd/pd/model/model/make_hessian_model.py b/deepmd/pd/model/model/make_hessian_model.py new file mode 100644 index 0000000000..19222a9f1d --- /dev/null +++ b/deepmd/pd/model/model/make_hessian_model.py @@ -0,0 +1,215 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import math +from typing import ( + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel import ( + get_hessian_name, +) + + +def make_hessian_model(T_Model): + """Make a model that can compute Hessian. + + LIMITATION: this model is not jitable due to the restrictions of paddle jit script. + + LIMITATION: only the hessian of `forward_common` is available. + + Parameters + ---------- + T_Model + The model. Should provide the `forward_common` and `atomic_output_def` methods + + Returns + ------- + The model computes hessian. 
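For orientation, the Hessian assembled by this wrapper is, per frame and per scalar output component, a (nloc * 3) x (nloc * 3) matrix of second derivatives of the reduced output with respect to the flattened coordinates. The standalone sketch below only illustrates the shape and symmetry of that object with a central finite-difference estimate on an assumed toy energy; it is not how the wrapper itself computes derivatives:

import numpy as np

def toy_energy(x):
    # x: flattened coordinates, shape (nloc * 3,); an arbitrary smooth scalar
    return 0.5 * np.sum(x * x) + np.sum(np.cos(x[:-1] - x[1:]))

def numerical_hessian(f, x, eps=1e-4):
    n = x.size
    hess = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            ei = np.zeros(n)
            ej = np.zeros(n)
            ei[i] = eps
            ej[j] = eps
            hess[i, j] = (
                f(x + ei + ej) - f(x + ei - ej) - f(x - ei + ej) + f(x - ei - ej)
            ) / (4.0 * eps * eps)
    return hess

coord = np.random.default_rng(0).standard_normal(9)  # 3 atoms, flattened
hess = numerical_hessian(toy_energy, coord)          # (9, 9), symmetric
assert np.allclose(hess, hess.T)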
+ + """ + + class CM(T_Model): + def __init__( + self, + *args, + **kwargs, + ): + super().__init__( + *args, + **kwargs, + ) + self.hess_fitting_def = copy.deepcopy(super().atomic_output_def()) + + def requires_hessian( + self, + keys: Union[str, list[str]], + ): + """Set which output variable(s) requires hessian.""" + if isinstance(keys, str): + keys = [keys] + for kk in self.hess_fitting_def.keys(): + if kk in keys: + self.hess_fitting_def[kk].r_hessian = True + + def atomic_output_def(self): + """Get the fitting output def.""" + return self.hess_fitting_def + + def forward_common( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + """Return model prediction. + + Parameters + ---------- + coord + The coordinates of the atoms. + shape: nf x (nloc x 3) + atype + The type of atoms. shape: nf x nloc + box + The simulation box. shape: nf x 9 + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + If calculate the atomic virial. + + Returns + ------- + ret_dict + The result dict of type Dict[str,paddle.Tensor]. + The keys are defined by the `ModelOutputDef`. + + """ + ret = super().forward_common( + coord, + atype, + box=box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + vdef = self.atomic_output_def() + hess_yes = [vdef[kk].r_hessian for kk in vdef.keys()] + if any(hess_yes): + hess = self._cal_hessian_all( + coord, + atype, + box=box, + fparam=fparam, + aparam=aparam, + ) + ret.update(hess) + return ret + + def _cal_hessian_all( + self, + coord: paddle.Tensor, + atype: paddle.Tensor, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ) -> dict[str, paddle.Tensor]: + raise NotImplementedError("paddle do not support full hessian.") + nf, nloc = atype.shape + coord = coord.reshape([nf, (nloc * 3)]) + box = box.reshape([nf, 9]) if box is not None else None + fparam = fparam.reshape([nf, -1]) if fparam is not None else None + aparam = aparam.reshape([nf, nloc, -1]) if aparam is not None else None + fdef = self.atomic_output_def() + # keys of values that require hessian + hess_keys: list[str] = [] + for kk in fdef.keys(): + if fdef[kk].r_hessian: + hess_keys.append(kk) + # result dict init by empty lists + res = {get_hessian_name(kk): [] for kk in hess_keys} + # loop over variable + for kk in hess_keys: + vdef = fdef[kk] + vshape = vdef.shape + vsize = math.prod(vdef.shape) + # loop over frames + for ii in range(nf): + icoord = coord[ii] + iatype = atype[ii] + ibox = box[ii] if box is not None else None + ifparam = fparam[ii] if fparam is not None else None + iaparam = aparam[ii] if aparam is not None else None + # loop over all components + for idx in range(vsize): + hess = self._cal_hessian_one_component( + idx, icoord, iatype, ibox, ifparam, iaparam + ) + res[get_hessian_name(kk)].append(hess) + res[get_hessian_name(kk)] = paddle.stack( + res[get_hessian_name(kk)] + ).reshape((nf, *vshape, nloc * 3, nloc * 3)) + return res + + def _cal_hessian_one_component( + self, + ci, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ) -> paddle.Tensor: + raise NotImplementedError("paddle do not support full hessian.") + # coord, # (nloc x 3) + # atype, # nloc + # box: Optional[paddle.Tensor] = None, # 
9 + # fparam: Optional[paddle.Tensor] = None, # nfp + # aparam: Optional[paddle.Tensor] = None, # (nloc x nap) + wc = wrapper_class_forward_energy(self, ci, atype, box, fparam, aparam) + + hess = paddle.autograd.hessian( + wc, + coord, + ) + return hess + + class wrapper_class_forward_energy: + def __init__( + self, + obj: CM, + ci: int, + atype: paddle.Tensor, + box: Optional[paddle.Tensor], + fparam: Optional[paddle.Tensor], + aparam: Optional[paddle.Tensor], + ): + self.atype, self.box, self.fparam, self.aparam = atype, box, fparam, aparam + self.ci = ci + self.obj = obj + + def __call__( + self, + xx, + ): + ci = self.ci + atype, box, fparam, aparam = self.atype, self.box, self.fparam, self.aparam + res = super(CM, self.obj).forward_common( + xx.unsqueeze(0), + atype.unsqueeze(0), + box.unsqueeze(0) if box is not None else None, + fparam.unsqueeze(0) if fparam is not None else None, + aparam.unsqueeze(0) if aparam is not None else None, + do_atomic_virial=False, + ) + er = res["energy_redu"][0].reshape([-1])[ci] + return er + + return CM diff --git a/deepmd/pd/model/model/make_model.py b/deepmd/pd/model/model/make_model.py new file mode 100644 index 0000000000..258ba5d2fc --- /dev/null +++ b/deepmd/pd/model/model/make_model.py @@ -0,0 +1,587 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +import paddle + +from deepmd.dpmodel import ( + ModelOutputDef, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableCategory, + OutputVariableOperation, + check_operation_applied, +) +from deepmd.pd.model.atomic_model.base_atomic_model import ( + BaseAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) +from deepmd.pd.model.model.transform_output import ( + communicate_extended_output, + fit_output_to_model_output, +) +from deepmd.pd.utils import ( + decomp, +) +from deepmd.pd.utils.env import ( + GLOBAL_PD_ENER_FLOAT_PRECISION, + GLOBAL_PD_FLOAT_PRECISION, + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, + nlist_distinguish_types, +) +from deepmd.utils.path import ( + DPPath, +) + + +def make_model(T_AtomicModel: type[BaseAtomicModel]): + """Make a model as a derived class of an atomic model. + + The model provide two interfaces. + + 1. the `forward_common_lower`, that takes extended coordinates, atyps and neighbor list, + and outputs the atomic and property and derivatives (if required) on the extended region. + + 2. the `forward_common`, that takes coordinates, atypes and cell and predicts + the atomic and reduced property, and derivatives (if required) on the local region. + + Parameters + ---------- + T_AtomicModel + The atomic model. + + Returns + ------- + CM + The model. 
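The lower interface expects the caller to have already built a neighbor list over the extended region. As a rough illustration of what that input looks like, the sketch below builds an O(N^2) neighbor list for a small open-boundary system (no periodic images or ghost atoms, an assumption made only for brevity): for every atom, up to nsel neighbor indices within rcut, padded with -1 as in the model code.

import numpy as np

def naive_neighbor_list(coord, rcut, nsel):
    # coord: (natoms, 3); returns (natoms, nsel) neighbor indices, -1 padded
    natoms = coord.shape[0]
    dist = np.linalg.norm(coord[:, None, :] - coord[None, :, :], axis=-1)
    np.fill_diagonal(dist, np.inf)  # an atom is not its own neighbor
    nlist = np.full((natoms, nsel), -1, dtype=np.int64)
    for ii in range(natoms):
        order = np.argsort(dist[ii])
        sel = order[dist[ii][order] <= rcut][:nsel]
        nlist[ii, : sel.size] = sel
    return nlist

coord = np.random.default_rng(0).uniform(0.0, 10.0, size=(8, 3))
print(naive_neighbor_list(coord, rcut=4.0, nsel=5))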
+ + """ + + class CM(BaseModel): + def __init__( + self, + *args, + # underscore to prevent conflict with normal inputs + atomic_model_: Optional[T_AtomicModel] = None, + **kwargs, + ): + super().__init__(*args, **kwargs) + if atomic_model_ is not None: + self.atomic_model: T_AtomicModel = atomic_model_ + else: + self.atomic_model: T_AtomicModel = T_AtomicModel(*args, **kwargs) + self.precision_dict = PRECISION_DICT + self.reverse_precision_dict = RESERVED_PRECISON_DICT + self.global_pd_float_precision = GLOBAL_PD_FLOAT_PRECISION + self.global_pd_ener_float_precision = GLOBAL_PD_ENER_FLOAT_PRECISION + + def model_output_def(self): + """Get the output def for the model.""" + return ModelOutputDef(self.atomic_output_def()) + + def model_output_type(self) -> list[str]: + """Get the output type for the model.""" + output_def = self.model_output_def() + var_defs = output_def.var_defs + # jit: Comprehension ifs are not supported yet + # type hint is critical for JIT + vars: list[str] = [] + for kk, vv in var_defs.items(): + # .value is critical for JIT + if vv.category == OutputVariableCategory.OUT.value: + vars.append(kk) + return vars + + # cannot use the name forward. paddle script does not work + def forward_common( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + """Return model prediction. + + Parameters + ---------- + coord + The coordinates of the atoms. + shape: nf x (nloc x 3) + atype + The type of atoms. shape: nf x nloc + box + The simulation box. shape: nf x 9 + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + If calculate the atomic virial. + + Returns + ------- + ret_dict + The result dict of type Dict[str,paddle.Tensor]. + The keys are defined by the `ModelOutputDef`. + + """ + cc, bb, fp, ap, input_prec = self.input_type_cast( + coord, box=box, fparam=fparam, aparam=aparam + ) + del coord, box, fparam, aparam + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + cc, + atype, + self.get_rcut(), + self.get_sel(), + mixed_types=self.mixed_types(), + box=bb, + ) + model_predict_lower = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + do_atomic_virial=do_atomic_virial, + fparam=fp, + aparam=ap, + ) + model_predict = communicate_extended_output( + model_predict_lower, + self.model_output_def(), + mapping, + do_atomic_virial=do_atomic_virial, + ) + model_predict = self.output_type_cast(model_predict, input_prec) + return model_predict + + def get_out_bias(self) -> paddle.Tensor: + return self.atomic_model.get_out_bias() + + def set_out_bias(self, out_bias: paddle.Tensor) -> None: + self.atomic_model.set_out_bias(out_bias) + + def change_out_bias( + self, + merged, + bias_adjust_mode="change-by-statistic", + ) -> None: + """Change the output bias of atomic model according to the input data and the pretrained model. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. 
Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + bias_adjust_mode : str + The mode for changing output bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on labels of target dataset, + and do least square on the errors to obtain the target shift as bias. + 'set-by-statistic' : directly use the statistic output bias in the target dataset. + """ + self.atomic_model.change_out_bias( + merged, + bias_adjust_mode=bias_adjust_mode, + ) + + def forward_common_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + extra_nlist_sort: bool = False, + ): + """Return model prediction. Lower interface that takes + extended atomic coordinates and types, nlist, and mapping + as input, and returns the predictions on the extended region. + The predictions are not reduced. + + Parameters + ---------- + extended_coord + coodinates in extended region. nf x (nall x 3) + extended_atype + atomic type in extended region. nf x nall + nlist + neighbor list. nf x nloc x nsel. + mapping + mapps the extended indices to local indices. nf x nall. + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + whether calculate atomic virial. + comm_dict + The data needed for communication for parallel inference. + extra_nlist_sort + whether to forcibly sort the nlist. + + Returns + ------- + result_dict + the result dict, defined by the `FittingOutputDef`. + + """ + nframes, nall = extended_atype.shape[:2] + extended_coord = extended_coord.reshape([nframes, -1, 3]) + nlist = self.format_nlist( + extended_coord, extended_atype, nlist, extra_nlist_sort=extra_nlist_sort + ) + cc_ext, _, fp, ap, input_prec = self.input_type_cast( + extended_coord, fparam=fparam, aparam=aparam + ) + del extended_coord, fparam, aparam + atomic_ret = self.atomic_model.forward_common_atomic( + cc_ext, + extended_atype, + nlist, + mapping=mapping, + fparam=fp, + aparam=ap, + comm_dict=comm_dict, + ) + model_predict = fit_output_to_model_output( + atomic_ret, + self.atomic_output_def(), + cc_ext, + do_atomic_virial=do_atomic_virial, + create_graph=self.training, + ) + model_predict = self.output_type_cast(model_predict, input_prec) + return model_predict + + def input_type_cast( + self, + coord: paddle.Tensor, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ) -> tuple[ + paddle.Tensor, + Optional[paddle.Tensor], + Optional[paddle.Tensor], + Optional[paddle.Tensor], + str, + ]: + """Cast the input data to global float type.""" + input_prec = self.reverse_precision_dict[coord.dtype] + ### + ### type checking would not pass jit, convert to coord prec anyway + ### + # for vv, kk in zip([fparam, aparam], ["frame", "atomic"]): + # if vv is not None and self.reverse_precision_dict[vv.dtype] != input_prec: + # log.warning( + # f"type of {kk} parameter {self.reverse_precision_dict[vv.dtype]}" + # " does not match" + # f" that of the coordinate {input_prec}" + # ) + _lst: list[Optional[paddle.Tensor]] = [ + vv.astype(coord.dtype) if vv is not None else None + for vv in [box, fparam, aparam] + ] + box, fparam, aparam = _lst + if ( + input_prec + == self.reverse_precision_dict[self.global_pd_float_precision] + ): + 
return coord, box, fparam, aparam, input_prec + else: + pp = self.global_pd_float_precision + return ( + coord.to(pp), + box.to(pp) if box is not None else None, + fparam.to(pp) if fparam is not None else None, + aparam.to(pp) if aparam is not None else None, + input_prec, + ) + + def output_type_cast( + self, + model_ret: dict[str, paddle.Tensor], + input_prec: str, + ) -> dict[str, paddle.Tensor]: + """Convert the model output to the input prec.""" + do_cast = ( + input_prec + != self.reverse_precision_dict[self.global_pd_float_precision] + ) + pp = self.precision_dict[input_prec] + odef = self.model_output_def() + for kk in odef.keys(): + if kk not in model_ret.keys(): + # do not return energy_derv_c if not do_atomic_virial + continue + if check_operation_applied(odef[kk], OutputVariableOperation.REDU): + model_ret[kk] = ( + model_ret[kk].to(self.global_pd_ener_float_precision) + if model_ret[kk] is not None + else None + ) + elif do_cast: + model_ret[kk] = ( + model_ret[kk].to(pp) if model_ret[kk] is not None else None + ) + return model_ret + + def format_nlist( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + extra_nlist_sort: bool = False, + ): + """Format the neighbor list. + + 1. If the number of neighbors in the `nlist` is equal to sum(self.sel), + it does nothong + + 2. If the number of neighbors in the `nlist` is smaller than sum(self.sel), + the `nlist` is pad with -1. + + 3. If the number of neighbors in the `nlist` is larger than sum(self.sel), + the nearest sum(sel) neighbors will be preseved. + + Known limitations: + + In the case of not self.mixed_types, the nlist is always formatted. + May have side effact on the efficiency. + + Parameters + ---------- + extended_coord + coodinates in extended region. nf x nall x 3 + extended_atype + atomic type in extended region. nf x nall + nlist + neighbor list. nf x nloc x nsel + extra_nlist_sort + whether to forcibly sort the nlist. + + Returns + ------- + formated_nlist + the formated nlist. 
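The three cases described above translate almost directly to NumPy; this single-frame sketch mirrors the logic of `_format_nlist` (pad with -1, keep as-is, or sort by distance and keep the sum(sel) nearest neighbors within rcut) and is for illustration only:

import numpy as np

def format_nlist_np(extended_coord, nlist, nnei, rcut):
    # extended_coord: (nall, 3); nlist: (nloc, n_nnei), padded with -1
    nloc, n_nnei = nlist.shape
    if n_nnei < nnei:  # pad with -1
        pad = np.full((nloc, nnei - n_nnei), -1, dtype=nlist.dtype)
        return np.concatenate([nlist, pad], axis=-1)
    if n_nnei > nnei:  # keep the nnei nearest neighbors
        safe = np.where(nlist >= 0, nlist, 0)
        rr = np.linalg.norm(
            extended_coord[safe] - extended_coord[:nloc, None, :], axis=-1
        )
        rr = np.where(nlist >= 0, rr, np.inf)
        order = np.argsort(rr, axis=-1)
        nlist = np.take_along_axis(nlist, order, axis=-1)
        rr = np.take_along_axis(rr, order, axis=-1)
        return np.where(rr > rcut, -1, nlist)[:, :nnei]
    return nlist  # already sum(sel) wide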
+ + """ + mixed_types = self.mixed_types() + nlist = self._format_nlist( + extended_coord, + nlist, + sum(self.get_sel()), + extra_nlist_sort=extra_nlist_sort, + ) + if not mixed_types: + nlist = nlist_distinguish_types(nlist, extended_atype, self.get_sel()) + return nlist + + def _format_nlist( + self, + extended_coord: paddle.Tensor, + nlist: paddle.Tensor, + nnei: int, + extra_nlist_sort: bool = False, + ): + n_nf, n_nloc, n_nnei = nlist.shape + # nf x nall x 3 + extended_coord = extended_coord.reshape([n_nf, -1, 3]) + rcut = self.get_rcut() + + if n_nnei < nnei: + nlist = paddle.concat( + [ + nlist, + -1 + * paddle.ones( + [n_nf, n_nloc, nnei - n_nnei], + dtype=nlist.dtype, + ), + ], + axis=-1, + ) + + if n_nnei > nnei or extra_nlist_sort: + n_nf, n_nloc, n_nnei = nlist.shape + m_real_nei = nlist >= 0 + nlist = paddle.where(m_real_nei, nlist, paddle.zeros_like(nlist)) + # nf x nloc x 3 + coord0 = extended_coord[:, :n_nloc, :] + # nf x (nloc x nnei) x 3 + index = nlist.reshape([n_nf, n_nloc * n_nnei, 1]).expand([-1, -1, 3]) + coord1 = decomp.take_along_axis(extended_coord, axis=1, indices=index) + # nf x nloc x nnei x 3 + coord1 = coord1.reshape([n_nf, n_nloc, n_nnei, 3]) + # nf x nloc x nnei + # rr = paddle.linalg.norm(coord0[:, :, None, :] - coord1, axis=-1) + rr = decomp.norm(coord0[:, :, None, :] - coord1, axis=-1) + rr = paddle.where(m_real_nei, rr, float("inf")) + rr, nlist_mapping = ( + paddle.sort(rr, axis=-1), + paddle.argsort(rr, axis=-1), + ) + nlist = decomp.take_along_axis(nlist, axis=2, indices=nlist_mapping) + nlist = paddle.where(rr > rcut, paddle.full_like(nlist, -1), nlist) + nlist = nlist[..., :nnei] + else: # not extra_nlist_sort and n_nnei <= nnei: + pass # great! + assert nlist.shape[-1] == nnei + return nlist + + def do_grad_r( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is r_differentiable. + if var_name is None, returns if any of the variable is r_differentiable. + """ + return self.atomic_model.do_grad_r(var_name) + + def do_grad_c( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is c_differentiable. + if var_name is None, returns if any of the variable is c_differentiable. + """ + return self.atomic_model.do_grad_c(var_name) + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + self.atomic_model.change_type_map( + type_map=type_map, + model_with_new_type_stat=model_with_new_type_stat.atomic_model + if model_with_new_type_stat is not None + else None, + ) + + def serialize(self) -> dict: + return self.atomic_model.serialize() + + @classmethod + def deserialize(cls, data) -> "CM": + return cls(atomic_model_=T_AtomicModel.deserialize(data)) + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.atomic_model.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.atomic_model.get_dim_aparam() + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. 
+ If returning an empty list, all atom types are selected. + """ + return self.atomic_model.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return self.atomic_model.is_aparam_nall() + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.atomic_model.get_rcut() + + def get_type_map(self) -> list[str]: + """Get the type map.""" + return self.atomic_model.get_type_map() + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nsel() + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nnei() + + def atomic_output_def(self) -> FittingOutputDef: + """Get the output def of the atomic model.""" + return self.atomic_model.atomic_output_def() + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """Compute or load the statistics.""" + return self.atomic_model.compute_or_load_stat(sampled_func, stat_file_path) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.atomic_model.get_sel() + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.atomic_model.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the model has message passing.""" + return self.atomic_model.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the model needs sorted nlist when using `forward_lower`.""" + return self.atomic_model.need_sorted_nlist_for_lower() + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + # directly call the forward_common method when no specific transform rule + return self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + return CM diff --git a/deepmd/pd/model/model/model.py b/deepmd/pd/model/model/model.py new file mode 100644 index 0000000000..06a2c6910f --- /dev/null +++ b/deepmd/pd/model/model/model.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +import paddle + +from deepmd.dpmodel.model.base_model import ( + make_base_model, +) +from deepmd.utils.path import ( + DPPath, +) + + +class BaseModel(paddle.nn.Layer, make_base_model()): + def __init__(self, *args, **kwargs): + """Construct a basic model for different tasks.""" + paddle.nn.Layer.__init__(self) + self.model_def_script = "" + self.min_nbor_dist = None + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. 
+ When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). + When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. + + Parameters + ---------- + sampled_func + The sampled data frames from different data systems. + stat_file_path + The path to the statistics files. + """ + raise NotImplementedError + + def get_model_def_script(self) -> str: + """Get the model definition script.""" + return self.model_def_script + + def get_min_nbor_dist(self) -> Optional[float]: + """Get the minimum distance between two atoms.""" + return self.min_nbor_dist + + def get_ntypes(self): + """Returns the number of element types.""" + return len(self.get_type_map()) diff --git a/deepmd/pd/model/model/polar_model.py b/deepmd/pd/model/model/polar_model.py new file mode 100644 index 0000000000..fb79b4b79a --- /dev/null +++ b/deepmd/pd/model/model/polar_model.py @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.atomic_model import ( + DPPolarAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPDOSModel_ = make_model(DPPolarAtomicModel) + + +@BaseModel.register("polar") +class PolarModel(DPModelCommon, DPDOSModel_): + model_type = "polar" + + def __init__( + self, + *args, + **kwargs, + ): + DPDOSModel_.__init__(self, *args, **kwargs) + DPModelCommon.__init__(self) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "polar": deepcopy(out_def_data["polarizability"]), + "global_polar": deepcopy(out_def_data["polarizability_redu"]), + } + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["polar"] = model_ret["polarizability"] + model_predict["global_polar"] = model_ret["polarizability_redu"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + extra_nlist_sort=self.need_sorted_nlist_for_lower(), + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["polar"] = model_ret["polarizability"] + model_predict["global_polar"] = model_ret["polarizability_redu"] + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pd/model/model/property_model.py b/deepmd/pd/model/model/property_model.py new file mode 100644 
index 0000000000..3c0cf52b06 --- /dev/null +++ b/deepmd/pd/model/model/property_model.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.atomic_model import ( + DPPropertyAtomicModel, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) + +from .dp_model import ( + DPModelCommon, +) +from .make_model import ( + make_model, +) + +DPPropertyModel_ = make_model(DPPropertyAtomicModel) + + +@BaseModel.register("property") +class PropertyModel(DPModelCommon, DPPropertyModel_): + model_type = "property" + + def __init__( + self, + *args, + **kwargs, + ): + DPModelCommon.__init__(self) + DPPropertyModel_.__init__(self, *args, **kwargs) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "atom_property": deepcopy(out_def_data["property"]), + "property": deepcopy(out_def_data["property_redu"]), + } + if "mask" in out_def_data: + output_def["mask"] = deepcopy(out_def_data["mask"]) + return output_def + + def forward( + self, + coord, + atype, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + model_predict = {} + model_predict["atom_property"] = model_ret["property"] + model_predict["property"] = model_ret["property_redu"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + return model_predict + + def get_task_dim(self) -> int: + """Get the output dimension of PropertyFittingNet.""" + return self.get_fitting_net().dim_out + + def get_intensive(self) -> bool: + """Get whether the property is intensive.""" + return self.model_output_def()["property"].intensive + + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + comm_dict=comm_dict, + extra_nlist_sort=self.need_sorted_nlist_for_lower(), + ) + model_predict = {} + model_predict["atom_property"] = model_ret["property"] + model_predict["property"] = model_ret["property_redu"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + return model_predict diff --git a/deepmd/pd/model/model/spin_model.py b/deepmd/pd/model/model/spin_model.py new file mode 100644 index 0000000000..0fcf90a7af --- /dev/null +++ b/deepmd/pd/model/model/spin_model.py @@ -0,0 +1,623 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import functools +from copy import ( + deepcopy, +) +from typing import ( + Optional, +) + +import paddle + +from deepmd.dpmodel import ( + ModelOutputDef, +) +from deepmd.pd.model.atomic_model import ( + DPAtomicModel, +) +from deepmd.pd.utils import ( + decomp, +) +from deepmd.pd.utils.utils import ( + to_paddle_tensor, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.spin import ( + Spin, +) + +from .make_model import ( + make_model, +) + + +class SpinModel(paddle.nn.Layer): + """A spin model wrapper, with spin input preprocess 
and output split.""" + + def __init__( + self, + backbone_model, + spin: Spin, + ): + super().__init__() + self.backbone_model = backbone_model + self.spin = spin + self.ntypes_real = self.spin.ntypes_real + self.virtual_scale_mask = to_paddle_tensor(self.spin.get_virtual_scale_mask()) + self.spin_mask = to_paddle_tensor(self.spin.get_spin_mask()) + + def process_spin_input(self, coord, atype, spin): + """Generate virtual coordinates and types, concat into the input.""" + nframes, nloc = atype.shape + coord = coord.reshape([nframes, nloc, 3]) + spin = spin.reshape([nframes, nloc, 3]) + atype_spin = paddle.concat([atype, atype + self.ntypes_real], axis=-1) + virtual_coord = coord + spin * (self.virtual_scale_mask.to(atype.place))[ + atype + ].reshape([nframes, nloc, 1]) + coord_spin = paddle.concat([coord, virtual_coord], axis=-2) + return coord_spin, atype_spin + + def process_spin_input_lower( + self, + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping: Optional[paddle.Tensor] = None, + ): + """ + Add `extended_spin` into `extended_coord` to generate virtual atoms, and extend `nlist` and `mapping`. + Note that the final `extended_coord_updated` with shape [nframes, nall + nall, 3] has the following order: + - [:, :nloc]: original nloc real atoms. + - [:, nloc: nloc + nloc]: virtual atoms corresponding to nloc real atoms. + - [:, nloc + nloc: nloc + nall]: ghost real atoms. + - [:, nloc + nall: nall + nall]: virtual atoms corresponding to ghost real atoms. + """ + nframes, nall = extended_coord.shape[:2] + nloc = nlist.shape[1] + virtual_extended_coord = extended_coord + extended_spin * ( + self.virtual_scale_mask.to(extended_atype.place) + )[extended_atype].reshape([nframes, nall, 1]) + virtual_extended_atype = extended_atype + self.ntypes_real + extended_coord_updated = self.concat_switch_virtual( + extended_coord, virtual_extended_coord, nloc + ) + extended_atype_updated = self.concat_switch_virtual( + extended_atype, virtual_extended_atype, nloc + ) + if mapping is not None: + virtual_mapping = mapping + nloc + mapping_updated = self.concat_switch_virtual(mapping, virtual_mapping, nloc) + else: + mapping_updated = None + # extend the nlist + nlist_updated = self.extend_nlist(extended_atype, nlist) + return ( + extended_coord_updated, + extended_atype_updated, + nlist_updated, + mapping_updated, + ) + + def process_spin_output( + self, atype, out_tensor, add_mag: bool = True, virtual_scale: bool = True + ): + """ + Split the output both real and virtual atoms, and scale the latter. + add_mag: whether to add magnetic tensor onto the real tensor. + Default: True. e.g. Ture for forces and False for atomic virials on real atoms. + virtual_scale: whether to scale the magnetic tensor with virtual scale factor. + Default: True. e.g. Ture for forces and False for atomic virials on virtual atoms. 
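The split performed here can be summarized with a small NumPy sketch: the doubled atom axis is cut into a real half and a magnetic half, the magnetic half is optionally added back onto the real one (forces) and rescaled by a per-type mask. The mask values and shapes below are assumptions for illustration, not the trained model's buffers:

import numpy as np

def split_spin_output_np(atype, out_tensor, virtual_scale_mask, add_mag=True):
    # out_tensor: (nframes, 2 * nloc, 3); first nloc rows are real atoms,
    # the remaining nloc rows are their spin (virtual) counterparts
    nframes, nloc_double = out_tensor.shape[:2]
    nloc = nloc_double // 2
    out_real = out_tensor[:, :nloc]
    out_mag = out_tensor[:, nloc:]
    if add_mag:  # e.g. forces: fold the magnetic contribution back onto the real atom
        out_real = out_real + out_mag
    scale = virtual_scale_mask[atype][..., None]  # (nframes, nloc, 1)
    return out_real, out_mag * scale, scale > 0.0

atype = np.array([[0, 1, 0]])  # 1 frame, 3 atoms
mask = np.array([1.2, 0.0])    # assumed scale: spin only on type 0
real, mag, has_spin = split_spin_output_np(atype, np.ones((1, 6, 3)), mask)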
+ """ + nframes, nloc_double = out_tensor.shape[:2] + nloc = nloc_double // 2 + if virtual_scale: + virtual_scale_mask = self.virtual_scale_mask.to(atype.place) + else: + virtual_scale_mask = self.spin_mask.to(atype.place) + atomic_mask = virtual_scale_mask[atype].reshape([nframes, nloc, 1]) + out_real, out_mag = paddle.split(out_tensor, [nloc, nloc], axis=1) + if add_mag: + out_real = out_real + out_mag + shape2 = 1 + for ss in out_real.shape[2:]: + shape2 *= ss + out_mag = ( + out_mag.reshape([nframes, nloc, shape2]) * atomic_mask.astype(out_mag.dtype) + ).reshape(out_mag.shape) + return out_real, out_mag, atomic_mask > 0.0 + + def process_spin_output_lower( + self, + extended_atype, + extended_out_tensor, + nloc: int, + add_mag: bool = True, + virtual_scale: bool = True, + ): + """ + Split the extended output of both real and virtual atoms with switch, and scale the latter. + add_mag: whether to add magnetic tensor onto the real tensor. + Default: True. e.g. Ture for forces and False for atomic virials on real atoms. + virtual_scale: whether to scale the magnetic tensor with virtual scale factor. + Default: True. e.g. Ture for forces and False for atomic virials on virtual atoms. + """ + nframes, nall_double = extended_out_tensor.shape[:2] + nall = nall_double // 2 + if virtual_scale: + virtual_scale_mask = self.virtual_scale_mask.to(extended_atype.place) + else: + virtual_scale_mask = self.spin_mask.to(extended_atype.place) + atomic_mask = virtual_scale_mask[extended_atype].reshape([nframes, nall, 1]) + extended_out_real = paddle.concat( + [ + extended_out_tensor[:, :nloc], + extended_out_tensor[:, nloc + nloc : nloc + nall], + ], + axis=1, + ) + extended_out_mag = paddle.concat( + [ + extended_out_tensor[:, nloc : nloc + nloc], + extended_out_tensor[:, nloc + nall :], + ], + axis=1, + ) + if add_mag: + extended_out_real = extended_out_real + extended_out_mag + shape2 = 1 + for ss in extended_out_tensor.shape[2:]: + shape2 *= ss + extended_out_mag = ( + extended_out_mag.reshape([nframes, nall, shape2]) * atomic_mask + ).reshape(extended_out_mag.shape) + return extended_out_real, extended_out_mag, atomic_mask > 0.0 + + @staticmethod + def extend_nlist(extended_atype, nlist): + nframes, nloc, nnei = nlist.shape + nall = extended_atype.shape[1] + nlist_mask = nlist != -1 + nlist[nlist == -1] = 0 + nlist_shift = nlist + nall + nlist[~nlist_mask] = -1 + nlist_shift[~nlist_mask] = -1 + self_real = ( + paddle.arange(0, nloc, dtype=nlist.dtype) + .to(device=nlist.place) + .reshape([1, -1, 1]) + .expand([nframes, -1, -1]) + ) + self_spin = self_real + nall + # real atom's neighbors: self spin + real neighbor + virtual neighbor + # nf x nloc x (1 + nnei + nnei) + real_nlist = paddle.concat([self_spin, nlist, nlist_shift], axis=-1) + # spin atom's neighbors: real + real neighbor + virtual neighbor + # nf x nloc x (1 + nnei + nnei) + spin_nlist = paddle.concat([self_real, nlist, nlist_shift], axis=-1) + # nf x (nloc + nloc) x (1 + nnei + nnei) + extended_nlist = paddle.concat([real_nlist, spin_nlist], axis=-2) + # update the index for switch + first_part_index = (nloc <= extended_nlist) & (extended_nlist < nall) + second_part_index = (nall <= extended_nlist) & (extended_nlist < (nall + nloc)) + # extended_nlist[first_part_index] += nloc + extended_nlist = decomp.masked_add_(extended_nlist, first_part_index, nloc) + # extended_nlist[second_part_index] -= nall - nloc + entended_nlist = decomp.masked_add_( + extended_nlist, second_part_index, nloc - nall + ) + return extended_nlist + + @staticmethod 
+ def concat_switch_virtual(extended_tensor, extended_tensor_virtual, nloc: int): + """ + Concat real and virtual extended tensors, and switch all the local ones to the first nloc * 2 atoms. + - [:, :nloc]: original nloc real atoms. + - [:, nloc: nloc + nloc]: virtual atoms corresponding to nloc real atoms. + - [:, nloc + nloc: nloc + nall]: ghost real atoms. + - [:, nloc + nall: nall + nall]: virtual atoms corresponding to ghost real atoms. + """ + nframes, nall = extended_tensor.shape[:2] + out_shape = list(extended_tensor.shape) + out_shape[1] *= 2 + extended_tensor_updated = paddle.zeros( + out_shape, + dtype=extended_tensor.dtype, + ).to(device=extended_tensor.place) + extended_tensor_updated[:, :nloc] = extended_tensor[:, :nloc] + extended_tensor_updated[:, nloc : nloc + nloc] = extended_tensor_virtual[ + :, :nloc + ] + extended_tensor_updated[:, nloc + nloc : nloc + nall] = extended_tensor[ + :, nloc: + ] + extended_tensor_updated[:, nloc + nall :] = extended_tensor_virtual[:, nloc:] + return extended_tensor_updated.reshape(out_shape) + + @staticmethod + def expand_aparam(aparam, nloc: int): + """Expand the atom parameters for virtual atoms if necessary.""" + nframes, natom, numb_aparam = aparam.shape + if natom == nloc: # good + pass + elif natom < nloc: # for spin with virtual atoms + aparam = paddle.concat( + [ + aparam, + paddle.zeros( + [nframes, nloc - natom, numb_aparam], + dtype=aparam.dtype, + ).to(device=aparam.place), + ], + axis=1, + ) + else: + raise ValueError( + f"get an input aparam with {aparam.shape[1]} inputs, ", + f"which is larger than {nloc} atoms.", + ) + return aparam + + def get_type_map(self) -> list[str]: + """Get the type map.""" + tmap = self.backbone_model.get_type_map() + ntypes = len(tmap) // 2 # ignore the virtual type + return tmap[:ntypes] + + def get_ntypes(self): + """Returns the number of element types.""" + return len(self.get_type_map()) + + def get_rcut(self): + """Get the cut-off radius.""" + return self.backbone_model.get_rcut() + + def get_dim_fparam(self): + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.backbone_model.get_dim_fparam() + + def get_dim_aparam(self): + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.backbone_model.get_dim_aparam() + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.backbone_model.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + If False, the shape is (nframes, nloc, ndim). 
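The interleaving documented above (local real, local virtual, ghost real, ghost virtual) is easiest to see on a tiny example; this NumPy restatement is purely illustrative and ignores dtype/device handling:

import numpy as np

def concat_switch_virtual_np(ext, ext_virtual, nloc):
    # ext, ext_virtual: (nframes, nall, ...) tensors for real and virtual atoms
    nframes, nall = ext.shape[:2]
    out = np.zeros((nframes, 2 * nall, *ext.shape[2:]), dtype=ext.dtype)
    out[:, :nloc] = ext[:, :nloc]                    # local real atoms
    out[:, nloc : 2 * nloc] = ext_virtual[:, :nloc]  # local virtual atoms
    out[:, 2 * nloc : nloc + nall] = ext[:, nloc:]   # ghost real atoms
    out[:, nloc + nall :] = ext_virtual[:, nloc:]    # ghost virtual atoms
    return out

ext = np.arange(6).reshape(1, 6, 1)  # nall = 6, nloc = 2
virt = ext + 100
print(concat_switch_virtual_np(ext, virt, nloc=2)[0, :, 0])
# [  0   1 100 101   2   3   4   5 102 103 104 105]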
+ """ + return self.backbone_model.is_aparam_nall() + + def model_output_type(self) -> list[str]: + """Get the output type for the model.""" + return self.backbone_model.model_output_type() + + def get_model_def_script(self) -> str: + """Get the model definition script.""" + return self.backbone_model.get_model_def_script() + + def get_min_nbor_dist(self) -> Optional[float]: + """Get the minimum neighbor distance.""" + return self.backbone_model.get_min_nbor_dist() + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + # for C++ interface + if not self.backbone_model.mixed_types(): + return self.backbone_model.get_nnei() // 2 # ignore the virtual selected + else: + return self.backbone_model.get_nnei() + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + if not self.backbone_model.mixed_types(): + return self.backbone_model.get_nsel() // 2 # ignore the virtual selected + else: + return self.backbone_model.get_nsel() + + def has_spin(self) -> bool: + """Returns whether it has spin input and output.""" + return True + + def has_message_passing(self) -> bool: + """Returns whether the model has message passing.""" + return self.backbone_model.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the model needs sorted nlist when using `forward_lower`.""" + return self.backbone_model.need_sorted_nlist_for_lower() + + def model_output_def(self): + """Get the output def for the model.""" + model_output_type = self.backbone_model.model_output_type() + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + var_name = model_output_type[0] + backbone_model_atomic_output_def = self.backbone_model.atomic_output_def() + backbone_model_atomic_output_def[var_name].magnetic = True + return ModelOutputDef(backbone_model_atomic_output_def) + + def __getattr__(self, name): + """Get attribute from the wrapped model.""" + if ( + name == "backbone_model" + ): # paddle.nn.Layer will exclude modules to self.__dict__["_sub_layers"] + return self.__dict__["_sub_layers"]["backbone_model"] + elif name in self.__dict__: + return self.__dict__[name] + elif name in self._buffers: + return self._buffers[name] + else: + return getattr(self.backbone_model, name) + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. + When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). + When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. + + Parameters + ---------- + sampled_func + The lazy sampled function to get data frames from different data systems. + stat_file_path + The dictionary of paths to the statistics files. 
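The statistics path relies on a lazily evaluated, cached sampler so the data systems are only read once even though several statistics are computed from them. A compact sketch of that pattern follows; the function and variable names here are placeholders, not the deepmd API:

import functools

def make_transformed_sampler(sampled_func, transform):
    # Wrap a lazy sampler so the expensive sampling runs at most once and every
    # sampled system is passed through `transform` before statistics are taken.
    @functools.lru_cache
    def cached():
        return [transform(sys) for sys in sampled_func()]
    return cached

# illustrative usage:
# stat_input = make_transformed_sampler(raw_sampler, add_virtual_spin_atoms)
# backbone_model.compute_or_load_stat(stat_input, stat_file_path)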
+ """ + + @functools.lru_cache + def spin_sampled_func(): + sampled = sampled_func() + spin_sampled = [] + for sys in sampled: + coord_updated, atype_updated = self.process_spin_input( + sys["coord"], sys["atype"], sys["spin"] + ) + tmp_dict = { + "coord": coord_updated, + "atype": atype_updated, + } + if "natoms" in sys: + natoms = sys["natoms"] + tmp_dict["natoms"] = paddle.concat( + [2 * natoms[:, :2], natoms[:, 2:], natoms[:, 2:]], axis=-1 + ) + for item_key in sys.keys(): + if item_key not in ["coord", "atype", "spin", "natoms"]: + tmp_dict[item_key] = sys[item_key] + spin_sampled.append(tmp_dict) + return spin_sampled + + self.backbone_model.compute_or_load_stat(spin_sampled_func, stat_file_path) + + def forward_common( + self, + coord, + atype, + spin, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + nframes, nloc = atype.shape + coord_updated, atype_updated = self.process_spin_input(coord, atype, spin) + if aparam is not None: + aparam = self.expand_aparam(aparam, nloc * 2) + model_ret = self.backbone_model.forward_common( + coord_updated, + atype_updated, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + model_output_type = self.backbone_model.model_output_type() + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + var_name = model_output_type[0] + model_ret[f"{var_name}"] = paddle.split( + model_ret[f"{var_name}"], [nloc, nloc], axis=1 + )[0] + if self.backbone_model.do_grad_r(var_name): + ( + model_ret[f"{var_name}_derv_r"], + model_ret[f"{var_name}_derv_r_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output(atype, model_ret[f"{var_name}_derv_r"]) + if self.backbone_model.do_grad_c(var_name) and do_atomic_virial: + ( + model_ret[f"{var_name}_derv_c"], + model_ret[f"{var_name}_derv_c_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output( + atype, + model_ret[f"{var_name}_derv_c"], + add_mag=False, + virtual_scale=False, + ) + return model_ret + + def forward_common_lower( + self, + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + extra_nlist_sort: bool = False, + ): + nframes, nloc = nlist.shape[:2] + ( + extended_coord_updated, + extended_atype_updated, + nlist_updated, + mapping_updated, + ) = self.process_spin_input_lower( + extended_coord, extended_atype, extended_spin, nlist, mapping=mapping + ) + if aparam is not None: + aparam = self.expand_aparam(aparam, nloc * 2) + model_ret = self.backbone_model.forward_common_lower( + extended_coord_updated, + extended_atype_updated, + nlist_updated, + mapping=mapping_updated, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + extra_nlist_sort=extra_nlist_sort, + ) + model_output_type = self.backbone_model.model_output_type() + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + var_name = model_output_type[0] + model_ret[f"{var_name}"] = paddle.split( + model_ret[f"{var_name}"], [nloc, nloc], axis=1 + )[0] + if self.backbone_model.do_grad_r(var_name): + ( + model_ret[f"{var_name}_derv_r"], + model_ret[f"{var_name}_derv_r_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output_lower( + extended_atype, model_ret[f"{var_name}_derv_r"], nloc + ) + if 
self.backbone_model.do_grad_c(var_name) and do_atomic_virial: + ( + model_ret[f"{var_name}_derv_c"], + model_ret[f"{var_name}_derv_c_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output_lower( + extended_atype, + model_ret[f"{var_name}_derv_c"], + nloc, + add_mag=False, + virtual_scale=False, + ) + return model_ret + + def serialize(self) -> dict: + return { + "backbone_model": self.backbone_model.serialize(), + "spin": self.spin.serialize(), + } + + @classmethod + def deserialize(cls, data) -> "SpinModel": + backbone_model_obj = make_model(DPAtomicModel).deserialize( + data["backbone_model"] + ) + spin = Spin.deserialize(data["spin"]) + return cls( + backbone_model=backbone_model_obj, + spin=spin, + ) + + +class SpinEnergyModel(SpinModel): + """A spin model for energy.""" + + model_type = "ener" + + def __init__( + self, + backbone_model, + spin: Spin, + ): + super().__init__(backbone_model, spin) + + def translated_output_def(self): + out_def_data = self.model_output_def().get_data() + output_def = { + "atom_energy": deepcopy(out_def_data["energy"]), + "energy": deepcopy(out_def_data["energy_redu"]), + "mask_mag": deepcopy(out_def_data["mask_mag"]), + } + if self.do_grad_r("energy"): + output_def["force"] = deepcopy(out_def_data["energy_derv_r"]) + output_def["force"].squeeze(-2) + output_def["force_mag"] = deepcopy(out_def_data["energy_derv_r_mag"]) + output_def["force_mag"].squeeze(-2) + return output_def + + def forward( + self, + coord, + atype, + spin, + box: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ) -> dict[str, paddle.Tensor]: + model_ret = self.forward_common( + coord, + atype, + spin, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + model_predict["mask_mag"] = model_ret["mask_mag"] + if self.backbone_model.do_grad_r("energy"): + model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2) + model_predict["force_mag"] = model_ret["energy_derv_r_mag"].squeeze(-2) + # not support virial by far + return model_predict + + def forward_lower( + self, + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + extra_nlist_sort=self.backbone_model.need_sorted_nlist_for_lower(), + ) + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + model_predict["extended_mask_mag"] = model_ret["mask_mag"] + if self.backbone_model.do_grad_r("energy"): + model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2) + model_predict["extended_force_mag"] = model_ret[ + "energy_derv_r_mag" + ].squeeze(-2) + # not support virial by far + return model_predict diff --git a/deepmd/pd/model/model/transform_output.py b/deepmd/pd/model/model/transform_output.py new file mode 100644 index 0000000000..148258d8f2 --- /dev/null +++ b/deepmd/pd/model/model/transform_output.py @@ -0,0 +1,268 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.dpmodel import ( + 
FittingOutputDef, + ModelOutputDef, + OutputVariableDef, + get_deriv_name, + get_reduce_name, +) +from deepmd.pd.utils import ( + decomp, + env, +) + + +def atomic_virial_corr( + extended_coord: paddle.Tensor, + atom_energy: paddle.Tensor, +): + nall = extended_coord.shape[1] + nloc = atom_energy.shape[1] + coord, _ = paddle.split(extended_coord, [nloc, nall - nloc], axis=1) + # no derivative with respect to the loc coord. + coord = coord.detach() + ce = coord * atom_energy + sumce0, sumce1, sumce2 = paddle.split(paddle.sum(ce, axis=1), [1, 1, 1], axis=-1) + faked_grad = paddle.ones_like(sumce0) + # lst = paddle.jit.annotate(List[Optional[paddle.Tensor]], [faked_grad]) + extended_virial_corr0 = paddle.autograd.grad( + [sumce0], + [extended_coord], + # grad_outputs=lst, + create_graph=False, + retain_graph=True, + )[0] + assert extended_virial_corr0 is not None + extended_virial_corr1 = paddle.autograd.grad( + [sumce1], + [extended_coord], + # grad_outputs=lst, + create_graph=False, + retain_graph=True, + )[0] + assert extended_virial_corr1 is not None + extended_virial_corr2 = paddle.autograd.grad( + [sumce2], + [extended_coord], + # grad_outputs=lst, + create_graph=False, + retain_graph=True, + )[0] + assert extended_virial_corr2 is not None + extended_virial_corr = paddle.concat( + [ + extended_virial_corr0.unsqueeze(-1), + extended_virial_corr1.unsqueeze(-1), + extended_virial_corr2.unsqueeze(-1), + ], + axis=-1, + ) + return extended_virial_corr + + +def task_deriv_one( + atom_energy: paddle.Tensor, + energy: paddle.Tensor, + extended_coord: paddle.Tensor, + do_virial: bool = True, + do_atomic_virial: bool = False, + create_graph: bool = True, +): + # faked_grad = paddle.ones_like(energy) + # lst = paddle.jit.annotate(List[Optional[paddle.Tensor]], [faked_grad]) + extended_force = paddle.autograd.grad( + [energy], + [extended_coord], + # grad_outputs=lst, + create_graph=create_graph, + retain_graph=True, + )[0] + assert extended_force is not None + extended_force = -extended_force + if do_virial: + extended_virial = extended_force.unsqueeze(-1) @ extended_coord.unsqueeze(-2) + # the correction sums to zero, which does not contribute to global virial + if do_atomic_virial: + extended_virial_corr = atomic_virial_corr(extended_coord, atom_energy) + extended_virial = extended_virial + extended_virial_corr + # to [...,3,3] -> [...,9] + extended_virial = extended_virial.reshape( + [*list(extended_virial.shape[:-2]), 9] + ) + else: + extended_virial = None + return extended_force, extended_virial + + +def get_leading_dims( + vv: paddle.Tensor, + vdef: OutputVariableDef, +): + """Get the dimensions of nf x nloc.""" + vshape = vv.shape + return list(vshape[: (len(vshape) - len(vdef.shape))]) + + +def get_atom_axis( + vdef: paddle.Tensor, +): + """Get the axis of atoms.""" + atom_axis = -(len(vdef.shape) + 1) + return atom_axis + + +def take_deriv( + vv: paddle.Tensor, + svv: paddle.Tensor, + vdef: OutputVariableDef, + coord_ext: paddle.Tensor, + do_virial: bool = False, + do_atomic_virial: bool = False, + create_graph: bool = True, +): + size = 1 + for ii in vdef.shape: + size *= ii + vv1 = vv.reshape(list(get_leading_dims(vv, vdef)) + [size]) # noqa: RUF005 + svv1 = svv.reshape(list(get_leading_dims(svv, vdef)) + [size]) # noqa: RUF005 + split_vv1 = paddle.split(vv1, [1] * size, axis=-1) + split_svv1 = paddle.split(svv1, [1] * size, axis=-1) + split_ff, split_avir = [], [] + for vvi, svvi in zip(split_vv1, split_svv1): + # nf x nloc x 3, nf x nloc x 9 + ffi, aviri = task_deriv_one( + vvi, + 
svvi, + coord_ext, + do_virial=do_virial, + do_atomic_virial=do_atomic_virial, + create_graph=create_graph, + ) + # nf x nloc x 1 x 3, nf x nloc x 1 x 9 + ffi = ffi.unsqueeze(-2) + split_ff.append(ffi) + if do_virial: + assert aviri is not None + aviri = aviri.unsqueeze(-2) + split_avir.append(aviri) + # nf x nall x v_dim x 3, nf x nall x v_dim x 9 + out_lead_shape = list(coord_ext.shape[:-1]) + vdef.shape + ff = paddle.concat(split_ff, axis=-2).reshape(out_lead_shape + [3]) # noqa: RUF005 + if do_virial: + avir = paddle.concat(split_avir, axis=-2).reshape(out_lead_shape + [9]) # noqa: RUF005 + else: + avir = None + return ff, avir + + +def fit_output_to_model_output( + fit_ret: dict[str, paddle.Tensor], + fit_output_def: FittingOutputDef, + coord_ext: paddle.Tensor, + do_atomic_virial: bool = False, + create_graph: bool = True, +) -> dict[str, paddle.Tensor]: + """Transform the output of the fitting network to + the model output. + + """ + redu_prec = env.GLOBAL_PD_ENER_FLOAT_PRECISION + model_ret = dict(fit_ret.items()) + for kk, vv in fit_ret.items(): + vdef = fit_output_def[kk] + shap = vdef.shape + atom_axis = -(len(shap) + 1) + if vdef.reducible: + kk_redu = get_reduce_name(kk) + model_ret[kk_redu] = paddle.sum(vv.astype(redu_prec), axis=atom_axis) + if vdef.r_differentiable: + kk_derv_r, kk_derv_c = get_deriv_name(kk) + dr, dc = take_deriv( + vv, + model_ret[kk_redu], + vdef, + coord_ext, + do_virial=vdef.c_differentiable, + do_atomic_virial=do_atomic_virial, + create_graph=create_graph, + ) + model_ret[kk_derv_r] = dr + if vdef.c_differentiable: + assert dc is not None + model_ret[kk_derv_c] = dc + model_ret[kk_derv_c + "_redu"] = paddle.sum( + model_ret[kk_derv_c].astype(redu_prec), axis=1 + ) + return model_ret + + +def communicate_extended_output( + model_ret: dict[str, paddle.Tensor], + model_output_def: ModelOutputDef, + mapping: paddle.Tensor, # nf x nloc + do_atomic_virial: bool = False, +) -> dict[str, paddle.Tensor]: + """Transform the output of the model network defined on + local and ghost (extended) atoms to local atoms. 
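The mapping-driven reduction described here is a scatter-sum: every extended (local or ghost) atom adds its contribution to the local atom it maps to. A single-purpose NumPy sketch for the force term (the virial term is handled analogously with 9 components); array names are assumptions for illustration:

import numpy as np

def reduce_extended_force(extended_force, mapping, nloc):
    # extended_force: (nframes, nall, 3); mapping: (nframes, nall) extended -> local index
    nframes = extended_force.shape[0]
    force = np.zeros((nframes, nloc, 3), dtype=extended_force.dtype)
    for ff in range(nframes):
        np.add.at(force[ff], mapping[ff], extended_force[ff])  # unbuffered scatter-sum
    return force

# 1 frame, 2 local atoms, 4 extended atoms (the last two are ghost images of atoms 0 and 1)
f_ext = np.ones((1, 4, 3))
mapping = np.array([[0, 1, 0, 1]])
print(reduce_extended_force(f_ext, mapping, nloc=2)[0])  # each local atom sums 2 contributions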
+ + """ + redu_prec = env.GLOBAL_PD_ENER_FLOAT_PRECISION + new_ret = {} + for kk in model_output_def.keys_outp(): + vv = model_ret[kk] + vdef = model_output_def[kk] + new_ret[kk] = vv + if vdef.reducible: + kk_redu = get_reduce_name(kk) + new_ret[kk_redu] = model_ret[kk_redu] + # nf x nloc + vldims = get_leading_dims(vv, vdef) + # nf x nall + mldims = list(mapping.shape) + kk_derv_r, kk_derv_c = get_deriv_name(kk) + if vdef.r_differentiable: + # vdim x 3 + derv_r_ext_dims = list(vdef.shape) + [3] # noqa:RUF005 + mapping = mapping.reshape(mldims + [1] * len(derv_r_ext_dims)).expand( + [-1] * len(mldims) + derv_r_ext_dims + ) + force = paddle.zeros(vldims + derv_r_ext_dims, dtype=vv.dtype).to( + device=vv.place + ) + # nf x nloc x nvar x 3 + new_ret[kk_derv_r] = decomp.scatter_reduce( + force, + 1, + index=mapping, + src=model_ret[kk_derv_r], + reduce="sum", + ) + if vdef.c_differentiable: + assert vdef.r_differentiable + derv_c_ext_dims = list(vdef.shape) + [9] # noqa:RUF005 + # nf x nloc x nvar x 3 -> nf x nloc x nvar x 9 + mapping = paddle.tile( + mapping, + [1] * (len(mldims) + len(vdef.shape)) + [3], + ) + virial = paddle.zeros(vldims + derv_c_ext_dims, dtype=vv.dtype).to( + device=vv.place + ) + # nf x nloc x nvar x 9 + new_ret[kk_derv_c] = decomp.scatter_reduce( + virial, + 1, + index=mapping, + src=model_ret[kk_derv_c], + reduce="sum", + ) + new_ret[kk_derv_c + "_redu"] = paddle.sum( + new_ret[kk_derv_c].to(redu_prec), axis=1 + ) + if not do_atomic_virial: + # pop atomic virial, because it is not correctly calculated. + new_ret.pop(kk_derv_c) + return new_ret diff --git a/deepmd/pd/model/network/__init__.py b/deepmd/pd/model/network/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pd/model/network/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pd/model/network/init.py b/deepmd/pd/model/network/init.py new file mode 100644 index 0000000000..dbdad56794 --- /dev/null +++ b/deepmd/pd/model/network/init.py @@ -0,0 +1,458 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +# Copyright (c) 2024 The PyTorch Authors. All rights reserved. +# +# This file includes source code from PyTorch of version v2.3.0, which is released under the BSD-3-Clause license. +# For more information about PyTorch, visit https://pytorch.org/. + + +# These no_grad_* functions are necessary as wrappers around the parts of these +# functions that use `with paddle.no_grad()`. The JIT doesn't support context +# managers, so these need to be implemented as builtins. Using these wrappers +# lets us keep those builtins small and re-usable. + +from __future__ import ( + annotations, +) + +import math +import warnings + +import paddle +from paddle import ( + Tensor, +) + +PaddleGenerator = paddle.base.libpaddle.Generator + + +def _no_grad_uniform_(tensor: paddle.Tensor, a, b, generator=None): + with paddle.no_grad(): + return tensor.uniform_(a, b) + + +def _no_grad_normal_(tensor: paddle.Tensor, mean, std, generator=None): + with paddle.no_grad(): + return tensor.normal_(mean, std) + + +def _no_grad_trunc_normal_(tensor: paddle.Tensor, mean, std, a, b, generator=None): + # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. 
" + "The distribution of values may be incorrect.", + stacklevel=2, + ) + + with paddle.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * l - 1, 2 * u - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.multiply_(std * math.sqrt(2.0)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clip_(min=a, max=b) + return tensor + + +def _no_grad_zero_(tensor: paddle.Tensor): + with paddle.no_grad(): + return tensor.zero_() + + +def _no_grad_fill_(tensor: paddle.Tensor, val): + with paddle.no_grad(): + return tensor.fill_(val) + + +def calculate_gain(nonlinearity, param=None): + r"""Return the recommended gain value for the given nonlinearity function. + + The values are as follows: + + ================= ==================================================== + nonlinearity gain + ================= ==================================================== + Linear / Identity :math:`1` + Conv{1,2,3}D :math:`1` + Sigmoid :math:`1` + Tanh :math:`\frac{5}{3}` + ReLU :math:`\sqrt{2}` + Leaky Relu :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}` + SELU :math:`\frac{3}{4}` + ================= ==================================================== + + .. warning:: + In order to implement `Self-Normalizing Neural Networks`_ , + you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``. + This gives the initial weights a variance of ``1 / N``, + which is necessary to induce a stable fixed point in the forward pass. + In contrast, the default gain for ``SELU`` sacrifices the normalization + effect for more stable gradient flow in rectangular layers. + + Args: + nonlinearity: the non-linear function (`nn.functional` name) + param: optional parameter for the non-linear function + + Examples + -------- + >>> gain = nn.init.calculate_gain( + ... "leaky_relu", 0.2 + ... ) # leaky_relu with negative_slope=0.2 + + .. 
_Self-Normalizing Neural Networks: https://papers.nips.cc/paper/2017/hash/5d44ee6f2c3f71b73125876103c8f6c4-Abstract.html + """ + linear_fns = [ + "linear", + "conv1d", + "conv2d", + "conv3d", + "conv_transpose1d", + "conv_transpose2d", + "conv_transpose3d", + ] + if nonlinearity in linear_fns or nonlinearity == "sigmoid": + return 1 + elif nonlinearity == "tanh": + return 5.0 / 3 + elif nonlinearity == "relu": + return math.sqrt(2.0) + elif nonlinearity == "leaky_relu": + if param is None: + negative_slope = 0.01 + elif ( + not isinstance(param, bool) + and isinstance(param, int) + or isinstance(param, float) + ): + # True/False are instances of int, hence check above + negative_slope = param + else: + raise ValueError(f"negative_slope {param} not a valid number") + return math.sqrt(2.0 / (1 + negative_slope**2)) + elif nonlinearity == "selu": + return ( + 3.0 / 4 + ) # Value found empirically (https://github.com/pytorch/pytorch/pull/50664) + else: + raise ValueError(f"Unsupported nonlinearity {nonlinearity}") + + +def _calculate_fan_in_and_fan_out(tensor, reverse=False): + dimensions = tensor.ndim + if dimensions < 2: + raise ValueError( + "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions" + ) + + if reverse: + num_input_fmaps, num_output_fmaps = tensor.shape[0], tensor.shape[1] + else: + num_input_fmaps, num_output_fmaps = tensor.shape[1], tensor.shape[0] + + receptive_field_size = 1 + if tensor.ndim > 2: + for s in tensor.shape[2:]: + receptive_field_size *= s + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + + return fan_in, fan_out + + +def _calculate_correct_fan(tensor, mode, reverse=False): + mode = mode.lower() + valid_modes = ["fan_in", "fan_out"] + if mode not in valid_modes: + raise ValueError(f"Mode {mode} not supported, please use one of {valid_modes}") + + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse) + return fan_in if mode == "fan_in" else fan_out + + +def zeros_(tensor: Tensor) -> Tensor: + r"""Fill the input Tensor with the scalar value `0`. + + Args: + tensor: an n-dimensional `paddle.Tensor` + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.zeros_(w) + """ + return _no_grad_zero_(tensor) + + +def ones_(tensor: Tensor) -> Tensor: + r"""Fill the input Tensor with the scalar value `1`. + + Args: + tensor: an n-dimensional `paddle.Tensor` + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.ones_(w) + """ + return _no_grad_fill_(tensor, 1.0) + + +def constant_(tensor: Tensor, val: float) -> Tensor: + r"""Fill the input Tensor with the value :math:`\text{val}`. + + Args: + tensor: an n-dimensional `paddle.Tensor` + val: the value to fill the tensor with + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.constant_(w, 0.3) + """ + return _no_grad_fill_(tensor, val) + + +def normal_( + tensor: Tensor, + mean: float = 0.0, + std: float = 1.0, + generator: PaddleGenerator | None = None, +) -> Tensor: + r"""Fill the input Tensor with values drawn from the normal distribution. + + :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. 
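# A quick numeric check of the helpers above, assuming a hypothetical weight of
# shape [fan_out, fan_in] = [128, 64] (the layout _calculate_fan_in_and_fan_out
# expects when reverse=False):
import math

fan_in, fan_out = 64, 128
gain_relu = math.sqrt(2.0)                   # calculate_gain("relu")
gain_leaky = math.sqrt(2.0 / (1 + 0.01**2))  # calculate_gain("leaky_relu", 0.01)
# bound used by kaiming_uniform_ further below: gain * sqrt(3 / fan_mode)
bound = gain_relu * math.sqrt(3.0 / fan_in)
print(gain_relu, gain_leaky, bound)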
+ + Args: + tensor: an n-dimensional `paddle.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + generator: the paddle Generator to sample from (default: None) + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.normal_(w) + """ + return _no_grad_normal_(tensor, mean, std, generator) + + +def trunc_normal_( + tensor: Tensor, + mean: float = 0.0, + std: float = 1.0, + a: float = -2.0, + b: float = 2.0, + generator: PaddleGenerator | None = None, +) -> Tensor: + r"""Fill the input Tensor with values drawn from a truncated normal distribution. + + The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + Args: + tensor: an n-dimensional `paddle.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + a: the minimum cutoff value + b: the maximum cutoff value + generator: the paddle Generator to sample from (default: None) + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.trunc_normal_(w) + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) + + +def kaiming_uniform_( + tensor: Tensor, + a: float = 0, + mode: str = "fan_in", + nonlinearity: str = "leaky_relu", + generator: PaddleGenerator | None = None, + reverse: bool = False, +): + r"""Fill the input `Tensor` with values using a Kaiming uniform distribution. + + The method is described in `Delving deep into rectifiers: Surpassing + human-level performance on ImageNet classification` - He, K. et al. (2015). + The resulting tensor will have values sampled from + :math:`\mathcal{U}(-\text{bound}, \text{bound})` where + + .. math:: + \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}} + + Also known as He initialization. + + Args: + tensor: an n-dimensional `paddle.Tensor` + a: the negative slope of the rectifier used after this layer (only + used with ``'leaky_relu'``) + mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'`` + preserves the magnitude of the variance of the weights in the + forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the + backwards pass. + nonlinearity: the non-linear function (`nn.functional` name), + recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default). + generator: the paddle Generator to sample from (default: None) + reverse (bool, optional): Tensor data format order, False by default as + [fout, fin, ...].. Defaults to False. + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.kaiming_uniform_(w, mode="fan_in", nonlinearity="relu") + """ + if 0 in tensor.shape: + warnings.warn("Initializing zero-element tensors is a no-op") + return tensor + fan = _calculate_correct_fan(tensor, mode, reverse) + gain = calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation + with paddle.no_grad(): + return tensor.uniform_(-bound, bound) + + +def kaiming_normal_( + tensor: Tensor, + a: float = 0, + mode: str = "fan_in", + nonlinearity: str = "leaky_relu", + generator: PaddleGenerator | None = None, + reverse: bool = False, +): + r"""Fill the input `Tensor` with values using a Kaiming normal distribution. 
+ + The method is described in `Delving deep into rectifiers: Surpassing + human-level performance on ImageNet classification` - He, K. et al. (2015). + The resulting tensor will have values sampled from + :math:`\mathcal{N}(0, \text{std}^2)` where + + .. math:: + \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}} + + Also known as He initialization. + + Args: + tensor: an n-dimensional `paddle.Tensor` + a: the negative slope of the rectifier used after this layer (only + used with ``'leaky_relu'``) + mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'`` + preserves the magnitude of the variance of the weights in the + forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the + backwards pass. + nonlinearity: the non-linear function (`nn.functional` name), + recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default). + generator: the paddle Generator to sample from (default: None) + reverse (bool, optional): Tensor data format order, False by default as + [fout, fin, ...].. Defaults to False. + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.kaiming_normal_(w, mode="fan_out", nonlinearity="relu") + """ + if 0 in tensor.shape: + warnings.warn("Initializing zero-element tensors is a no-op") + return tensor + fan = _calculate_correct_fan(tensor, mode, reverse) + gain = calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + with paddle.no_grad(): + return tensor.normal_(0, std) + + +def xavier_uniform_( + tensor: Tensor, + gain: float = 1.0, + generator: PaddleGenerator | None = None, + reverse: bool = False, +) -> Tensor: + r"""Fill the input `Tensor` with values using a Xavier uniform distribution. + + The method is described in `Understanding the difficulty of training + deep feedforward neural networks` - Glorot, X. & Bengio, Y. (2010). + The resulting tensor will have values sampled from + :math:`\mathcal{U}(-a, a)` where + + .. math:: + a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}} + + Also known as Glorot initialization. + + Args: + tensor: an n-dimensional `paddle.Tensor` + gain: an optional scaling factor + generator: the paddle Generator to sample from (default: None) + reverse (bool, optional): Tensor data format order, False by default as + [fout, fin, ...].. Defaults to False. + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain("relu")) + """ + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse) + std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) + a = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation + + return _no_grad_uniform_(tensor, -a, a, generator) + + +def xavier_normal_( + tensor: Tensor, + gain: float = 1.0, + generator: PaddleGenerator | None = None, + reverse: bool = False, +) -> Tensor: + r"""Fill the input `Tensor` with values using a Xavier normal distribution. + + The method is described in `Understanding the difficulty of training deep feedforward + neural networks` - Glorot, X. & Bengio, Y. (2010). The resulting tensor + will have values sampled from :math:`\mathcal{N}(0, \text{std}^2)` where + + .. math:: + \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}} + + Also known as Glorot initialization. 
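# A minimal usage sketch for the initializers defined in this new module, assuming
# the module path introduced by this patch (deepmd.pd.model.network.init); the
# weight shape is hypothetical:
import paddle

from deepmd.pd.model.network.init import (
    kaiming_uniform_,
    xavier_normal_,
)

w = paddle.empty([128, 64], dtype="float32")
kaiming_uniform_(w, mode="fan_in", nonlinearity="relu")  # U(-bound, bound), bound = sqrt(2)*sqrt(3/64)
xavier_normal_(w, gain=1.0)                              # N(0, std), std = sqrt(2/(64+128))
# both fill w in place and return it, mirroring the torch.nn.init semantics they port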
+ + Args: + tensor: an n-dimensional `paddle.Tensor` + gain: an optional scaling factor + generator: the paddle Generator to sample from (default: None) + reverse (bool, optional): Tensor data format order, False by + default as [fout, fin, ...]. Defaults to False. + + Examples + -------- + >>> w = paddle.empty(3, 5) + >>> nn.init.xavier_normal_(w) + """ + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse) + std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) + + return _no_grad_normal_(tensor, 0.0, std, generator) diff --git a/deepmd/pd/model/network/layernorm.py b/deepmd/pd/model/network/layernorm.py new file mode 100644 index 0000000000..76299040e8 --- /dev/null +++ b/deepmd/pd/model/network/layernorm.py @@ -0,0 +1,163 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, + Union, +) + +import numpy as np +import paddle +import paddle.nn as nn + +from deepmd.dpmodel.utils.network import LayerNorm as DPLayerNorm +from deepmd.pd.model.network.init import ( + normal_, + ones_, + zeros_, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.pd.utils.utils import ( + get_generator, + to_numpy_array, + to_paddle_tensor, +) + +device = env.DEVICE + + +def empty_t(shape, precision): + return paddle.empty(shape, dtype=precision).to(device=device) + + +class LayerNorm(nn.Layer): + def __init__( + self, + num_in, + eps: float = 1e-5, + uni_init: bool = True, + bavg: float = 0.0, + stddev: float = 1.0, + precision: str = DEFAULT_PRECISION, + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + ): + super().__init__() + self.eps = eps + self.uni_init = uni_init + self.num_in = num_in + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.matrix = self.create_parameter( + shape=[num_in], + dtype=self.prec, + default_initializer=nn.initializer.Assign( + empty_t((num_in,), self.prec), + ), + ) + self.bias = self.create_parameter( + shape=[num_in], + dtype=self.prec, + default_initializer=nn.initializer.Assign(empty_t([num_in], self.prec)), + ) + random_generator = get_generator(seed) + if self.uni_init: + ones_(self.matrix.data) + zeros_(self.bias.data) + else: + normal_(self.bias.data, mean=bavg, std=stddev, generator=random_generator) + normal_( + self.matrix.data, + std=stddev / np.sqrt(self.num_in), + generator=random_generator, + ) + self.trainable = trainable + if not self.trainable: + self.matrix.stop_gradient = True + self.bias.stop_gradient = True + + def dim_out(self) -> int: + return self.matrix.shape[0] + + def forward( + self, + xx: paddle.Tensor, + ) -> paddle.Tensor: + """One Layer Norm used by DP model. + + Parameters + ---------- + xx : paddle.Tensor + The input of index. + + Returns + ------- + yy: paddle.Tensor + The output. + """ + if xx.numel() > 0: + variance, mean = ( + paddle.var(xx, axis=-1, unbiased=False, keepdim=True), + paddle.mean(xx, axis=-1, keepdim=True), + ) + yy = (xx - mean) / paddle.sqrt(variance + self.eps) + else: + yy = xx + if self.matrix is not None and self.bias is not None: + yy = yy * self.matrix + self.bias + return yy + + def serialize(self) -> dict: + """Serialize the layer to a dict. + + Returns + ------- + dict + The serialized layer. 
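# What LayerNorm.forward above computes, spelled out once in numpy so the
# convention (biased variance over the last axis, then affine weight and bias)
# stays explicit; shapes are hypothetical:
import numpy as np

x = np.random.rand(2, 5)
w, b, eps = np.ones(5), np.zeros(5), 1e-5   # uni_init=True starting values
mean = x.mean(axis=-1, keepdims=True)
var = x.var(axis=-1, keepdims=True)          # matches unbiased=False in forward
y = (x - mean) / np.sqrt(var + eps) * w + b
print(y.shape)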
+ """ + nl = DPLayerNorm( + self.matrix.shape[0], + eps=self.eps, + trainable=self.trainable, + precision=self.precision, + ) + nl.w = to_numpy_array(self.matrix) + nl.b = to_numpy_array(self.bias) + data = nl.serialize() + return data + + @classmethod + def deserialize(cls, data: dict) -> "LayerNorm": + """Deserialize the layer from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + nl = DPLayerNorm.deserialize(data) + obj = cls( + nl["matrix"].shape[0], + eps=nl["eps"], + trainable=nl["trainable"], + precision=nl["precision"], + ) + prec = PRECISION_DICT[obj.precision] + + def check_load_param(ss): + if nl[ss] is not None: + tensor = to_paddle_tensor(nl[ss]) + return paddle.create_parameter( + tensor.shape, + dtype=tensor.dtype, + default_initializer=nn.initializer.Assign(tensor), + ) + return None + + obj.matrix = check_load_param("matrix") + obj.bias = check_load_param("bias") + return obj diff --git a/deepmd/pd/model/network/mlp.py b/deepmd/pd/model/network/mlp.py new file mode 100644 index 0000000000..370b0fa8fa --- /dev/null +++ b/deepmd/pd/model/network/mlp.py @@ -0,0 +1,328 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from __future__ import ( + annotations, +) + +from typing import ( + ClassVar, +) + +import numpy as np +import paddle +import paddle.nn as nn + +from deepmd.pd.utils import ( + env, +) + +device = env.DEVICE + +from deepmd.dpmodel.utils import ( + NativeLayer, +) +from deepmd.dpmodel.utils import NetworkCollection as DPNetworkCollection +from deepmd.dpmodel.utils import ( + make_embedding_network, + make_fitting_network, + make_multilayer_network, +) +from deepmd.pd.model.network.init import ( + PaddleGenerator, + kaiming_normal_, + normal_, + trunc_normal_, + xavier_uniform_, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.pd.utils.utils import ( + ActivationFn, + get_generator, + to_numpy_array, + to_paddle_tensor, +) + + +def empty_t(shape, precision): + return paddle.empty(shape, dtype=precision).to(device=device) + + +class Identity(nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + xx: paddle.Tensor, + ) -> paddle.Tensor: + """The Identity operation layer.""" + return xx + + def serialize(self) -> dict: + return { + "@class": "Identity", + "@version": 1, + } + + @classmethod + def deserialize(cls, data: dict) -> Identity: + return Identity() + + +class MLPLayer(nn.Layer): + def __init__( + self, + num_in, + num_out, + bias: bool = True, + use_timestep: bool = False, + activation_function: str | None = None, + resnet: bool = False, + bavg: float = 0.0, + stddev: float = 1.0, + precision: str = DEFAULT_PRECISION, + init: str = "default", + seed: int | list[int] | None = None, + ): + super().__init__() + # only use_timestep when skip connection is established. 
+ self.use_timestep = use_timestep and ( + num_out == num_in or num_out == num_in * 2 + ) + self.num_in = num_in + self.num_out = num_out + self.activate_name = activation_function + self.activate = ActivationFn(self.activate_name) + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.matrix = self.create_parameter( + (num_in, num_out), + dtype=self.prec, + default_initializer=nn.initializer.Assign( + empty_t((num_in, num_out), self.prec) + ), + ) + random_generator = get_generator(seed) + if bias: + self.bias = self.create_parameter( + [num_out], + dtype=self.prec, + default_initializer=nn.initializer.Assign( + empty_t([num_out], self.prec) + ), + ) + else: + self.bias = None + if self.use_timestep: + self.idt = self.create_parameter( + [num_out], + dtype=self.prec, + default_initializer=nn.initializer.Assign( + empty_t([num_out], self.prec) + ), + ) + else: + self.idt = None + self.resnet = resnet + if init == "default": + self._default_normal_init( + bavg=bavg, stddev=stddev, generator=random_generator + ) + elif init == "trunc_normal": + self._trunc_normal_init(1.0, generator=random_generator) + elif init == "relu": + self._trunc_normal_init(2.0, generator=random_generator) + elif init == "glorot": + self._glorot_uniform_init(generator=random_generator) + elif init == "gating": + self._zero_init(self.use_bias) + elif init == "kaiming_normal": + self._normal_init(generator=random_generator) + elif init == "final": + self._zero_init(False) + else: + raise ValueError(f"Unknown initialization method: {init}") + + def check_type_consistency(self): + precision = self.precision + + def check_var(var): + if var is not None: + # assertion "float64" == "double" would fail + assert PRECISION_DICT[var.dtype.name] is PRECISION_DICT[precision] + + check_var(self.matrix) + check_var(self.bias) + check_var(self.idt) + + def dim_in(self) -> int: + return self.matrix.shape[0] + + def dim_out(self) -> int: + return self.matrix.shape[1] + + def _default_normal_init( + self, + bavg: float = 0.0, + stddev: float = 1.0, + generator: PaddleGenerator | None = None, + ): + normal_( + self.matrix.data, + std=stddev / np.sqrt(self.num_out + self.num_in), + generator=generator, + ) + if self.bias is not None: + normal_(self.bias.data, mean=bavg, std=stddev, generator=generator) + if self.idt is not None: + normal_(self.idt.data, mean=0.1, std=0.001, generator=generator) + + def _trunc_normal_init(self, scale=1.0, generator: PaddleGenerator | None = None): + # Constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) + TRUNCATED_NORMAL_STDDEV_FACTOR = 0.87962566103423978 + _, fan_in = self.matrix.shape + scale = scale / max(1, fan_in) + std = (scale**0.5) / TRUNCATED_NORMAL_STDDEV_FACTOR + trunc_normal_(self.matrix, mean=0.0, std=std, generator=generator) + + def _glorot_uniform_init(self, generator: PaddleGenerator | None = None): + xavier_uniform_(self.matrix, gain=1, generator=generator) + + def _zero_init(self, use_bias=True): + with paddle.no_grad(): + self.matrix.fill_(0.0) + if use_bias and self.bias is not None: + with paddle.no_grad(): + self.bias.fill_(1.0) + + def _normal_init(self, generator: PaddleGenerator | None = None): + kaiming_normal_(self.matrix, nonlinearity="linear", generator=generator) + + def forward( + self, + xx: paddle.Tensor, + ) -> paddle.Tensor: + """One MLP layer used by DP model. + + Parameters + ---------- + xx : paddle.Tensor + The input. + + Returns + ------- + yy: paddle.Tensor + The output. 
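# The arithmetic one MLPLayer performs (the forward body follows below), sketched
# in numpy for a hypothetical square layer so the resnet skip applies:
import numpy as np

num_in = num_out = 8
x = np.random.rand(3, num_in)
W = np.random.rand(num_in, num_out) / np.sqrt(num_in + num_out)
b = np.zeros(num_out)
idt = np.full(num_out, 0.1)    # present only when use_timestep is enabled

y = np.tanh(x @ W + b)         # activation_function="tanh"
y = y * idt                    # timestep scaling
y = y + x                      # resnet: same width, plain addition
print(y.shape)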
+ """ + ori_prec = xx.dtype + xx = xx.astype(self.prec) + yy = ( + paddle.matmul(xx, self.matrix) + self.bias + if self.bias is not None + else paddle.matmul(xx, self.matrix) + ) + yy = self.activate(yy).clone() + yy = yy * self.idt if self.idt is not None else yy + if self.resnet: + if xx.shape[-1] == yy.shape[-1]: + yy += xx + elif 2 * xx.shape[-1] == yy.shape[-1]: + yy += paddle.concat([xx, xx], axis=-1) + # else: + # yy = yy + yy = yy.astype(ori_prec) + return yy + + def serialize(self) -> dict: + """Serialize the layer to a dict. + + Returns + ------- + dict + The serialized layer. + """ + nl = NativeLayer( + self.matrix.shape[0], + self.matrix.shape[1], + bias=self.bias is not None, + use_timestep=self.idt is not None, + activation_function=self.activate_name, + resnet=self.resnet, + precision=self.precision, + ) + nl.w, nl.b, nl.idt = ( + to_numpy_array(self.matrix), + to_numpy_array(self.bias), + to_numpy_array(self.idt), + ) + return nl.serialize() + + @classmethod + def deserialize(cls, data: dict) -> MLPLayer: + """Deserialize the layer from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + nl = NativeLayer.deserialize(data) + obj = cls( + nl["matrix"].shape[0], + nl["matrix"].shape[1], + bias=nl["bias"] is not None, + use_timestep=nl["idt"] is not None, + activation_function=nl["activation_function"], + resnet=nl["resnet"], + precision=nl["precision"], + ) + prec = PRECISION_DICT[obj.precision] + + def check_load_param(ss): + if nl[ss] is not None: + tensor = to_paddle_tensor(nl[ss]) + return paddle.create_parameter( + tensor.shape, + dtype=tensor.dtype, + default_initializer=nn.initializer.Assign(tensor), + ) + return None + + obj.matrix = check_load_param("matrix") + obj.bias = check_load_param("bias") + obj.idt = check_load_param("idt") + return obj + + +MLP_ = make_multilayer_network(MLPLayer, nn.Layer) + + +class MLP(MLP_): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.layers = paddle.nn.LayerList(self.layers) + + forward = MLP_.call + + +EmbeddingNet = make_embedding_network(MLP, MLPLayer) + +FittingNet = make_fitting_network(EmbeddingNet, MLP, MLPLayer) + + +class NetworkCollection(DPNetworkCollection, nn.Layer): + """Paddle implementation of NetworkCollection.""" + + NETWORK_TYPE_MAP: ClassVar[dict[str, type]] = { + "network": MLP, + "embedding_network": EmbeddingNet, + "fitting_network": FittingNet, + } + + def __init__(self, *args, **kwargs): + # init both two base classes + DPNetworkCollection.__init__(self, *args, **kwargs) + nn.Layer.__init__(self) + self.networks = self._networks = paddle.nn.LayerList(self._networks) diff --git a/deepmd/pd/model/network/network.py b/deepmd/pd/model/network/network.py new file mode 100644 index 0000000000..21d6586476 --- /dev/null +++ b/deepmd/pd/model/network/network.py @@ -0,0 +1,555 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, + Union, +) + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from deepmd.pd.model.network import ( + init, +) +from deepmd.pd.model.network.mlp import ( + EmbeddingNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +try: + from typing import ( + Final, + ) +except ImportError: + from paddle.jit import Final + +from deepmd.dpmodel.utils.type_embed import ( + get_econf_tebd, +) +from deepmd.pd.utils.utils import ( + ActivationFn, + to_paddle_tensor, +) +from deepmd.utils.finetune 
import ( + get_index_between_two_maps, +) + + +def Tensor(*shape): + return paddle.empty(shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + + +class SimpleLinear(nn.Layer): + use_timestep: Final[bool] + + def __init__( + self, + num_in, + num_out, + bavg=0.0, + stddev=1.0, + use_timestep=False, + activate=None, + bias: bool = True, + ): + """Construct a linear layer. + + Args: + - num_in: Width of input tensor. + - num_out: Width of output tensor. + - use_timestep: Apply time-step to weight. + - activate: type of activate func. + """ + super().__init__() + self.num_in = num_in + self.num_out = num_out + self.use_timestep = use_timestep + self.activate = ActivationFn(activate) + + self.matrix = self.create_parameter( + [num_in, num_out], + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ) + init.normal_(self.matrix, std=stddev / np.sqrt(num_out + num_in)) + if bias: + self.bias = self.create_parameter( + (1, num_out), + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ) + init.normal_(self.bias, mean=bavg, std=stddev) + else: + self.bias = None + if self.use_timestep: + self.idt = self.create_parameter( + (1, num_out), + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ) + init.normal_(self.idt, mean=0.1, std=0.001) + + def forward(self, inputs): + """Return X*W+b.""" + xw = paddle.matmul(inputs, self.matrix) + hidden = xw + self.bias if self.bias is not None else xw + hidden = self.activate(hidden) + if self.use_timestep: + hidden = hidden * self.idt + return hidden + + +class Linear(nn.Linear): + def __init__( + self, + d_in: int, + d_out: int, + bias: bool = True, + init: str = "default", + ): + super().__init__( + d_in, + d_out, + bias=bias, + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + device=env.DEVICE, + ) + + self.use_bias = bias + + if self.use_bias: + with paddle.no_grad(): + self.bias.fill_(0) + + if init == "default": + self._trunc_normal_init(1.0) + elif init == "relu": + self._trunc_normal_init(2.0) + elif init == "glorot": + self._glorot_uniform_init() + elif init == "gating": + self._zero_init(self.use_bias) + elif init == "normal": + self._normal_init() + elif init == "final": + self._zero_init(False) + else: + raise ValueError("Invalid init method.") + + def _trunc_normal_init(self, scale=1.0): + # Constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) 
+ TRUNCATED_NORMAL_STDDEV_FACTOR = 0.87962566103423978 + _, fan_in = self.weight.shape + scale = scale / max(1, fan_in) + std = (scale**0.5) / TRUNCATED_NORMAL_STDDEV_FACTOR + init.trunc_normal_(self.weight, mean=0.0, std=std) + + def _glorot_uniform_init(self): + init.xavier_uniform_(self.weight, gain=1) + + def _zero_init(self, use_bias=True): + with paddle.no_grad(): + self.weight.fill_(0.0) + if use_bias: + with paddle.no_grad(): + self.bias.fill_(1.0) + + def _normal_init(self): + init.kaiming_normal_(self.weight, nonlinearity="linear") + + +class NonLinearHead(nn.Layer): + def __init__(self, input_dim, out_dim, activation_fn, hidden=None): + super().__init__() + hidden = input_dim if not hidden else hidden + self.linear1 = SimpleLinear(input_dim, hidden, activate=activation_fn) + self.linear2 = SimpleLinear(hidden, out_dim) + + def forward(self, x): + x = self.linear1(x) + x = self.linear2(x) + return x + + +class MaskLMHead(nn.Layer): + """Head for masked language modeling.""" + + def __init__(self, embed_dim, output_dim, activation_fn, weight=None): + super().__init__() + self.dense = SimpleLinear(embed_dim, embed_dim) + self.activation_fn = ActivationFn(activation_fn) + self.layer_norm = nn.LayerNorm(embed_dim) + + if weight is None: + weight = nn.Linear(embed_dim, output_dim, bias_attr=False).weight + self.weight = weight.T + self.bias = self.create_parameter( + [output_dim], + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + default_initializer=nn.initializer.Constant(0), # pylint: disable=no-explicit-dtype,no-explicit-device + ) + + def forward( + self, features, masked_tokens: Optional[paddle.Tensor] = None, **kwargs + ): + # Only project the masked tokens while training, + # saves both memory and computation + if masked_tokens is not None: + features = features[masked_tokens, :] + + x = self.dense(features) + x = self.activation_fn(x) + x = self.layer_norm(x) + # project back to size of vocabulary with bias + x = F.linear(x, self.weight) + self.bias + return x + + +class ResidualDeep(nn.Layer): + def __init__( + self, type_id, embedding_width, neuron, bias_atom_e, out_dim=1, resnet_dt=False + ): + """Construct a filter on the given element as neighbor. + + Args: + - typei: Element ID. + - embedding_width: Embedding width per atom. + - neuron: Number of neurons in each hidden layers of the embedding net. + - resnet_dt: Using time-step in the ResNet construction. + """ + super().__init__() + self.type_id = type_id + self.neuron = [embedding_width, *neuron] + self.out_dim = out_dim + + deep_layers = [] + for ii in range(1, len(self.neuron)): + one = SimpleLinear( + num_in=self.neuron[ii - 1], + num_out=self.neuron[ii], + use_timestep=( + resnet_dt and ii > 1 and self.neuron[ii - 1] == self.neuron[ii] + ), + activate="tanh", + ) + deep_layers.append(one) + self.deep_layers = nn.LayerList(deep_layers) + if not env.ENERGY_BIAS_TRAINABLE: + bias_atom_e = 0 + self.final_layer = SimpleLinear(self.neuron[-1], self.out_dim, bias_atom_e) + + def forward(self, inputs): + """Calculate decoded embedding for each atom. + + Args: + - inputs: Embedding net output per atom. Its shape is [nframes*nloc, self.embedding_width]. + + Returns + ------- + - `paddle.Tensor`: Output layer with shape [nframes*nloc, self.neuron[-1]]. 
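# How ResidualDeep chains its hidden layers (the forward body follows below): a
# skip connection is added only when a hidden layer keeps its width, mirroring the
# `idx > 0 and num_in == num_out` test. Sketched in numpy with tanh activations
# and hypothetical widths:
import numpy as np

neuron = [16, 240, 240, 240]   # [embedding_width, *neuron]
weights = [np.random.rand(i, o) * 0.01 for i, o in zip(neuron[:-1], neuron[1:])]
x = np.random.rand(5, neuron[0])
out = x
for idx, W in enumerate(weights):
    h = np.tanh(out @ W)
    out = out + h if idx > 0 and W.shape[0] == W.shape[1] else h
final = out @ np.random.rand(neuron[-1], 1)   # final_layer projecting to out_dim=1
print(final.shape)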
+ """ + outputs = inputs + for idx, linear in enumerate(self.deep_layers): + if idx > 0 and linear.num_in == linear.num_out: + outputs = outputs + linear(outputs) + else: + outputs = linear(outputs) + outputs = self.final_layer(outputs) + return outputs + + +class TypeEmbedNet(nn.Layer): + def __init__( + self, + type_nums, + embed_dim, + bavg=0.0, + stddev=1.0, + precision="default", + seed: Optional[Union[int, list[int]]] = None, + use_econf_tebd=False, + use_tebd_bias: bool = False, + type_map=None, + ): + """Construct a type embedding net.""" + super().__init__() + self.type_nums = type_nums + self.embed_dim = embed_dim + self.bavg = bavg + self.stddev = stddev + self.use_econf_tebd = use_econf_tebd + self.use_tebd_bias = use_tebd_bias + self.type_map = type_map + self.embedding = TypeEmbedNetConsistent( + ntypes=self.type_nums, + neuron=[self.embed_dim], + padding=True, + activation_function="Linear", + use_econf_tebd=use_econf_tebd, + use_tebd_bias=use_tebd_bias, + type_map=type_map, + precision=precision, + seed=seed, + ) + # init.normal_(self.embedding.weight[:-1], mean=bavg, std=stddev) + + def forward(self, atype): + """ + Args: + atype: Type of each input, [nframes, nloc] or [nframes, nloc, nnei]. + + Returns + ------- + type_embedding: + + """ + return self.embedding(atype.place)[atype] + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only TypeEmbedNet of the same type can share params!" + if shared_level == 0: + # the following will successfully link all the params except buffers, which need manually link. + for item in self._sub_layers: + self._sub_layers[item] = base_class._sub_layers[item] + else: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + self.embedding.change_type_map(type_map=type_map) + + +class TypeEmbedNetConsistent(nn.Layer): + r"""Type embedding network that is consistent with other backends. + + Parameters + ---------- + ntypes : int + Number of atom types + neuron : list[int] + Number of neurons in each hidden layers of the embedding net + resnet_dt + Time-step `dt` in the resnet construction: y = x + dt * \phi (Wx + b) + activation_function + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + trainable + If the weights of embedding net are trainable. + seed + Random seed for initializing the network parameters. + padding + Concat the zero padding to the output, as the default embedding of empty type. + use_econf_tebd: bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. 
+ """ + + def __init__( + self, + *, + ntypes: int, + neuron: list[int], + resnet_dt: bool = False, + activation_function: str = "tanh", + precision: str = "default", + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + padding: bool = False, + use_econf_tebd: bool = False, + use_tebd_bias: bool = False, + type_map: Optional[list[str]] = None, + ): + """Construct a type embedding net.""" + super().__init__() + self.ntypes = ntypes + self.neuron = neuron + self.seed = seed + self.resnet_dt = resnet_dt + self.precision = precision + self.prec = env.PRECISION_DICT[self.precision] + self.activation_function = str(activation_function) + self.trainable = trainable + self.padding = padding + self.use_econf_tebd = use_econf_tebd + self.use_tebd_bias = use_tebd_bias + self.type_map = type_map + self.econf_tebd = None + embed_input_dim = ntypes + if self.use_econf_tebd: + econf_tebd, embed_input_dim = get_econf_tebd( + self.type_map, precision=self.precision + ) + self.econf_tebd = to_paddle_tensor(econf_tebd) + self.embedding_net = EmbeddingNet( + embed_input_dim, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + self.seed, + bias=self.use_tebd_bias, + ) + for param in self.parameters(): + param.stop_gradient = not trainable + + def forward(self, device: str): + """Caulate type embedding network. + + Returns + ------- + type_embedding: paddle.Tensor + Type embedding network. + """ + if not self.use_econf_tebd: + embed = self.embedding_net( + paddle.eye(self.ntypes, dtype=self.prec).to(device=device) + ) + else: + assert self.econf_tebd is not None + embed = self.embedding_net(self.econf_tebd.to(device)) + if self.padding: + embed = paddle.concat( + [ + embed, + paddle.zeros([1, embed.shape[1]], dtype=self.prec).to( + device=device + ), + ] + ) + return embed + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + if not self.use_econf_tebd: + do_resnet = self.neuron[0] in [ + self.ntypes, + self.ntypes * 2, + len(type_map), + len(type_map) * 2, + ] + assert ( + not do_resnet or self.activation_function == "Linear" + ), "'activation_function' must be 'Linear' when performing type changing on resnet structure!" 
+ first_layer_matrix = self.embedding_net.layers[0].matrix + eye_vector = paddle.eye(self.ntypes, dtype=self.prec).to( + device=first_layer_matrix.place + ) + # preprocess for resnet connection + if self.neuron[0] == self.ntypes: + first_layer_matrix += eye_vector + elif self.neuron[0] == self.ntypes * 2: + first_layer_matrix += paddle.concat([eye_vector, eye_vector], axis=-1) + + # randomly initialize params for the unseen types + if has_new_type: + extend_type_params = paddle.rand( + [len(type_map), first_layer_matrix.shape[-1]], + dtype=first_layer_matrix.dtype, + ).to(device=first_layer_matrix.place) + first_layer_matrix = paddle.concat( + [first_layer_matrix, extend_type_params], axis=0 + ) + + first_layer_matrix = first_layer_matrix[remap_index] + new_ntypes = len(type_map) + eye_vector = paddle.eye(new_ntypes, dtype=self.prec).to( + device=first_layer_matrix.place + ) + + if self.neuron[0] == new_ntypes: + first_layer_matrix -= eye_vector + elif self.neuron[0] == new_ntypes * 2: + first_layer_matrix -= paddle.concat([eye_vector, eye_vector], axis=-1) + + self.embedding_net.layers[0].num_in = new_ntypes + self.embedding_net.layers[0].matrix = self.create_parameter( + first_layer_matrix.shape, + dtype=first_layer_matrix.dtype, + default_initializer=nn.initializer.Assign(first_layer_matrix), + ) + else: + econf_tebd, embed_input_dim = get_econf_tebd( + type_map, precision=self.precision + ) + self.econf_tebd = to_paddle_tensor(econf_tebd) + self.type_map = type_map + self.ntypes = len(type_map) + + @classmethod + def deserialize(cls, data: dict): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + TypeEmbedNetConsistent + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 2, 1) + data_cls = data.pop("@class") + assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}" + + embedding_net = EmbeddingNet.deserialize(data.pop("embedding")) + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + type_embedding_net = cls(**data) + type_embedding_net.embedding_net = embedding_net + return type_embedding_net + + def serialize(self) -> dict: + """Serialize the model. 
+ + Returns + ------- + dict + The serialized data + """ + return { + "@class": "TypeEmbedNet", + "@version": 2, + "ntypes": self.ntypes, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "precision": self.precision, + "activation_function": self.activation_function, + "trainable": self.trainable, + "padding": self.padding, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + "embedding": self.embedding_net.serialize(), + } diff --git a/deepmd/pd/model/task/__init__.py b/deepmd/pd/model/task/__init__.py new file mode 100644 index 0000000000..02d852eab7 --- /dev/null +++ b/deepmd/pd/model/task/__init__.py @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .base_fitting import ( + BaseFitting, +) +from .denoise import ( + DenoiseNet, +) +from .dipole import ( + DipoleFittingNet, +) +from .dos import ( + DOSFittingNet, +) +from .ener import ( + EnergyFittingNet, + EnergyFittingNetDirect, +) +from .fitting import ( + Fitting, +) +from .polarizability import ( + PolarFittingNet, +) +from .property import ( + PropertyFittingNet, +) +from .type_predict import ( + TypePredictNet, +) + +__all__ = [ + "DenoiseNet", + "DipoleFittingNet", + "EnergyFittingNet", + "EnergyFittingNetDirect", + "Fitting", + "BaseFitting", + "TypePredictNet", + "PolarFittingNet", + "DOSFittingNet", + "PropertyFittingNet", +] diff --git a/deepmd/pd/model/task/base_fitting.py b/deepmd/pd/model/task/base_fitting.py new file mode 100644 index 0000000000..9ad3b801cd --- /dev/null +++ b/deepmd/pd/model/task/base_fitting.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import paddle + +from deepmd.dpmodel.fitting import ( + make_base_fitting, +) + +BaseFitting = make_base_fitting(paddle.Tensor, fwd_method_name="forward") diff --git a/deepmd/pd/model/task/denoise.py b/deepmd/pd/model/task/denoise.py new file mode 100644 index 0000000000..d1fca089f1 --- /dev/null +++ b/deepmd/pd/model/task/denoise.py @@ -0,0 +1,137 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.pd.model.network.network import ( + MaskLMHead, + NonLinearHead, +) +from deepmd.pd.model.task.fitting import ( + Fitting, +) +from deepmd.pd.utils import ( + env, +) + + +@fitting_check_output +class DenoiseNet(Fitting): + def __init__( + self, + feature_dim, + ntypes, + attn_head=8, + prefactor=[0.5, 0.5], + activation_function="gelu", + **kwargs, + ): + """Construct a denoise net. + + Args: + - ntypes: Element count. + - embedding_width: Embedding width per atom. + - neuron: Number of neurons in each hidden layers of the fitting net. + - bias_atom_e: Average enery per atom for each element. + - resnet_dt: Using time-step in the ResNet construction. 
+ """ + super().__init__() + self.feature_dim = feature_dim + self.ntypes = ntypes + self.attn_head = attn_head + self.prefactor = paddle.to_tensor( + prefactor, dtype=env.GLOBAL_PD_FLOAT_PRECISION, device=env.DEVICE + ) + + self.lm_head = MaskLMHead( + embed_dim=self.feature_dim, + output_dim=ntypes, + activation_fn=activation_function, + weight=None, + ) + + if not isinstance(self.attn_head, list): + self.pair2coord_proj = NonLinearHead( + self.attn_head, 1, activation_fn=activation_function + ) + else: + self.pair2coord_proj = [] + self.ndescriptor = len(self.attn_head) + for ii in range(self.ndescriptor): + _pair2coord_proj = NonLinearHead( + self.attn_head[ii], 1, activation_fn=activation_function + ) + self.pair2coord_proj.append(_pair2coord_proj) + self.pair2coord_proj = paddle.nn.LayerList(self.pair2coord_proj) + + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + "updated_coord", + [3], + reducible=False, + r_differentiable=False, + c_differentiable=False, + ), + OutputVariableDef( + "logits", + [-1], + reducible=False, + r_differentiable=False, + c_differentiable=False, + ), + ] + ) + + def forward( + self, + pair_weights, + diff, + nlist_mask, + features, + sw, + masked_tokens: Optional[paddle.Tensor] = None, + ): + """Calculate the updated coord. + Args: + - coord: Input noisy coord with shape [nframes, nloc, 3]. + - pair_weights: Input pair weights with shape [nframes, nloc, nnei, head]. + - diff: Input pair relative coord list with shape [nframes, nloc, nnei, 3]. + - nlist_mask: Input nlist mask with shape [nframes, nloc, nnei]. + + Returns + ------- + - denoised_coord: Denoised updated coord with shape [nframes, nloc, 3]. + """ + # [nframes, nloc, nnei, 1] + logits = self.lm_head(features, masked_tokens=masked_tokens) + if not isinstance(self.attn_head, list): + attn_probs = self.pair2coord_proj(pair_weights) + out_coord = (attn_probs * diff).sum(axis=-2) / ( + sw.sum(axis=-1).unsqueeze(-1) + 1e-6 + ) + else: + assert len(self.prefactor) == self.ndescriptor + all_coord_update = [] + assert len(pair_weights) == len(diff) == len(nlist_mask) == self.ndescriptor + for ii in range(self.ndescriptor): + _attn_probs = self.pair2coord_proj[ii](pair_weights[ii]) + _coord_update = (_attn_probs * diff[ii]).sum(axis=-2) / ( + nlist_mask[ii].sum(axis=-1).unsqueeze(-1) + 1e-6 + ) + all_coord_update.append(_coord_update) + out_coord = self.prefactor[0] * all_coord_update[0] + for ii in range(self.ndescriptor - 1): + out_coord += self.prefactor[ii + 1] * all_coord_update[ii + 1] + return { + "updated_coord": out_coord, + "logits": logits, + } diff --git a/deepmd/pd/model/task/dipole.py b/deepmd/pd/model/task/dipole.py new file mode 100644 index 0000000000..42080761be --- /dev/null +++ b/deepmd/pd/model/task/dipole.py @@ -0,0 +1,197 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.model.task.fitting import ( + GeneralFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +log = logging.getLogger(__name__) + + +@GeneralFitting.register("dipole") +class DipoleFittingNet(GeneralFitting): + """Construct a dipole fitting net. + + Parameters + ---------- + ntypes : int + Element count. 
+ dim_descrpt : int + Embedding width per atom. + embedding_width : int + The dimension of rotation matrix, m1. + neuron : list[int] + Number of neurons in each hidden layers of the fitting net. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + r_differentiable + If the variable is differentiated with respect to coordinates of atoms. + Only reducible variable are differentiable. + c_differentiable + If the variable is differentiated with respect to the cell tensor (pbc case). + Only reducible variable are differentiable. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. + """ + + def __init__( + self, + ntypes: int, + dim_descrpt: int, + embedding_width: int, + neuron: list[int] = [128, 128, 128], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[Union[int, list[int]]] = None, + exclude_types: list[int] = [], + r_differentiable: bool = True, + c_differentiable: bool = True, + type_map: Optional[list[str]] = None, + **kwargs, + ): + self.embedding_width = embedding_width + self.r_differentiable = r_differentiable + self.c_differentiable = c_differentiable + super().__init__( + var_name="dipole", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + type_map=type_map, + **kwargs, + ) + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return self.embedding_width + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "dipole" + data["embedding_width"] = self.embedding_width + data["r_differentiable"] = self.r_differentiable + data["c_differentiable"] = self.c_differentiable + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("var_name", None) + return super().deserialize(data) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [3], + reducible=True, + r_differentiable=self.r_differentiable, + c_differentiable=self.c_differentiable, + ), + ] + ) + + def compute_output_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. 
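# The contraction DipoleFittingNet.forward (below) uses to turn the invariant
# fitting output of width m1 into a per-atom vector: a batched product with the
# rotation matrix gr. Sizes here are hypothetical:
import numpy as np

nframes, nloc, m1 = 1, 4, 8
out = np.random.rand(nframes * nloc, 1, m1)   # fitting-net output, reshaped
gr = np.random.rand(nframes * nloc, m1, 3)    # rotation matrix from the descriptor
dipole = (out @ gr).squeeze(1).reshape(nframes, nloc, 3)
print(dipole.shape)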
+ - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. + + """ + pass + + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + nframes, nloc, _ = descriptor.shape + assert gr is not None, "Must provide the rotation matrix for dipole fitting." + # (nframes, nloc, m1) + out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[ + self.var_name + ] + # (nframes * nloc, 1, m1) + out = out.reshape([-1, 1, self.embedding_width]) + # (nframes * nloc, m1, 3) + gr = gr.reshape([nframes * nloc, self.embedding_width, 3]) + # (nframes, nloc, 3) + out = paddle.bmm(out, gr).squeeze(-2).reshape([nframes, nloc, 3]) + return {self.var_name: out.to(env.GLOBAL_PD_FLOAT_PRECISION)} + + # make jit happy with paddle 2.0.0 + exclude_types: list[int] diff --git a/deepmd/pd/model/task/dos.py b/deepmd/pd/model/task/dos.py new file mode 100644 index 0000000000..35ce8cc16a --- /dev/null +++ b/deepmd/pd/model/task/dos.py @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.model.task.ener import ( + InvarFitting, +) +from deepmd.pd.model.task.fitting import ( + Fitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@Fitting.register("dos") +class DOSFittingNet(InvarFitting): + def __init__( + self, + ntypes: int, + dim_descrpt: int, + numb_dos: int = 300, + neuron: list[int] = [128, 128, 128], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + rcond: Optional[float] = None, + bias_dos: Optional[paddle.Tensor] = None, + trainable: Union[bool, list[bool]] = True, + seed: Optional[Union[int, list[int]]] = None, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + exclude_types: list[int] = [], + mixed_types: bool = True, + type_map: Optional[list[str]] = None, + ): + super().__init__( + var_name="dos", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + dim_out=numb_dos, + neuron=neuron, + bias_atom_e=bias_dos, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + trainable=trainable, + type_map=type_map, + ) + if bias_dos is not None: + self.bias_dos = bias_dos + else: + self.bias_dos = paddle.zeros((ntypes, numb_dos), dtype=dtype).to( + device=env.DEVICE + ) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reducible=True, + r_differentiable=False, + c_differentiable=False, + ), + ] + ) + + @classmethod + def deserialize(cls, data: dict) -> "DOSFittingNet": + data = 
copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("@class", None) + data.pop("var_name", None) + data.pop("tot_ener_zero", None) + data.pop("layer_name", None) + data.pop("use_aparam_as_mask", None) + data.pop("spin", None) + data.pop("atom_ener", None) + data["numb_dos"] = data.pop("dim_out") + obj = super().deserialize(data) + + return obj + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + # dd = super(InvarFitting, self).serialize() + dd = { + **InvarFitting.serialize(self), + "type": "dos", + "dim_out": self.dim_out, + } + dd["@variables"]["bias_atom_e"] = to_numpy_array(self.bias_atom_e) + + return dd + + # make jit happy with paddle 2.0.0 + exclude_types: list[int] diff --git a/deepmd/pd/model/task/ener.py b/deepmd/pd/model/task/ener.py new file mode 100644 index 0000000000..24f563f799 --- /dev/null +++ b/deepmd/pd/model/task/ener.py @@ -0,0 +1,257 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.pd.model.network.network import ( + ResidualDeep, +) +from deepmd.pd.model.task.fitting import ( + Fitting, + GeneralFitting, +) +from deepmd.pd.model.task.invar_fitting import ( + InvarFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@Fitting.register("ener") +class EnergyFittingNet(InvarFitting): + def __init__( + self, + ntypes: int, + dim_descrpt: int, + neuron: list[int] = [128, 128, 128], + bias_atom_e: Optional[paddle.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, + **kwargs, + ): + super().__init__( + "energy", + ntypes, + dim_descrpt, + 1, + neuron=neuron, + bias_atom_e=bias_atom_e, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + seed=seed, + type_map=type_map, + **kwargs, + ) + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("var_name") + data.pop("dim_out") + return super().deserialize(data) + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + return { + **super().serialize(), + "type": "ener", + } + + # make jit happy with paddle 2.0.0 + exclude_types: list[int] + + +@Fitting.register("direct_force") +@Fitting.register("direct_force_ener") +@fitting_check_output +class EnergyFittingNetDirect(Fitting): + def __init__( + self, + ntypes, + dim_descrpt, + neuron, + bias_atom_e=None, + out_dim=1, + resnet_dt=True, + use_tebd=True, + return_energy=False, + **kwargs, + ): + """Construct a fitting net for energy. + + Args: + - ntypes: Element count. + - embedding_width: Embedding width per atom. + - neuron: Number of neurons in each hidden layers of the fitting net. + - bias_atom_e: Average enery per atom for each element. 
+ - resnet_dt: Using time-step in the ResNet construction. + """ + super().__init__() + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt + self.use_tebd = use_tebd + self.out_dim = out_dim + if bias_atom_e is None: + bias_atom_e = np.zeros([self.ntypes]) # pylint: disable=no-explicit-dtype + if not use_tebd: + assert self.ntypes == len(bias_atom_e), "Element count mismatches!" + bias_atom_e = paddle.to_tensor(bias_atom_e).to(device=env.DEVICE) # pylint: disable=no-explicit-dtype + self.register_buffer("bias_atom_e", bias_atom_e) + + filter_layers_dipole = [] + for type_i in range(self.ntypes): + one = ResidualDeep( + type_i, + dim_descrpt, + neuron, + 0.0, + out_dim=out_dim, + resnet_dt=resnet_dt, + ) + filter_layers_dipole.append(one) + self.filter_layers_dipole = paddle.nn.LayerList(filter_layers_dipole) + + self.return_energy = return_energy + filter_layers = [] + if self.return_energy: + for type_i in range(self.ntypes): + bias_type = 0.0 if self.use_tebd else bias_atom_e[type_i] + one = ResidualDeep( + type_i, dim_descrpt, neuron, bias_type, resnet_dt=resnet_dt + ) + filter_layers.append(one) + self.filter_layers = paddle.nn.LayerList(filter_layers) + + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + "energy", + [1], + reducible=True, + r_differentiable=False, + c_differentiable=False, + ), + OutputVariableDef( + "dforce", + [3], + reducible=False, + r_differentiable=False, + c_differentiable=False, + ), + ] + ) + + def serialize(self) -> dict: + raise NotImplementedError + + def deserialize(self) -> "EnergyFittingNetDirect": + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + raise NotImplementedError + + def get_type_map(self) -> list[str]: + raise NotImplementedError + + def forward( + self, + inputs: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ) -> tuple[paddle.Tensor, None]: + """Based on embedding net output, alculate total energy. + + Args: + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. + + Returns + ------- + - `paddle.Tensor`: Total energy with shape [nframes, natoms[0]]. 
+ """ + nframes, nloc, _ = inputs.shape + if self.use_tebd: + # if atype_tebd is not None: + # inputs = paddle.concat([inputs, atype_tebd], axis=-1) + vec_out = self.filter_layers_dipole[0]( + inputs + ) # Shape is [nframes, nloc, m1] + assert list(vec_out.shape) == [nframes, nloc, self.out_dim] + # (nf x nloc) x 1 x od + vec_out = vec_out.reshape([-1, 1, self.out_dim]) + assert gr is not None + # (nf x nloc) x od x 3 + gr = gr.reshape([-1, self.out_dim, 3]) + vec_out = ( + paddle.bmm(vec_out, gr).squeeze(-2).reshape([nframes, nloc, 3]) + ) # Shape is [nframes, nloc, 3] + else: + vec_out = paddle.zeros_like(atype).unsqueeze(-1) # jit assertion + for type_i, filter_layer in enumerate(self.filter_layers_dipole): + mask = atype == type_i + vec_out_type = filter_layer(inputs) # Shape is [nframes, nloc, m1] + vec_out_type = vec_out_type * mask.unsqueeze(-1) + vec_out = vec_out + vec_out_type # Shape is [nframes, natoms[0], 1] + + outs = paddle.zeros_like(atype).unsqueeze(-1) # jit assertion + if self.return_energy: + if self.use_tebd: + atom_energy = self.filter_layers[0](inputs) + self.bias_atom_e[ + atype + ].unsqueeze(-1) + outs = ( + outs.astype(atom_energy.dtype) + atom_energy + ) # Shape is [nframes, natoms[0], 1] + else: + for type_i, filter_layer in enumerate(self.filter_layers): + mask = atype == type_i + atom_energy = filter_layer(inputs) + if not env.ENERGY_BIAS_TRAINABLE: + atom_energy = atom_energy + self.bias_atom_e[type_i] + atom_energy = atom_energy * mask.unsqueeze(-1) + outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] + return { + "energy": outs.to(env.GLOBAL_PD_FLOAT_PRECISION), + "dforce": vec_out, + } diff --git a/deepmd/pd/model/task/fitting.py b/deepmd/pd/model/task/fitting.py new file mode 100644 index 0000000000..63a6ff682e --- /dev/null +++ b/deepmd/pd/model/task/fitting.py @@ -0,0 +1,499 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from abc import ( + abstractmethod, +) +from typing import ( + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.network.mlp import ( + FittingNet, + NetworkCollection, +) +from deepmd.pd.model.task.base_fitting import ( + BaseFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.pd.utils.exclude_mask import ( + AtomExcludeMask, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_atom_exclude_types, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +class Fitting(paddle.nn.Layer, BaseFitting): + # plugin moved to BaseFitting + + def __new__(cls, *args, **kwargs): + if cls is Fitting: + return BaseFitting.__new__(BaseFitting, *args, **kwargs) + return super().__new__(cls) + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only fitting nets of the same type can share params!" 
+ if shared_level == 0: + # link buffers + if hasattr(self, "bias_atom_e"): + self.bias_atom_e = base_class.bias_atom_e + # the following will successfully link all the params except buffers, which need manually link. + for item in self._sub_layers: + self._sub_layers[item] = base_class._sub_layers[item] + elif shared_level == 1: + # only not share the bias_atom_e + # the following will successfully link all the params except buffers, which need manually link. + for item in self._sub_layers: + self._sub_layers[item] = base_class._sub_layers[item] + else: + raise NotImplementedError + + +class GeneralFitting(Fitting): + """Construct a general fitting net. + + Parameters + ---------- + var_name : str + The atomic property to fit, 'energy', 'dipole', and 'polar'. + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + dim_out : int + The output dimension of the fitting net. + neuron : list[int] + Number of neurons in each hidden layers of the fitting net. + bias_atom_e : paddle.Tensor, optional + Average enery per atom for each element. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + exclude_types: list[int] + Atomic contributions of the excluded atom types are set zero. + trainable : Union[list[bool], bool] + If the parameters in the fitting net are trainable. + Now this only supports setting all the parameters in the fitting net at one state. + When in list[bool], the trainable will be True only if all the boolean parameters are True. + remove_vaccum_contribution: list[bool], optional + Remove vaccum contribution before the bias is added. The list assigned each + type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same + length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. 
+ """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + neuron: list[int] = [128, 128, 128], + bias_atom_e: Optional[paddle.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[Union[int, list[int]]] = None, + exclude_types: list[int] = [], + trainable: Union[bool, list[bool]] = True, + remove_vaccum_contribution: Optional[list[bool]] = None, + type_map: Optional[list[str]] = None, + **kwargs, + ): + super().__init__() + self.var_name = var_name + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt + self.neuron = neuron + self.mixed_types = mixed_types + self.resnet_dt = resnet_dt + self.numb_fparam = numb_fparam + self.numb_aparam = numb_aparam + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.rcond = rcond + self.seed = seed + self.type_map = type_map + # order matters, should be place after the assignment of ntypes + self.reinit_exclude(exclude_types) + self.trainable = trainable + # need support for each layer settings + self.trainable = ( + all(self.trainable) if isinstance(self.trainable, list) else self.trainable + ) + self.remove_vaccum_contribution = remove_vaccum_contribution + + net_dim_out = self._net_out_dim() + # init constants + if bias_atom_e is None: + bias_atom_e = np.zeros([self.ntypes, net_dim_out], dtype=np.float64) + bias_atom_e = paddle.to_tensor(bias_atom_e, dtype=self.prec).to(device=device) + bias_atom_e = bias_atom_e.reshape([self.ntypes, net_dim_out]) + if not self.mixed_types: + assert self.ntypes == bias_atom_e.shape[0], "Element count mismatches!" + self.register_buffer("bias_atom_e", bias_atom_e) + + if self.numb_fparam > 0: + self.register_buffer( + "fparam_avg", + paddle.zeros([self.numb_fparam], dtype=self.prec).to(device=device), + ) + self.register_buffer( + "fparam_inv_std", + paddle.ones([self.numb_fparam], dtype=self.prec).to(device=device), + ) + else: + self.fparam_avg, self.fparam_inv_std = None, None + if self.numb_aparam > 0: + self.register_buffer( + "aparam_avg", + paddle.zeros([self.numb_aparam], dtype=self.prec).to(device=device), + ) + self.register_buffer( + "aparam_inv_std", + paddle.ones([self.numb_aparam], dtype=self.prec).to(device=device), + ) + else: + self.aparam_avg, self.aparam_inv_std = None, None + + in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam + + self.filter_layers = NetworkCollection( + 1 if not self.mixed_types else 0, + self.ntypes, + network_type="fitting_network", + networks=[ + FittingNet( + in_dim, + net_dim_out, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + bias_out=True, + seed=child_seed(self.seed, ii), + ) + for ii in range(self.ntypes if not self.mixed_types else 1) + ], + ) + # set trainable + for param in self.parameters(): + param.stop_gradient = not self.trainable + + def reinit_exclude( + self, + exclude_types: list[int] = [], + ): + self.exclude_types = exclude_types + self.emask = AtomExcludeMask(self.ntypes, self.exclude_types) + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. 
+ """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + assert self.mixed_types, "Only models in mixed types can perform type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + self.type_map = type_map + self.ntypes = len(type_map) + self.reinit_exclude(map_atom_exclude_types(self.exclude_types, remap_index)) + if has_new_type: + extend_shape = [len(type_map), *list(self.bias_atom_e.shape[1:])] + extend_bias_atom_e = paddle.zeros( + extend_shape, + dtype=self.bias_atom_e.dtype, + ).to(device=self.bias_atom_e.place) + self.bias_atom_e = paddle.concat( + [self.bias_atom_e, extend_bias_atom_e], axis=0 + ) + self.bias_atom_e = self.bias_atom_e[remap_index] + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + return { + "@class": "Fitting", + "@version": 2, + "var_name": self.var_name, + "ntypes": self.ntypes, + "dim_descrpt": self.dim_descrpt, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "numb_fparam": self.numb_fparam, + "numb_aparam": self.numb_aparam, + "activation_function": self.activation_function, + "precision": self.precision, + "mixed_types": self.mixed_types, + "nets": self.filter_layers.serialize(), + "rcond": self.rcond, + "exclude_types": self.exclude_types, + "@variables": { + "bias_atom_e": to_numpy_array(self.bias_atom_e), + "fparam_avg": to_numpy_array(self.fparam_avg), + "fparam_inv_std": to_numpy_array(self.fparam_inv_std), + "aparam_avg": to_numpy_array(self.aparam_avg), + "aparam_inv_std": to_numpy_array(self.aparam_inv_std), + }, + "type_map": self.type_map, + # "tot_ener_zero": self.tot_ener_zero , + # "trainable": self.trainable , + # "atom_ener": self.atom_ener , + # "layer_name": self.layer_name , + # "use_aparam_as_mask": self.use_aparam_as_mask , + # "spin": self.spin , + ## NOTICE: not supported by far + "tot_ener_zero": False, + "trainable": [self.trainable] * (len(self.neuron) + 1), + "layer_name": None, + "use_aparam_as_mask": False, + "spin": None, + } + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + variables = data.pop("@variables") + nets = data.pop("nets") + obj = cls(**data) + for kk in variables.keys(): + obj[kk] = to_paddle_tensor(variables[kk]) + obj.filter_layers = NetworkCollection.deserialize(nets) + return obj + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.numb_fparam + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.numb_aparam + + # make jit happy + exclude_types: list[int] + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. 
+ """ + # make jit happy + sel_type: list[int] = [] + for ii in range(self.ntypes): + if ii not in self.exclude_types: + sel_type.append(ii) + return sel_type + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.type_map + + def __setitem__(self, key, value): + if key in ["bias_atom_e"]: + value = value.reshape([self.ntypes, self._net_out_dim()]) + self.bias_atom_e = value + elif key in ["fparam_avg"]: + self.fparam_avg = value + elif key in ["fparam_inv_std"]: + self.fparam_inv_std = value + elif key in ["aparam_avg"]: + self.aparam_avg = value + elif key in ["aparam_inv_std"]: + self.aparam_inv_std = value + elif key in ["scale"]: + self.scale = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ["bias_atom_e"]: + return self.bias_atom_e + elif key in ["fparam_avg"]: + return self.fparam_avg + elif key in ["fparam_inv_std"]: + return self.fparam_inv_std + elif key in ["aparam_avg"]: + return self.aparam_avg + elif key in ["aparam_inv_std"]: + return self.aparam_inv_std + elif key in ["scale"]: + return self.scale + else: + raise KeyError(key) + + @abstractmethod + def _net_out_dim(self): + """Set the FittingNet output dim.""" + pass + + def _extend_f_avg_std(self, xx: paddle.Tensor, nb: int) -> paddle.Tensor: + return paddle.tile(xx.reshape([1, self.numb_fparam]), [nb, 1]) + + def _extend_a_avg_std(self, xx: paddle.Tensor, nb: int, nloc: int) -> paddle.Tensor: + return paddle.tile(xx.reshape([1, 1, self.numb_aparam]), [nb, nloc, 1]) + + def _forward_common( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + xx = descriptor + if self.remove_vaccum_contribution is not None: + # TODO: compute the input for vaccm when remove_vaccum_contribution is set + # Idealy, the input for vaccum should be computed; + # we consider it as always zero for convenience. + # Needs a compute_input_stats for vaccum passed from the + # descriptor. + xx_zeros = paddle.zeros_like(xx) + else: + xx_zeros = None + nf, nloc, nd = xx.shape + net_dim_out = self._net_out_dim() + + if nd != self.dim_descrpt: + raise ValueError( + "get an input descriptor of dim {nd}," + "which is not consistent with {self.dim_descrpt}." 
+ ) + # check fparam dim, concate to input descriptor + if self.numb_fparam > 0: + assert fparam is not None, "fparam should not be None" + assert self.fparam_avg is not None + assert self.fparam_inv_std is not None + if fparam.shape[-1] != self.numb_fparam: + raise ValueError( + "get an input fparam of dim {fparam.shape[-1]}, ", + "which is not consistent with {self.numb_fparam}.", + ) + fparam = fparam.reshape([nf, self.numb_fparam]) + nb, _ = fparam.shape + t_fparam_avg = self._extend_f_avg_std(self.fparam_avg, nb) + t_fparam_inv_std = self._extend_f_avg_std(self.fparam_inv_std, nb) + fparam = (fparam - t_fparam_avg) * t_fparam_inv_std + fparam = paddle.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1]) + xx = paddle.concat( + [xx, fparam], + axis=-1, + ) + if xx_zeros is not None: + xx_zeros = paddle.concat( + [xx_zeros, fparam], + axis=-1, + ) + # check aparam dim, concate to input descriptor + if self.numb_aparam > 0: + assert aparam is not None, "aparam should not be None" + assert self.aparam_avg is not None + assert self.aparam_inv_std is not None + if aparam.shape[-1] != self.numb_aparam: + raise ValueError( + f"get an input aparam of dim {aparam.shape[-1]}, ", + f"which is not consistent with {self.numb_aparam}.", + ) + aparam = aparam.reshape([nf, -1, self.numb_aparam]) + nb, nloc, _ = aparam.shape + t_aparam_avg = self._extend_a_avg_std(self.aparam_avg, nb, nloc) + t_aparam_inv_std = self._extend_a_avg_std(self.aparam_inv_std, nb, nloc) + aparam = (aparam - t_aparam_avg) * t_aparam_inv_std + xx = paddle.concat( + [xx, aparam], + axis=-1, + ) + if xx_zeros is not None: + xx_zeros = paddle.concat( + [xx_zeros, aparam], + axis=-1, + ) + + outs = paddle.zeros( + (nf, nloc, net_dim_out), + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ).to(device=descriptor.place) # jit assertion + if self.mixed_types: + atom_property = self.filter_layers.networks[0](xx) + self.bias_atom_e[atype] + if xx_zeros is not None: + atom_property -= self.filter_layers.networks[0](xx_zeros) + outs = outs + atom_property # Shape is [nframes, natoms[0], net_dim_out] + else: + for type_i, ll in enumerate(self.filter_layers.networks): + mask = (atype == type_i).unsqueeze(-1) + mask.stop_gradient = True + mask = paddle.tile(mask, (1, 1, net_dim_out)) + atom_property = ll(xx) + if xx_zeros is not None: + # must assert, otherwise jit is not happy + assert self.remove_vaccum_contribution is not None + if not ( + len(self.remove_vaccum_contribution) > type_i + and not self.remove_vaccum_contribution[type_i] + ): + atom_property -= ll(xx_zeros) + atom_property = atom_property + self.bias_atom_e[type_i] + atom_property = atom_property * mask.astype(atom_property.dtype) + outs = ( + outs + atom_property + ) # Shape is [nframes, natoms[0], net_dim_out] + # nf x nloc + mask = self.emask(atype) + # nf x nloc x nod + outs = outs * mask[:, :, None].astype(outs.dtype) + return {self.var_name: outs.astype(env.GLOBAL_PD_FLOAT_PRECISION)} diff --git a/deepmd/pd/model/task/invar_fitting.py b/deepmd/pd/model/task/invar_fitting.py new file mode 100644 index 0000000000..5a6cad7c2d --- /dev/null +++ b/deepmd/pd/model/task/invar_fitting.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.pd.model.task.fitting import ( + GeneralFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + 
DEFAULT_PRECISION, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@GeneralFitting.register("invar") +@fitting_check_output +class InvarFitting(GeneralFitting): + """Construct a fitting net for energy. + + Parameters + ---------- + var_name : str + The atomic property to fit, 'energy', 'dipole', and 'polar'. + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + dim_out : int + The output dimension of the fitting net. + neuron : List[int] + Number of neurons in each hidden layers of the fitting net. + bias_atom_e : paddle.Tensor, optional + Average enery per atom for each element. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + exclude_types: List[int] + Atomic contributions of the excluded atom types are set zero. + atom_ener: List[Optional[paddle.Tensor]], optional + Specifying atomic energy contribution in vacuum. + The value is a list specifying the bias. the elements can be None or np.array of output shape. + For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.] + The `set_davg_zero` key in the descrptor should be set. + type_map: List[str], Optional + A list of strings. Give the name to each type of atoms. 
+ + """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + dim_out: int, + neuron: list[int] = [128, 128, 128], + bias_atom_e: Optional[paddle.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[Union[int, list[int]]] = None, + exclude_types: list[int] = [], + atom_ener: Optional[list[Optional[paddle.Tensor]]] = None, + type_map: Optional[list[str]] = None, + **kwargs, + ): + self.dim_out = dim_out + self.atom_ener = atom_ener + super().__init__( + var_name=var_name, + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + bias_atom_e=bias_atom_e, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + remove_vaccum_contribution=None + if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0 + else [x is not None for x in atom_ener], + type_map=type_map, + **kwargs, + ) + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return self.dim_out + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "invar" + data["dim_out"] = self.dim_out + data["atom_ener"] = self.atom_ener + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + return super().deserialize(data) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + """Based on embedding net output, alculate total energy. + + Args: + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. + + Returns + ------- + - `paddle.Tensor`: Total energy with shape [nframes, natoms[0]]. 
+ """ + return self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam) + + # make jit happy with paddle 2.0.0 + exclude_types: list[int] diff --git a/deepmd/pd/model/task/polarizability.py b/deepmd/pd/model/task/polarizability.py new file mode 100644 index 0000000000..ab13b51076 --- /dev/null +++ b/deepmd/pd/model/task/polarizability.py @@ -0,0 +1,259 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.model.task.fitting import ( + GeneralFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +log = logging.getLogger(__name__) + + +@GeneralFitting.register("polar") +class PolarFittingNet(GeneralFitting): + """Construct a polar fitting net. + + Parameters + ---------- + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + embedding_width : int + The dimension of rotation matrix, m1. + neuron : list[int] + Number of neurons in each hidden layers of the fitting net. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + fit_diag : bool + Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to + normal polarizability matrix by contracting with the rotation matrix. + scale : list[float] + The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] + shift_diag : bool + Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. + + """ + + def __init__( + self, + ntypes: int, + dim_descrpt: int, + embedding_width: int, + neuron: list[int] = [128, 128, 128], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[Union[int, list[int]]] = None, + exclude_types: list[int] = [], + fit_diag: bool = True, + scale: Optional[Union[list[float], float]] = None, + shift_diag: bool = True, + type_map: Optional[list[str]] = None, + **kwargs, + ): + self.embedding_width = embedding_width + self.fit_diag = fit_diag + self.scale = scale + if self.scale is None: + self.scale = [1.0 for _ in range(ntypes)] + else: + if isinstance(self.scale, list): + assert ( + len(self.scale) == ntypes + ), "Scale should be a list of length ntypes." + elif isinstance(self.scale, float): + self.scale = [self.scale for _ in range(ntypes)] + else: + raise ValueError( + "Scale must be a list of float of length ntypes or a float." 
+ ) + super().__init__( + var_name="polar", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + type_map=type_map, + **kwargs, + ) + self.scale = paddle.to_tensor( + self.scale, dtype=env.GLOBAL_PD_FLOAT_PRECISION, place=env.DEVICE + ).reshape([ntypes, 1]) + self.shift_diag = shift_diag + self.constant_matrix = paddle.zeros( + [ntypes], dtype=env.GLOBAL_PD_FLOAT_PRECISION + ).to(device=env.DEVICE) + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return ( + self.embedding_width + if self.fit_diag + else self.embedding_width * self.embedding_width + ) + + def __setitem__(self, key, value): + if key in ["constant_matrix"]: + self.constant_matrix = value + else: + super().__setitem__(key, value) + + def __getitem__(self, key): + if key in ["constant_matrix"]: + return self.constant_matrix + else: + return super().__getitem__(key) + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + assert self.mixed_types, "Only models in mixed types can perform type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + super().change_type_map(type_map=type_map) + if has_new_type: + extend_shape = [len(type_map), *list(self.scale.shape[1:])] + extend_scale = paddle.ones( + extend_shape, dtype=self.scale.dtype, device=self.scale.place + ) + self.scale = paddle.concat([self.scale, extend_scale], axis=0) + extend_shape = [len(type_map), *list(self.constant_matrix.shape[1:])] + extend_constant_matrix = paddle.zeros( + extend_shape, + dtype=self.constant_matrix.dtype, + ).to(device=self.constant_matrix.place) + self.constant_matrix = paddle.concat( + [self.constant_matrix, extend_constant_matrix], axis=0 + ) + self.scale = self.scale[remap_index] + self.constant_matrix = self.constant_matrix[remap_index] + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "polar" + data["@version"] = 3 + data["embedding_width"] = self.embedding_width + data["fit_diag"] = self.fit_diag + data["shift_diag"] = self.shift_diag + data["@variables"]["scale"] = to_numpy_array(self.scale) + data["@variables"]["constant_matrix"] = to_numpy_array(self.constant_matrix) + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 3, 1) + data.pop("var_name", None) + return super().deserialize(data) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + "polarizability", + [3, 3], + reducible=True, + r_differentiable=False, + c_differentiable=False, + ), + ] + ) + + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + nframes, nloc, _ = descriptor.shape + assert 
( + gr is not None + ), "Must provide the rotation matrix for polarizability fitting." + # (nframes, nloc, _net_out_dim) + out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[ + self.var_name + ] + out = out * (self.scale.to(atype.place))[atype] + gr = gr.reshape( + [nframes * nloc, self.embedding_width, 3] + ) # (nframes * nloc, m1, 3) + + if self.fit_diag: + out = out.reshape([-1, self.embedding_width]) + out = paddle.einsum("ij,ijk->ijk", out, gr) + else: + out = out.reshape([-1, self.embedding_width, self.embedding_width]) + out = (out + out.transpose([0, 2, 1])) / 2 + out = paddle.einsum("bim,bmj->bij", out, gr) # (nframes * nloc, m1, 3) + out = paddle.einsum( + "bim,bmj->bij", gr.transpose([0, 2, 1]), out + ) # (nframes * nloc, 3, 3) + out = out.reshape([nframes, nloc, 3, 3]) + return {"polarizability": out.to(env.GLOBAL_PD_FLOAT_PRECISION)} + + # make jit happy with paddle 2.0.0 + exclude_types: list[int] diff --git a/deepmd/pd/model/task/property.py b/deepmd/pd/model/task/property.py new file mode 100644 index 0000000000..0d95d24a47 --- /dev/null +++ b/deepmd/pd/model/task/property.py @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Optional, +) + +import paddle + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.model.task.ener import ( + InvarFitting, +) +from deepmd.pd.model.task.fitting import ( + Fitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@Fitting.register("property") +class PropertyFittingNet(InvarFitting): + """Fitting the rotationally invariant porperties of `task_dim` of the system. + + Parameters + ---------- + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + task_dim : int + The dimension of outputs of fitting net. + neuron : list[int] + Number of neurons in each hidden layers of the fitting net. + bias_atom_p : paddle.Tensor, optional + Average property per atom for each element. + intensive : bool, optional + Whether the fitting property is intensive. + bias_method : str, optional + The method of applying the bias to each atomic output, user can select 'normal' or 'no_bias'. + If 'normal' is used, the computed bias will be added to the atomic output. + If 'no_bias' is used, no bias will be added to the atomic output. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + seed : int, optional + Random seed. 
+ """ + + def __init__( + self, + ntypes: int, + dim_descrpt: int, + task_dim: int = 1, + neuron: list[int] = [128, 128, 128], + bias_atom_p: Optional[paddle.Tensor] = None, + intensive: bool = False, + bias_method: str = "normal", + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + seed: Optional[int] = None, + **kwargs, + ): + self.task_dim = task_dim + self.intensive = intensive + self.bias_method = bias_method + super().__init__( + var_name="property", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + dim_out=task_dim, + neuron=neuron, + bias_atom_e=bias_atom_p, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + seed=seed, + **kwargs, + ) + + def get_bias_method(self) -> str: + return self.bias_method + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reducible=True, + r_differentiable=False, + c_differentiable=False, + intensive=self.intensive, + ), + ] + ) + + @classmethod + def deserialize(cls, data: dict) -> "PropertyFittingNet": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("dim_out") + data.pop("var_name") + obj = super().deserialize(data) + + return obj + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + dd = { + **InvarFitting.serialize(self), + "type": "property", + "task_dim": self.task_dim, + } + + return dd + + exclude_types: list[int] diff --git a/deepmd/pd/model/task/task.py b/deepmd/pd/model/task/task.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pd/model/task/task.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pd/model/task/type_predict.py b/deepmd/pd/model/task/type_predict.py new file mode 100644 index 0000000000..241d4837d5 --- /dev/null +++ b/deepmd/pd/model/task/type_predict.py @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +import paddle + +from deepmd.pd.model.network.network import ( + MaskLMHead, +) +from deepmd.pd.model.task import ( + Fitting, +) + + +class TypePredictNet(Fitting): + def __init__(self, feature_dim, ntypes, activation_function="gelu", **kwargs): + """Construct a type predict net. + + Args: + - feature_dim: Input dm. + - ntypes: Numer of types to predict. + - activation_function: Activate function. + """ + super().__init__() + self.feature_dim = feature_dim + self.ntypes = ntypes + self.lm_head = MaskLMHead( + embed_dim=self.feature_dim, + output_dim=ntypes, + activation_fn=activation_function, + weight=None, + ) + + def forward(self, features, masked_tokens: Optional[paddle.Tensor] = None): + """Calculate the predicted logits. + Args: + - features: Input features with shape [nframes, nloc, feature_dim]. + - masked_tokens: Input masked tokens with shape [nframes, nloc]. + + Returns + ------- + - logits: Predicted probs with shape [nframes, nloc, ntypes]. 
+ """ + # [nframes, nloc, ntypes] + logits = self.lm_head(features, masked_tokens=masked_tokens) + return logits diff --git a/deepmd/pd/optimizer/KFWrapper.py b/deepmd/pd/optimizer/KFWrapper.py new file mode 100644 index 0000000000..32838fc389 --- /dev/null +++ b/deepmd/pd/optimizer/KFWrapper.py @@ -0,0 +1,162 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import math + +import numpy as np +import paddle +import paddle.distributed as dist +import paddle.nn as nn +from paddle.optimizer import ( + Optimizer, +) + + +def _mask_update(tensor: paddle.Tensor, mask: paddle.Tensor, value: paddle.Tensor): + """ + Paddle now not do not support updating a Tensor with another Tensor by mask, + so we use other API to achieve this. + """ + mask_coord = paddle.concat( + paddle.nonzero(mask, as_tuple=True), + axis=1, + ) + t = paddle.scatter_nd_add( + tensor * (~mask).astype(tensor.dtype), + mask_coord, + value, + ) + paddle.assign(t, tensor) # inplace update + return tensor + + +class KFOptimizerWrapper: + def __init__( + self, + model: nn.Layer, + optimizer: Optimizer, + atoms_selected: int, + atoms_per_group: int, + is_distributed: bool = False, + ) -> None: + self.model = model + self.optimizer = optimizer + self.atoms_selected = atoms_selected # 24 + self.atoms_per_group = atoms_per_group # 6 + self.is_distributed = is_distributed + + def update_energy( + self, inputs: dict, Etot_label: paddle.Tensor, update_prefactor: float = 1 + ) -> None: + model_pred, _, _ = self.model(**inputs, inference_only=True) + Etot_predict = model_pred["energy"] + natoms_sum = int(inputs["atype"].shape[-1]) + self.optimizer.set_grad_prefactor(natoms_sum) + + self.optimizer.clear_grad() + bs = Etot_label.shape[0] + error = Etot_label - Etot_predict + error = error / natoms_sum + mask = error < 0 + + error = error * update_prefactor + # error[mask] = -1 * error[mask] + error = _mask_update(error, mask, -error[mask]) + error = error.mean() + + if self.is_distributed: + dist.all_reduce(error) + error /= dist.get_world_size() + + Etot_predict = update_prefactor * Etot_predict + # Etot_predict[mask] = -Etot_predict[mask] + Etot_predict = _mask_update(Etot_predict, mask, -Etot_predict[mask]) + + Etot_predict.sum().backward() + error = error * math.sqrt(bs) + self.optimizer.step(error) + return Etot_predict + + def update_force( + self, inputs: dict, Force_label: paddle.Tensor, update_prefactor: float = 1 + ) -> None: + natoms_sum = int(inputs["atype"].shape[-1]) + bs = Force_label.shape[0] + self.optimizer.set_grad_prefactor(natoms_sum * self.atoms_per_group * 3) + + index = self.__sample(self.atoms_selected, self.atoms_per_group, natoms_sum) + + for i in range(index.shape[0]): + self.optimizer.clear_grad() + model_pred, _, _ = self.model(**inputs, inference_only=True) + Etot_predict = model_pred["energy"] + natoms_sum = int(inputs["atype"].shape[-1]) + force_predict = model_pred["force"] + error_tmp = Force_label[:, index[i]] - force_predict[:, index[i]] + error_tmp = update_prefactor * error_tmp + mask = error_tmp < 0 + error_tmp = _mask_update(error_tmp, mask, -error_tmp[mask]) + # error_tmp[mask] = -1 * error_tmp[mask] + error = error_tmp.mean() / natoms_sum + + if self.is_distributed: + dist.all_reduce(error) + error /= dist.get_world_size() + + tmp_force_predict = force_predict[:, index[i]] * update_prefactor + tmp_force_predict = _mask_update( + tmp_force_predict, mask, -1 * tmp_force_predict[mask] + ) + # tmp_force_predict[mask] = -tmp_force_predict[mask] + + (tmp_force_predict.sum() + Etot_predict.sum() * 
0).backward() + error = error * math.sqrt(bs) + self.optimizer.step(error) + return Etot_predict, force_predict + + def update_denoise_coord( + self, + inputs: dict, + clean_coord: paddle.Tensor, + update_prefactor: float = 1, + mask_loss_coord: bool = True, + coord_mask: paddle.Tensor = None, + ) -> None: + natoms_sum = int(inputs["atype"].shape[-1]) + bs = clean_coord.shape[0] + self.optimizer.set_grad_prefactor(natoms_sum * self.atoms_per_group * 3) + + index = self.__sample(self.atoms_selected, self.atoms_per_group, natoms_sum) + + for i in range(index.shape[0]): + self.optimizer.clear_grad() + model_pred, _, _ = self.model(**inputs, inference_only=True) + updated_coord = model_pred["updated_coord"] + natoms_sum = int(inputs["atype"].shape[-1]) + error_tmp = clean_coord[:, index[i]] - updated_coord[:, index[i]] + error_tmp = update_prefactor * error_tmp + if mask_loss_coord: + error_tmp[~coord_mask[:, index[i]]] = 0 + mask = error_tmp < 0 + error_tmp[mask] = -1 * error_tmp[mask] + error = error_tmp.mean() / natoms_sum + + if self.is_distributed: + dist.all_reduce(error) + error /= dist.get_world_size() + + tmp_coord_predict = updated_coord[:, index[i]] * update_prefactor + tmp_coord_predict[mask] = -update_prefactor * tmp_coord_predict[mask] + + (tmp_coord_predict.sum() + updated_coord.sum() * 0).backward() + error = error * math.sqrt(bs) + self.optimizer.step(error) + return model_pred + + def __sample( + self, atoms_selected: int, atoms_per_group: int, natoms: int + ) -> np.ndarray: + if atoms_selected % atoms_per_group: + raise Exception("divider") + index = range(natoms) + rng = np.random.default_rng() + res = rng.choice(index, atoms_selected).reshape([-1, atoms_per_group]) + return res diff --git a/deepmd/pd/optimizer/LKF.py b/deepmd/pd/optimizer/LKF.py new file mode 100644 index 0000000000..d77b4a9232 --- /dev/null +++ b/deepmd/pd/optimizer/LKF.py @@ -0,0 +1,330 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +import math +from collections import ( + defaultdict, +) + +import paddle +import paddle.distributed as dist +from paddle.optimizer import ( + Optimizer, +) + + +def distribute_indices(total_length, num_workers): + indices_per_worker = total_length // num_workers + remainder = total_length % num_workers + + indices = [] + start = 0 + + for i in range(num_workers): + end = start + indices_per_worker + (1 if i < remainder else 0) + indices.append((start, end)) + start = end + + return indices, remainder + + +class LKFOptimizer(Optimizer): + def __init__( + self, + params, + kalman_lambda=0.98, + kalman_nue=0.9987, + block_size=5120, + ): + defaults = {"lr": 0.1, "kalman_nue": kalman_nue, "block_size": block_size} + + super().__init__( + defaults["lr"], + params, + ) + self.state = defaultdict(dict) + self._params = self._param_groups[0]["params"] + for param_group in self._param_groups: + param_group.update(defaults) + + if len(self._param_groups) != 1 or len(self._params) == 0: + raise ValueError( + "LKF doesn't support per-parameter options " "(parameter groups)" + ) + + # NOTE: LKF has only global state, but we register it as state for + # the first param, because this helps with casting in set_state_dict + self._state = self.state[self._params[0]] + self._state.setdefault("kalman_lambda", kalman_lambda) + self.dist_init = dist.is_available() and dist.is_initialized() + self.rank = dist.get_rank() if self.dist_init else 0 + self.dindex = [] + self.remainder = 0 + self.__init_P() + + def __init_P(self): + param_nums = [] + param_sum = 0 + block_size = 
self.__get_blocksize() + data_type = self._params[0].dtype + device = self._params[0].place + + for param_group in self._param_groups: + params = param_group["params"] + for param in params: + param_num = param.data.numel().item() + if param_sum + param_num > block_size: + if param_sum > 0: + param_nums.append(param_sum) + param_sum = param_num + else: + param_sum += param_num + + param_nums.append(param_sum) + + P = [] + params_packed_index = [] + logging.info(f"LKF parameter nums: {param_nums}") + if self.dist_init: + block_num = 0 + for param_num in param_nums: + if param_num >= block_size: + block_num += math.ceil(param_num / block_size) + else: + block_num += 1 + num_workers = dist.get_world_size() + self.dindex, self.remainder = distribute_indices(block_num, num_workers) + index = 0 + for param_num in param_nums: + if param_num >= block_size: + block_num = math.ceil(param_num / block_size) + for i in range(block_num): + device_id = self.get_device_id(index) + index += 1 + dist_device = "gpu:" + str(device_id) + if i != block_num - 1: + params_packed_index.append(block_size) + if self.rank == device_id: + P.append( + paddle.eye( + block_size, + dtype=data_type, + ).to(device=dist_device) + ) + else: + continue + else: + params_packed_index.append(param_num - block_size * i) + if self.rank == device_id: + P.append( + paddle.eye( + param_num - block_size * i, + dtype=data_type, + ).to(device=dist_device) + ) + else: + continue + + else: + device_id = self.get_device_id(index) + index += 1 + params_packed_index.append(param_num) + if self.rank == device_id: + dist_device = "gpu:" + str(device_id) + P.append( + paddle.eye(param_num, dtype=data_type).to( + device=dist_device + ) + ) + else: + for param_num in param_nums: + if param_num >= block_size: + block_num = math.ceil(param_num / block_size) + for i in range(block_num): + if i != block_num - 1: + P.append( + paddle.eye( + block_size, + dtype=data_type, + ).to(device=device) + ) + params_packed_index.append(block_size) + else: + P.append( + paddle.eye( + param_num - block_size * i, + dtype=data_type, + ).to(device=device) + ) + params_packed_index.append(param_num - block_size * i) + else: + P.append(paddle.eye(param_num, dtype=data_type).to(device=device)) + params_packed_index.append(param_num) + self._state.setdefault("P", P) + self._state.setdefault("weights_num", len(P)) + self._state.setdefault("params_packed_index", params_packed_index) + + def __get_blocksize(self): + return self._param_groups[0]["block_size"] + + def __get_nue(self): + return self._param_groups[0]["kalman_nue"] + + def __split_weights(self, weight): + block_size = self.__get_blocksize() + param_num = weight.numel().item() + res = [] + if param_num < block_size: + res.append(weight) + else: + block_num = math.ceil(param_num / block_size) + for i in range(block_num): + if i != block_num - 1: + res.append(weight[i * block_size : (i + 1) * block_size]) + else: + res.append(weight[i * block_size :]) + return res + + def __update(self, H, error, weights): + P = self._state.get("P") + kalman_lambda = self._state.get("kalman_lambda") + weights_num = self._state.get("weights_num") + params_packed_index = self._state.get("params_packed_index") + + block_size = self.__get_blocksize() + kalman_nue = self.__get_nue() + + tmp = 0 + for i in range(weights_num): + tmp = tmp + ( + kalman_lambda + paddle.matmul(paddle.matmul(H[i].T, P[i]), H[i]) + ) + if self.dist_init: + dist.all_reduce(tmp, op=dist.ReduceOp.SUM) + A = 1 / tmp + for i in range(weights_num): + K = 
paddle.matmul(P[i], H[i]) + + weights[i] = weights[i] + A * error * K + + P[i] = (1 / kalman_lambda) * (P[i] - A * paddle.matmul(K, K.T)) + if self.dist_init: + device = "gpu:" + str(self.rank) + local_shape = [tensor.shape[0] for tensor in weights] + shape_list = [ + paddle.zeros_like(paddle.empty(1), dtype=paddle.float64).to( + device=device + ) # pylint: disable=no-explicit-dtype,no-explicit-device + for _ in range(dist.get_world_size()) + ] + dist.all_gather_object(shape_list, local_shape) + weight_tensor = paddle.concat(weights) + world_shape = [sum(inner_list) for inner_list in shape_list] + weight_list = [None] * len(world_shape) + for i in range(len(world_shape)): + weight_list[i] = paddle.zeros( + [world_shape[i]], dtype=paddle.float64 + ).to(device=device) + dist.all_gather(weight_list, weight_tensor) + result = [] + for i in range(dist.get_world_size()): + result = result + list(paddle.split(weight_list[i], shape_list[i])) + weights = result + kalman_lambda = kalman_nue * kalman_lambda + 1 - kalman_nue + self._state.update({"kalman_lambda": kalman_lambda}) + + i = 0 + param_sum = 0 + for param_group in self._param_groups: + params = param_group["params"] + for param in params: + param_num = param.numel().item() + weight_tmp = weights[i][param_sum : param_sum + param_num] + if param_num < block_size: + if param.ndim > 1: + param.data = weight_tmp.reshape( + param.data.T.shape + ).T.contiguous() + else: + param.data = weight_tmp.reshape(param.data.shape) + + param_sum += param_num + + if param_sum == params_packed_index[i]: + i += 1 + param_sum = 0 + else: + block_num = math.ceil(param_num / block_size) + for j in range(block_num): + if j == 0: + tmp_weight = weights[i] + else: + tmp_weight = paddle.concat([tmp_weight, weights[i]], axis=0) + i += 1 + param.data = tmp_weight.reshape(param.data.T.shape).T.contiguous() + + def set_grad_prefactor(self, grad_prefactor): + self.grad_prefactor = grad_prefactor + + @paddle.no_grad() + def step(self, error): + params_packed_index = self._state.get("params_packed_index") + + weights = [] + H = [] + param_index = 0 + param_sum = 0 + + for param in self._params: + if param.ndim > 1: + tmp = param.data.T.contiguous().reshape([param.data.numel().item(), 1]) + if param.grad is None: + tmp_grad = paddle.zeros_like(tmp) + else: + tmp_grad = ( + (param.grad / self.grad_prefactor) + .T.contiguous() + .reshape([param.grad.numel().item(), 1]) + ) + else: + tmp = param.data.reshape([param.data.numel().item(), 1]) + if param.grad is None: + tmp_grad = paddle.zeros_like(tmp) + else: + tmp_grad = (param.grad / self.grad_prefactor).reshape( + [param.grad.numel().item(), 1] + ) + + tmp = self.__split_weights(tmp) + tmp_grad = self.__split_weights(tmp_grad) + + for split_grad, split_weight in zip(tmp_grad, tmp): + numel = split_grad.numel().item() + + if param_sum == 0: + res_grad = split_grad + res = split_weight + else: + res_grad = paddle.concat((res_grad, split_grad), axis=0) + res = paddle.concat((res, split_weight), axis=0) + + param_sum += numel + + if param_sum == params_packed_index[param_index]: + param_sum = 0 + if self.dist_init: + device_id = self.get_device_id(param_index) + if self.rank == device_id: + weights.append(res) + H.append(res_grad) + else: + weights.append(res) + H.append(res_grad) + param_index += 1 + + self.__update(H, error, weights) + + def get_device_id(self, index): + for i, (start, end) in enumerate(self.dindex): + if start <= index < end: + return i + return None diff --git a/deepmd/pd/optimizer/__init__.py 
b/deepmd/pd/optimizer/__init__.py new file mode 100644 index 0000000000..db340b3bb9 --- /dev/null +++ b/deepmd/pd/optimizer/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .KFWrapper import ( + KFOptimizerWrapper, +) +from .LKF import ( + LKFOptimizer, +) + +__all__ = ["KFOptimizerWrapper", "LKFOptimizer"] diff --git a/deepmd/pd/train/__init__.py b/deepmd/pd/train/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pd/train/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pd/train/training.py b/deepmd/pd/train/training.py new file mode 100644 index 0000000000..f0d11a4a81 --- /dev/null +++ b/deepmd/pd/train/training.py @@ -0,0 +1,1315 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import functools +import logging +import time +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) +from typing import ( + Any, +) + +import numpy as np +import paddle +import paddle.distributed as dist +from paddle.distributed import ( + fleet, +) +from paddle.io import ( + DataLoader, +) + +from deepmd.common import ( + symlink_prefix_files, +) +from deepmd.loggers.training import ( + format_training_message, + format_training_message_per_task, +) +from deepmd.pd.loss import ( + DenoiseLoss, + DOSLoss, + EnergySpinLoss, + EnergyStdLoss, + PropertyLoss, + TaskLoss, + TensorLoss, +) +from deepmd.pd.model.model import ( + get_model, + get_zbl_model, +) +from deepmd.pd.optimizer import ( + KFOptimizerWrapper, + LKFOptimizer, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) +from deepmd.pd.utils import ( + dp_random, +) +from deepmd.pd.utils.dataloader import ( + BufferedIterator, + get_weighted_sampler, +) +from deepmd.pd.utils.env import ( + DEVICE, + JIT, + NUM_WORKERS, + SAMPLER_RECORD, + enable_prim, +) +from deepmd.pd.utils.learning_rate import ( + LearningRateExp, +) +from deepmd.pd.utils.stat import ( + make_stat_input, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.data import ( + DataRequirementItem, +) +from deepmd.utils.path import ( + DPH5Path, +) + +# if paddle.__version__.startswith("2"): +# import paddle._dynamo + + +log = logging.getLogger(__name__) + + +class Trainer: + def __init__( + self, + config: dict[str, Any], + training_data, + stat_file_path=None, + validation_data=None, + init_model=None, + restart_model=None, + finetune_model=None, + force_load=False, + shared_links=None, + finetune_links=None, + init_frz_model=None, + ): + """Construct a DeePMD trainer. + + Args: + - config: The Dict-like configuration with training options. 
+ """ + enable_prim(True) + if init_model is not None: + resume_model = init_model + elif restart_model is not None: + resume_model = restart_model + elif finetune_model is not None: + resume_model = finetune_model + else: + resume_model = None + resuming = resume_model is not None + self.restart_training = restart_model is not None + model_params = config["model"] + training_params = config["training"] + self.multi_task = "model_dict" in model_params + self.finetune_links = finetune_links + self.finetune_update_stat = False + self.model_keys = ( + list(model_params["model_dict"]) if self.multi_task else ["Default"] + ) + self.rank = ( + dist.get_rank() if dist.is_available() and dist.is_initialized() else 0 + ) + self.world_size = ( + dist.get_world_size() + if dist.is_available() and dist.is_initialized() + else 1 + ) + self.num_model = len(self.model_keys) + + # Iteration config + self.num_steps = training_params["numb_steps"] + self.disp_file = training_params.get("disp_file", "lcurve.out") + self.disp_freq = training_params.get("disp_freq", 1000) + self.save_ckpt = training_params.get("save_ckpt", "model.ckpt") + self.save_freq = training_params.get("save_freq", 1000) + self.max_ckpt_keep = training_params.get("max_ckpt_keep", 5) + self.display_in_training = training_params.get("disp_training", True) + self.timing_in_training = training_params.get("time_training", True) + self.change_bias_after_training = training_params.get( + "change_bias_after_training", False + ) + self.lcurve_should_print_header = True + + def get_opt_param(params): + opt_type = params.get("opt_type", "Adam") + opt_param = { + "kf_blocksize": params.get("kf_blocksize", 5120), + "kf_start_pref_e": params.get("kf_start_pref_e", 1), + "kf_limit_pref_e": params.get("kf_limit_pref_e", 1), + "kf_start_pref_f": params.get("kf_start_pref_f", 1), + "kf_limit_pref_f": params.get("kf_limit_pref_f", 1), + } + return opt_type, opt_param + + def get_data_loader(_training_data, _validation_data, _training_params): + def get_dataloader_and_buffer(_data, _params): + if "auto_prob" in _training_params["training_data"]: + _sampler = get_weighted_sampler( + _data, _params["training_data"]["auto_prob"] + ) + elif "sys_probs" in _training_params["training_data"]: + _sampler = get_weighted_sampler( + _data, + _params["training_data"]["sys_probs"], + sys_prob=True, + ) + else: + _sampler = get_weighted_sampler(_data, "prob_sys_size") + + if _sampler is None: + log.warning( + "Sampler not specified!" + ) # None sampler will lead to a premature stop iteration. Replacement should be True in attribute of the sampler to produce expected number of items in one iteration. 
+ _dataloader = DataLoader( + _data, + batch_sampler=paddle.io.BatchSampler( + sampler=_sampler, + drop_last=False, + ), + num_workers=NUM_WORKERS + if dist.is_available() + else 0, # setting to 0 diverges the behavior of its iterator; should be >=1 + collate_fn=lambda batch: batch[0], # prevent extra conversion + # pin_memory=True, + ) + _data_buffered = BufferedIterator(iter(_dataloader)) + return _dataloader, _data_buffered + + training_dataloader, training_data_buffered = get_dataloader_and_buffer( + _training_data, _training_params + ) + + if _validation_data is not None: + ( + validation_dataloader, + validation_data_buffered, + ) = get_dataloader_and_buffer(_validation_data, _training_params) + valid_numb_batch = _training_params["validation_data"].get( + "numb_btch", 1 + ) + else: + validation_dataloader = None + validation_data_buffered = None + valid_numb_batch = 1 + return ( + training_dataloader, + training_data_buffered, + validation_dataloader, + validation_data_buffered, + valid_numb_batch, + ) + + def single_model_stat( + _model, + _data_stat_nbatch, + _training_data, + _validation_data, + _stat_file_path, + _data_requirement, + finetune_has_new_type=False, + ): + _data_requirement += get_additional_data_requirement(_model) + _training_data.add_data_requirement(_data_requirement) + if _validation_data is not None: + _validation_data.add_data_requirement(_data_requirement) + + @functools.lru_cache + def get_sample(): + sampled = make_stat_input( + _training_data.systems, + _training_data.dataloaders, + _data_stat_nbatch, + ) + return sampled + + if (not resuming or finetune_has_new_type) and self.rank == 0: + _model.compute_or_load_stat( + sampled_func=get_sample, + stat_file_path=_stat_file_path, + ) + if isinstance(_stat_file_path, DPH5Path): + _stat_file_path.root.close() + return get_sample + + def get_lr(lr_params): + assert ( + lr_params.get("type", "exp") == "exp" + ), "Only learning rate `exp` is supported!" + lr_params["stop_steps"] = self.num_steps - self.warmup_steps + lr_exp = LearningRateExp(**lr_params) + return lr_exp + + # Optimizer + if self.multi_task and training_params.get("optim_dict", None) is not None: + self.optim_dict = training_params.get("optim_dict") + missing_keys = [ + key for key in self.model_keys if key not in self.optim_dict + ] + assert ( + not missing_keys + ), f"These keys are not in optim_dict: {missing_keys}!" 
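+            # A hypothetical optim_dict, one entry per model key, might look like:
+            #   {"model_a": {"opt_type": "Adam"},
+            #    "model_b": {"opt_type": "LKF", "kf_blocksize": 5120}}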
+ self.opt_type = {} + self.opt_param = {} + for model_key in self.model_keys: + self.opt_type[model_key], self.opt_param[model_key] = get_opt_param( + self.optim_dict[model_key] + ) + else: + self.opt_type, self.opt_param = get_opt_param(training_params) + + # Model + self.model = get_model_for_wrapper(model_params) + + # Loss + if not self.multi_task: + self.loss = get_loss( + config["loss"], + config["learning_rate"]["start_lr"], + len(model_params["type_map"]), + self.model, + ) + else: + self.loss = {} + for model_key in self.model_keys: + loss_param = config["loss_dict"][model_key] + if config.get("learning_rate_dict", None) is not None: + lr_param = config["learning_rate_dict"][model_key]["start_lr"] + else: + lr_param = config["learning_rate"]["start_lr"] + ntypes = len(model_params["model_dict"][model_key]["type_map"]) + self.loss[model_key] = get_loss( + loss_param, lr_param, ntypes, self.model[model_key] + ) + + # Data + if not self.multi_task: + self.get_sample_func = single_model_stat( + self.model, + model_params.get("data_stat_nbatch", 10), + training_data, + validation_data, + stat_file_path, + self.loss.label_requirement, + finetune_has_new_type=self.finetune_links["Default"].get_has_new_type() + if self.finetune_links is not None + else False, + ) + ( + self.training_dataloader, + self.training_data, + self.validation_dataloader, + self.validation_data, + self.valid_numb_batch, + ) = get_data_loader(training_data, validation_data, training_params) + training_data.print_summary( + "training", + to_numpy_array(self.training_dataloader.batch_sampler.sampler.weights), + ) + if validation_data is not None: + validation_data.print_summary( + "validation", + to_numpy_array( + self.validation_dataloader.batch_sampler.sampler.weights + ), + ) + else: + ( + self.training_dataloader, + self.training_data, + self.validation_dataloader, + self.validation_data, + self.valid_numb_batch, + self.get_sample_func, + ) = {}, {}, {}, {}, {}, {} + for model_key in self.model_keys: + self.get_sample_func[model_key] = single_model_stat( + self.model[model_key], + model_params["model_dict"][model_key].get("data_stat_nbatch", 10), + training_data[model_key], + validation_data[model_key], + stat_file_path[model_key], + self.loss[model_key].label_requirement, + finetune_has_new_type=self.finetune_links[ + model_key + ].get_has_new_type() + if self.finetune_links is not None + else False, + ) + ( + self.training_dataloader[model_key], + self.training_data[model_key], + self.validation_dataloader[model_key], + self.validation_data[model_key], + self.valid_numb_batch[model_key], + ) = get_data_loader( + training_data[model_key], + validation_data[model_key], + training_params["data_dict"][model_key], + ) + + training_data[model_key].print_summary( + f"training in {model_key}", + to_numpy_array( + self.training_dataloader[ + model_key + ].batch_sampler.sampler.weights + ), + ) + if ( + validation_data is not None + and validation_data[model_key] is not None + ): + validation_data[model_key].print_summary( + f"validation in {model_key}", + to_numpy_array( + self.validation_dataloader[ + model_key + ].batch_sampler.sampler.weights + ), + ) + + # Learning rate + self.warmup_steps = training_params.get("warmup_steps", 0) + self.gradient_max_norm = training_params.get("gradient_max_norm", 0.0) + assert ( + self.num_steps - self.warmup_steps > 0 or self.warmup_steps == 0 + ), "Warm up steps must be less than total training steps!" 
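+        # Assumed schedule: LearningRateExp decays the learning rate exponentially over the
+        # remaining (num_steps - warmup_steps) steps, while warm_up_linear (defined further
+        # below) first ramps it linearly from 0 to start_lr during the warmup phase.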
+ if self.multi_task and config.get("learning_rate_dict", None) is not None: + self.lr_exp = {} + for model_key in self.model_keys: + self.lr_exp[model_key] = get_lr(config["learning_rate_dict"][model_key]) + else: + self.lr_exp = get_lr(config["learning_rate"]) + + # JIT + if JIT: + raise NotImplementedError( + "JIT is not supported yet when training with Paddle" + ) + self.model = paddle.jit.to_static(self.model) + + # Model Wrapper + self.wrapper = ModelWrapper(self.model, self.loss, model_params=model_params) + self.start_step = 0 + + # resuming and finetune + optimizer_state_dict = None + if resuming: + log.info(f"Resuming from {resume_model}.") + state_dict = paddle.load(resume_model) + if "model" in state_dict: + optimizer_state_dict = ( + state_dict["optimizer"] if finetune_model is None else None + ) + state_dict = state_dict["model"] + self.start_step = ( + state_dict["_extra_state"]["train_infos"]["step"] + if self.restart_training + else 0 + ) + if self.rank == 0: + if force_load: + input_keys = list(state_dict.keys()) + target_keys = list(self.wrapper.state_dict().keys()) + missing_keys = [ + item for item in target_keys if item not in input_keys + ] + if missing_keys: + target_state_dict = self.wrapper.state_dict() + slim_keys = [] + for item in missing_keys: + state_dict[item] = target_state_dict[item].clone().detach() + new_key = True + for slim_key in slim_keys: + if slim_key in item: + new_key = False + break + if new_key: + tmp_keys = ".".join(item.split(".")[:3]) + slim_keys.append(tmp_keys) + slim_keys = [i + ".*" for i in slim_keys] + log.warning( + f"Force load mode allowed! These keys are not in ckpt and will re-init: {slim_keys}" + ) + # update model params in the pretrained model + if finetune_model is not None: + new_state_dict = {} + target_state_dict = self.wrapper.state_dict() + # pretrained_model + pretrained_model = get_model_for_wrapper( + state_dict["_extra_state"]["model_params"] + ) + pretrained_model_wrapper = ModelWrapper(pretrained_model) + pretrained_model_wrapper.set_state_dict(state_dict) + # update type related params + for model_key in self.model_keys: + finetune_rule_single = self.finetune_links[model_key] + _model_key_from = finetune_rule_single.get_model_branch() + # skip if updated + if ( + finetune_rule_single.get_finetune_tmap() + != pretrained_model_wrapper.model[ + _model_key_from + ].get_type_map() + ): + model_with_new_type_stat = None + if finetune_rule_single.get_has_new_type(): + self.finetune_update_stat = True + model_with_new_type_stat = self.wrapper.model[model_key] + pretrained_model_wrapper.model[ + _model_key_from + ].change_type_map( + finetune_rule_single.get_finetune_tmap(), + model_with_new_type_stat=model_with_new_type_stat, + ) + state_dict = pretrained_model_wrapper.state_dict() + + def collect_single_finetune_params( + _model_key, + _finetune_rule_single, + _new_state_dict, + _origin_state_dict, + _random_state_dict, + ): + _new_fitting = _finetune_rule_single.get_random_fitting() + _model_key_from = _finetune_rule_single.get_model_branch() + target_keys = [ + i + for i in _random_state_dict.keys() + if i != "_extra_state" and f".{_model_key}." in i + ] + for item_key in target_keys: + if _new_fitting and (".descriptor." not in item_key): + # print(f'Keep {item_key} in old model!') + _new_state_dict[item_key] = ( + _random_state_dict[item_key].clone().detach() + ) + else: + new_key = item_key.replace( + f".{_model_key}.", f".{_model_key_from}." 
+ ) + # print(f'Replace {item_key} with {new_key} in pretrained_model!') + _new_state_dict[item_key] = ( + _origin_state_dict[new_key].clone().detach() + ) + + # collect model params from the pretrained model + for model_key in self.model_keys: + finetune_rule_single = self.finetune_links[model_key] + collect_single_finetune_params( + model_key, + finetune_rule_single, + new_state_dict, + state_dict, + target_state_dict, + ) + state_dict = new_state_dict + state_dict["_extra_state"] = self.wrapper.state_dict()[ + "_extra_state" + ] + + self.wrapper.set_state_dict(state_dict) + + # change bias for fine-tuning + if finetune_model is not None: + + def single_model_finetune( + _model, + _finetune_rule_single, + _sample_func, + ): + _model = model_change_out_bias( + _model, + _sample_func, + _bias_adjust_mode="change-by-statistic" + if not _finetune_rule_single.get_random_fitting() + else "set-by-statistic", + ) + return _model + + if not self.multi_task: + finetune_rule_single = self.finetune_links["Default"] + self.model = single_model_finetune( + self.model, finetune_rule_single, self.get_sample_func + ) + else: + for model_key in self.model_keys: + finetune_rule_single = self.finetune_links[model_key] + if not finetune_rule_single.get_resuming(): + log.info( + f"Model branch {model_key} will be fine-tuned. This may take a long time..." + ) + self.model[model_key] = single_model_finetune( + self.model[model_key], + finetune_rule_single, + self.get_sample_func[model_key], + ) + else: + log.info( + f"Model branch {model_key} will resume training." + ) + + if init_frz_model is not None: + frz_model = paddle.jit.load(init_frz_model) + self.model.set_state_dict(frz_model.state_dict()) + + # Multi-task share params + if shared_links is not None: + self.wrapper.share_params( + shared_links, + resume=(resuming and not self.finetune_update_stat) or self.rank != 0, + ) + + # TODO add lr warmups for multitask + # author: iProzd + def warm_up_linear(step, warmup_steps): + if step < warmup_steps: + return step / warmup_steps + else: + return self.lr_exp.value(step - warmup_steps) / self.lr_exp.start_lr + + # TODO add optimizers for multitask + # author: iProzd + if self.opt_type == "Adam": + self.scheduler = paddle.optimizer.lr.LambdaDecay( + learning_rate=self.lr_exp.start_lr, + lr_lambda=lambda step: warm_up_linear( + step + self.start_step, self.warmup_steps + ), + ) + self.optimizer = paddle.optimizer.Adam( + learning_rate=self.scheduler, parameters=self.wrapper.parameters() + ) + if optimizer_state_dict is not None and self.restart_training: + self.optimizer.set_state_dict(optimizer_state_dict) + elif self.opt_type == "LKF": + self.optimizer = LKFOptimizer( + [{"params": self.wrapper.parameters()}], + 0.98, + 0.99870, + self.opt_param["kf_blocksize"], + ) + else: + raise ValueError(f"Not supported optimizer type '{self.opt_type}'") + + if dist.is_available() and dist.is_initialized(): + # DDP will guarantee the model parameters are identical across all processes + self.wrapper = fleet.distributed_model( + self.wrapper, + # find_unused_parameters=True, + ) + self.optimizer = fleet.distributed_optimizer(self.optimizer) + + # Get model prob for multi-task + if self.multi_task: + self.model_prob = np.array([0.0 for key in self.model_keys]) + if training_params.get("model_prob", None) is not None: + model_prob = training_params["model_prob"] + for ii, model_key in enumerate(self.model_keys): + if model_key in model_prob: + self.model_prob[ii] += float(model_prob[model_key]) + else: + for ii, model_key in 
enumerate(self.model_keys): + self.model_prob[ii] += float(len(self.training_data[model_key])) + sum_prob = np.sum(self.model_prob) + assert sum_prob > 0.0, "Sum of model prob must be larger than 0!" + self.model_prob = self.model_prob / sum_prob + + # Tensorboard + self.enable_tensorboard = training_params.get("tensorboard", False) + self.tensorboard_log_dir = training_params.get("tensorboard_log_dir", "log") + self.tensorboard_freq = training_params.get("tensorboard_freq", 1) + self.enable_profiler = training_params.get("enable_profiler", False) + self.profiling = training_params.get("profiling", False) + self.profiling_file = training_params.get("profiling_file", "timeline.json") + + def run(self): + fout = ( + open( + self.disp_file, + mode="w" if not self.restart_training else "a", + buffering=1, + ) + if self.rank == 0 + else None + ) # line buffered + if SAMPLER_RECORD: + record_file = f"Sample_rank_{self.rank}.txt" + fout1 = open(record_file, mode="w", buffering=1) + log.info("Start to train %d steps.", self.num_steps) + if dist.is_available() and dist.is_initialized(): + log.info(f"Rank: {dist.get_rank()}/{dist.get_world_size()}") + if self.enable_tensorboard: + from tensorboardX import ( + SummaryWriter, + ) + + writer = SummaryWriter(log_dir=self.tensorboard_log_dir) + if self.enable_profiler or self.profiling: + prof = paddle.profiler.profile( + schedule=paddle.profiler.schedule(wait=1, warmup=1, active=3, repeat=1), + on_trace_ready=paddle.profiler.tensorboard_trace_handler( + self.tensorboard_log_dir + ) + if self.enable_profiler + else None, + record_shapes=True, + with_stack=True, + ) + prof.start() + + def step(_step_id, task_key="Default"): + # Paddle Profiler + if self.enable_profiler or self.profiling: + prof.step() + self.wrapper.train() + if isinstance(self.lr_exp, dict): + _lr = self.lr_exp[task_key] + else: + _lr = self.lr_exp + cur_lr = _lr.value(_step_id) + pref_lr = cur_lr + self.optimizer.clear_grad(set_to_zero=False) + input_dict, label_dict, log_dict = self.get_data( + is_train=True, task_key=task_key + ) + if SAMPLER_RECORD: + print_str = f"Step {_step_id}: sample system{log_dict['sid']} frame{log_dict['fid']}\n" + fout1.write(print_str) + fout1.flush() + if self.opt_type == "Adam": + cur_lr = self.scheduler.get_lr() + if _step_id < self.warmup_steps: + pref_lr = _lr.start_lr + else: + pref_lr = cur_lr + model_pred, loss, more_loss = self.wrapper( + **input_dict, cur_lr=pref_lr, label=label_dict, task_key=task_key + ) + loss.backward() + if self.gradient_max_norm > 0.0: + grad_norm = paddle.nn.utils.clip_grad_norm_( + self.wrapper.parameters(), self.gradient_max_norm + ) + if not paddle.isfinite(grad_norm).all(): + # check local gradnorm single GPU case, trigger NanDetector + raise FloatingPointError("gradients are Nan/Inf") + self.optimizer.step() + self.scheduler.step() + elif self.opt_type == "LKF": + if isinstance(self.loss, EnergyStdLoss): + KFOptWrapper = KFOptimizerWrapper( + self.wrapper, + self.optimizer, + 24, + 6, + dist.is_available() and dist.is_initialized(), + ) + pref_e = self.opt_param["kf_start_pref_e"] * ( + self.opt_param["kf_limit_pref_e"] + / self.opt_param["kf_start_pref_e"] + ) ** (_step_id / self.num_steps) + _ = KFOptWrapper.update_energy( + input_dict, label_dict["energy"], pref_e + ) + pref_f = self.opt_param["kf_start_pref_f"] * ( + self.opt_param["kf_limit_pref_f"] + / self.opt_param["kf_start_pref_f"] + ) ** (_step_id / self.num_steps) + p_energy, p_force = KFOptWrapper.update_force( + input_dict, label_dict["force"], pref_f + ) 
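+                    # pref_e and pref_f above interpolate geometrically from kf_start_pref_*
+                    # to kf_limit_pref_* as _step_id runs from 0 to num_steps.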
+ # [coord, atype, natoms, mapping, shift, nlist, box] + model_pred = {"energy": p_energy, "force": p_force} + module = ( + self.wrapper.module + if dist.is_available() and dist.is_initialized() + else self.wrapper + ) + + def fake_model(): + return model_pred + + _, loss, more_loss = module.loss[task_key]( + {}, + fake_model, + label_dict, + int(input_dict["atype"].shape[-1]), + learning_rate=pref_lr, + ) + elif isinstance(self.loss, DenoiseLoss): + KFOptWrapper = KFOptimizerWrapper( + self.wrapper, + self.optimizer, + 24, + 6, + dist.is_available() and dist.is_initialized(), + ) + module = ( + self.wrapper.module + if dist.is_available() and dist.is_initialized() + else self.wrapper + ) + model_pred = KFOptWrapper.update_denoise_coord( + input_dict, + label_dict["clean_coord"], + 1, + module.loss[task_key].mask_loss_coord, + label_dict["coord_mask"], + ) + loss, more_loss = module.loss[task_key]( + model_pred, + label_dict, + input_dict["natoms"], + learning_rate=pref_lr, + ) + else: + raise ValueError(f"Not supported optimizer type '{self.opt_type}'") + + # Log and persist + display_step_id = _step_id + 1 + if self.display_in_training and ( + display_step_id % self.disp_freq == 0 or display_step_id == 1 + ): + self.wrapper.eval() + + def log_loss_train(_loss, _more_loss, _task_key="Default"): + results = {} + rmse_val = { + item: _more_loss[item] + for item in _more_loss + if "l2_" not in item + } + for item in sorted(rmse_val.keys()): + results[item] = rmse_val[item] + return results + + def log_loss_valid(_task_key="Default"): + single_results = {} + sum_natoms = 0 + if not self.multi_task: + valid_numb_batch = self.valid_numb_batch + else: + valid_numb_batch = self.valid_numb_batch[_task_key] + for ii in range(valid_numb_batch): + self.optimizer.clear_grad() + input_dict, label_dict, _ = self.get_data( + is_train=False, task_key=_task_key + ) + if input_dict == {}: + # no validation data + return {} + _, loss, more_loss = self.wrapper( + **input_dict, + cur_lr=pref_lr, + label=label_dict, + task_key=_task_key, + ) + # more_loss.update({"rmse": math.sqrt(loss)}) + natoms = int(input_dict["atype"].shape[-1]) + sum_natoms += natoms + for k, v in more_loss.items(): + if "l2_" not in k: + single_results[k] = ( + single_results.get(k, 0.0) + v * natoms + ) + results = {k: v / sum_natoms for k, v in single_results.items()} + return results + + if not self.multi_task: + train_results = log_loss_train(loss, more_loss) + valid_results = log_loss_valid() + if self.rank == 0: + log.info( + format_training_message_per_task( + batch=display_step_id, + task_name="trn", + rmse=train_results, + learning_rate=cur_lr, + ) + ) + if valid_results: + log.info( + format_training_message_per_task( + batch=display_step_id, + task_name="val", + rmse=valid_results, + learning_rate=None, + ) + ) + else: + train_results = {_key: {} for _key in self.model_keys} + valid_results = {_key: {} for _key in self.model_keys} + train_results[task_key] = log_loss_train( + loss, more_loss, _task_key=task_key + ) + for _key in self.model_keys: + if _key != task_key: + self.optimizer.clear_grad() + input_dict, label_dict, _ = self.get_data( + is_train=True, task_key=_key + ) + _, loss, more_loss = self.wrapper( + **input_dict, + cur_lr=pref_lr, + label=label_dict, + task_key=_key, + ) + train_results[_key] = log_loss_train( + loss, more_loss, _task_key=_key + ) + valid_results[_key] = log_loss_valid(_task_key=_key) + if self.rank == 0: + log.info( + format_training_message_per_task( + batch=display_step_id, + task_name=_key + 
"_trn", + rmse=train_results[_key], + learning_rate=cur_lr, + ) + ) + if valid_results[_key]: + log.info( + format_training_message_per_task( + batch=display_step_id, + task_name=_key + "_val", + rmse=valid_results[_key], + learning_rate=None, + ) + ) + + current_time = time.time() + train_time = current_time - self.t0 + self.t0 = current_time + if self.rank == 0 and self.timing_in_training: + log.info( + format_training_message( + batch=display_step_id, + wall_time=train_time, + ) + ) + # the first training time is not accurate + if ( + (_step_id + 1 - self.start_step) > self.disp_freq + or self.num_steps - self.start_step < 2 * self.disp_freq + ): + self.total_train_time += train_time + + if fout: + if self.lcurve_should_print_header: + self.print_header(fout, train_results, valid_results) + self.lcurve_should_print_header = False + self.print_on_training( + fout, display_step_id, cur_lr, train_results, valid_results + ) + + if ( + ((_step_id + 1) % self.save_freq == 0 and _step_id != self.start_step) + or (_step_id + 1) == self.num_steps + ) and (self.rank == 0 or dist.get_rank() == 0): + # Handle the case if rank 0 aborted and re-assigned + self.latest_model = Path(self.save_ckpt + f"-{_step_id + 1}.pd") + + module = ( + self.wrapper.module + if dist.is_available() and dist.is_initialized() + else self.wrapper + ) + self.save_model(self.latest_model, lr=cur_lr, step=_step_id) + log.info(f"Saved model to {self.latest_model}") + symlink_prefix_files(self.latest_model.stem, self.save_ckpt) + with open("checkpoint", "w") as f: + f.write(str(self.latest_model)) + + # tensorboard + if self.enable_tensorboard and ( + display_step_id % self.tensorboard_freq == 0 or display_step_id == 1 + ): + writer.add_scalar(f"{task_key}/lr", cur_lr, display_step_id) + writer.add_scalar(f"{task_key}/loss", loss, display_step_id) + for item in more_loss: + writer.add_scalar( + f"{task_key}/{item}", more_loss[item].item(), _step_id + ) + + self.t0 = time.time() + self.total_train_time = 0.0 + for step_id in range(self.num_steps): + if step_id < self.start_step: + continue + if self.multi_task: + chosen_index_list = dp_random.choice( + np.arange( + self.num_model, dtype=np.int32 + ), # int32 should be enough for # models... 
+ p=np.array(self.model_prob), + size=self.world_size, + replace=True, + ) + assert chosen_index_list.size == self.world_size + model_index = chosen_index_list[self.rank] + model_key = self.model_keys[model_index] + else: + model_key = "Default" + step(step_id, model_key) + if JIT: + break + + if self.change_bias_after_training and (self.rank == 0 or dist.get_rank() == 0): + if not self.multi_task: + self.model = model_change_out_bias( + self.model, + self.get_sample_func, + _bias_adjust_mode="change-by-statistic", + ) + else: + for model_key in self.model_keys: + self.model[model_key] = model_change_out_bias( + self.model[model_key], + self.get_sample_func[model_key], + _bias_adjust_mode="change-by-statistic", + ) + self.latest_model = Path(self.save_ckpt + f"-{self.num_steps}.pd") + cur_lr = self.lr_exp.value(self.num_steps - 1) + self.save_model(self.latest_model, lr=cur_lr, step=self.num_steps - 1) + log.info(f"Saved model to {self.latest_model}") + symlink_prefix_files(self.latest_model.stem, self.save_ckpt) + with open("checkpoint", "w") as f: + f.write(str(self.latest_model)) + + if ( + self.rank == 0 or dist.get_rank() == 0 + ): # Handle the case if rank 0 aborted and re-assigned + if self.num_steps == 0: + # when num_steps is 0, the checkpoint is never not saved + self.latest_model = Path(self.save_ckpt + "-0.pd") + self.save_model(self.latest_model, lr=0, step=0) + log.info(f"Saved model to {self.latest_model}") + symlink_prefix_files(self.latest_model.stem, self.save_ckpt) + with open("checkpoint", "w") as f: + f.write(str(self.latest_model)) + + elapsed_batch = self.num_steps - self.start_step + if self.timing_in_training and elapsed_batch // self.disp_freq > 0: + if self.start_step >= 2 * self.disp_freq: + log.info( + "average training time: %.4f s/batch (exclude first %d batches)", + self.total_train_time + / ( + elapsed_batch // self.disp_freq * self.disp_freq + - self.disp_freq + ), + self.disp_freq, + ) + else: + log.info( + "average training time: %.4f s/batch", + self.total_train_time + / (elapsed_batch // self.disp_freq * self.disp_freq), + ) + + if JIT: + raise NotImplementedError( + "Paddle JIT saving during training is not supported yet." 
+ ) + log.info(f"Trained model has been saved to: {self.save_ckpt}") + + if fout: + fout.close() + if SAMPLER_RECORD: + fout1.close() + if self.enable_tensorboard: + writer.close() + if self.enable_profiler or self.profiling: + prof.stop() + if self.profiling: + prof.export_chrome_trace(self.profiling_file) + log.info( + f"The profiling trace have been saved to: {self.profiling_file}" + ) + + def save_model(self, save_path: Path, lr=0.0, step=0): + module = ( + self.wrapper.module + if dist.is_available() and dist.is_initialized() + else self.wrapper + ) + module.train_infos["lr"] = lr + module.train_infos["step"] = step + paddle.save( + {"model": module.state_dict(), "optimizer": self.optimizer.state_dict()}, + str(save_path), + ) + checkpoint_dir = save_path.parent + checkpoint_files = [ + f + for f in checkpoint_dir.glob("*.pd") + if not f.is_symlink() and f.name.startswith(self.save_ckpt) + ] + if len(checkpoint_files) > self.max_ckpt_keep: + checkpoint_files.sort(key=lambda x: x.stat().st_mtime) + checkpoint_files[0].unlink() + + def get_data(self, is_train=True, task_key="Default"): + if not self.multi_task: + if is_train: + try: + batch_data = next(iter(self.training_data)) + except StopIteration: + # Refresh the status of the dataloader to start from a new epoch + self.training_data = BufferedIterator( + iter(self.training_dataloader) + ) + batch_data = next(iter(self.training_data)) + else: + if self.validation_data is None: + return {}, {}, {} + try: + batch_data = next(iter(self.validation_data)) + except StopIteration: + self.validation_data = BufferedIterator( + iter(self.validation_dataloader) + ) + batch_data = next(iter(self.validation_data)) + else: + if is_train: + try: + batch_data = next(iter(self.training_data[task_key])) + except StopIteration: + # Refresh the status of the dataloader to start from a new epoch + self.training_data[task_key] = BufferedIterator( + iter(self.training_dataloader[task_key]) + ) + batch_data = next(iter(self.training_data[task_key])) + else: + if self.validation_data[task_key] is None: + return {}, {}, {} + try: + batch_data = next(iter(self.validation_data[task_key])) + except StopIteration: + self.validation_data[task_key] = BufferedIterator( + iter(self.validation_dataloader[task_key]) + ) + batch_data = next(iter(self.validation_data[task_key])) + + for key in batch_data.keys(): + if key == "sid" or key == "fid" or key == "box" or "find_" in key: + continue + elif not isinstance(batch_data[key], list): + if batch_data[key] is not None: + batch_data[key] = batch_data[key].to(DEVICE) + else: + batch_data[key] = [item.to(DEVICE) for item in batch_data[key]] + # we may need a better way to classify which are inputs and which are labels + # now wrapper only supports the following inputs: + input_keys = [ + "coord", + "atype", + "spin", + "box", + "fparam", + "aparam", + ] + input_dict = {item_key: None for item_key in input_keys} + label_dict = {} + for item_key in batch_data: + if item_key in input_keys: + input_dict[item_key] = batch_data[item_key] + else: + if item_key not in ["sid", "fid"]: + label_dict[item_key] = batch_data[item_key] + log_dict = {} + if "fid" in batch_data: + log_dict["fid"] = batch_data["fid"] + log_dict["sid"] = batch_data["sid"] + return input_dict, label_dict, log_dict + + def print_header(self, fout, train_results, valid_results): + train_keys = sorted(train_results.keys()) + print_str = "" + print_str += "# %5s" % "step" + if not self.multi_task: + if valid_results: + prop_fmt = " %11s %11s" + for k in 
train_keys: + print_str += prop_fmt % (k + "_val", k + "_trn") + else: + prop_fmt = " %11s" + for k in train_keys: + print_str += prop_fmt % (k + "_trn") + else: + for model_key in self.model_keys: + if valid_results[model_key]: + prop_fmt = " %11s %11s" + for k in sorted(train_results[model_key].keys()): + print_str += prop_fmt % ( + k + f"_val_{model_key}", + k + f"_trn_{model_key}", + ) + else: + prop_fmt = " %11s" + for k in sorted(train_results[model_key].keys()): + print_str += prop_fmt % (k + f"_trn_{model_key}") + print_str += " %8s\n" % "lr" + print_str += "# If there is no available reference data, rmse_*_{val,trn} will print nan\n" + fout.write(print_str) + fout.flush() + + def print_on_training(self, fout, step_id, cur_lr, train_results, valid_results): + train_keys = sorted(train_results.keys()) + print_str = "" + print_str += "%7d" % step_id + if not self.multi_task: + if valid_results: + prop_fmt = " %11.2e %11.2e" + for k in train_keys: + print_str += prop_fmt % (valid_results[k], train_results[k]) + else: + prop_fmt = " %11.2e" + for k in train_keys: + print_str += prop_fmt % (train_results[k]) + else: + for model_key in self.model_keys: + if valid_results[model_key]: + prop_fmt = " %11.2e %11.2e" + for k in sorted(valid_results[model_key].keys()): + print_str += prop_fmt % ( + valid_results[model_key][k], + train_results[model_key][k], + ) + else: + prop_fmt = " %11.2e" + for k in sorted(train_results[model_key].keys()): + print_str += prop_fmt % (train_results[model_key][k]) + print_str += f" {cur_lr:8.1e}\n" + fout.write(print_str) + fout.flush() + + +def get_additional_data_requirement(_model): + additional_data_requirement = [] + if _model.get_dim_fparam() > 0: + fparam_requirement_items = [ + DataRequirementItem( + "fparam", _model.get_dim_fparam(), atomic=False, must=True + ) + ] + additional_data_requirement += fparam_requirement_items + if _model.get_dim_aparam() > 0: + aparam_requirement_items = [ + DataRequirementItem( + "aparam", _model.get_dim_aparam(), atomic=True, must=True + ) + ] + additional_data_requirement += aparam_requirement_items + has_spin = getattr(_model, "has_spin", False) + if callable(has_spin): + has_spin = has_spin() + if has_spin: + spin_requirement_items = [ + DataRequirementItem("spin", ndof=3, atomic=True, must=True) + ] + additional_data_requirement += spin_requirement_items + return additional_data_requirement + + +def get_loss(loss_params, start_lr, _ntypes, _model): + loss_type = loss_params.get("type", "ener") + if loss_type == "ener": + loss_params["starter_learning_rate"] = start_lr + return EnergyStdLoss(**loss_params) + elif loss_type == "dos": + loss_params["starter_learning_rate"] = start_lr + loss_params["numb_dos"] = _model.model_output_def()["dos"].output_size + return DOSLoss(**loss_params) + elif loss_type == "ener_spin": + loss_params["starter_learning_rate"] = start_lr + return EnergySpinLoss(**loss_params) + elif loss_type == "denoise": + loss_params["ntypes"] = _ntypes + return DenoiseLoss(**loss_params) + elif loss_type == "tensor": + model_output_type = _model.model_output_type() + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + tensor_name = model_output_type[0] + loss_params["tensor_name"] = tensor_name + loss_params["tensor_size"] = _model.model_output_def()[tensor_name].output_size + label_name = tensor_name + if label_name == "polarizability": + label_name = "polar" + loss_params["label_name"] = label_name + loss_params["tensor_name"] = label_name + return 
TensorLoss(**loss_params) + elif loss_type == "property": + task_dim = _model.get_task_dim() + loss_params["task_dim"] = task_dim + return PropertyLoss(**loss_params) + else: + loss_params["starter_learning_rate"] = start_lr + return TaskLoss.get_class_by_type(loss_type).get_loss(loss_params) + + +def get_single_model( + _model_params, +): + if "use_srtab" in _model_params: + model = get_zbl_model(deepcopy(_model_params)).to(DEVICE) + else: + model = get_model(deepcopy(_model_params)).to(DEVICE) + return model + + +def get_model_for_wrapper(_model_params): + if "model_dict" not in _model_params: + _model = get_single_model( + _model_params, + ) + else: + _model = {} + model_keys = list(_model_params["model_dict"]) + for _model_key in model_keys: + _model[_model_key] = get_single_model( + _model_params["model_dict"][_model_key], + ) + return _model + + +def model_change_out_bias( + _model, + _sample_func, + _bias_adjust_mode="change-by-statistic", +): + old_bias = deepcopy(_model.get_out_bias()) + _model.change_out_bias( + _sample_func, + bias_adjust_mode=_bias_adjust_mode, + ) + new_bias = deepcopy(_model.get_out_bias()) + + model_type_map = _model.get_type_map() + log.info( + f"Change output bias of {model_type_map!s} " + f"from {to_numpy_array(old_bias).reshape(-1)!s} " + f"to {to_numpy_array(new_bias).reshape(-1)!s}." + ) + return _model diff --git a/deepmd/pd/train/wrapper.py b/deepmd/pd/train/wrapper.py new file mode 100644 index 0000000000..7c07cbf675 --- /dev/null +++ b/deepmd/pd/train/wrapper.py @@ -0,0 +1,222 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from __future__ import ( + annotations, +) + +import logging +from collections import ( + OrderedDict, +) +from typing import ( + Union, +) + +import paddle + +_StateDict = Union[dict[str, paddle.Tensor], OrderedDict[str, paddle.Tensor]] + +# if paddle.__version__.startswith("2"): +# import paddle._dynamo + + +log = logging.getLogger(__name__) + + +class ModelWrapper(paddle.nn.Layer): + def __init__( + self, + model: paddle.nn.Layer | dict, + loss: paddle.nn.Layer | dict = None, + model_params=None, + shared_links=None, + ): + """Construct a DeePMD model wrapper. + + Args: + - config: The Dict-like configuration with training options. + """ + super().__init__() + self.model_params = model_params if model_params is not None else {} + self.train_infos = { + "lr": 0, + "step": 0, + } + self.multi_task = False + self.model = paddle.nn.LayerDict() + # Model + if isinstance(model, paddle.nn.Layer): + self.model["Default"] = model + elif isinstance(model, dict): + self.multi_task = True + for task_key in model: + assert isinstance( + model[task_key], paddle.nn.Layer + ), f"{task_key} in model_dict is not a paddle.nn.Layer!" + self.model[task_key] = model[task_key] + # Loss + self.loss = None + if loss is not None: + self.loss = paddle.nn.LayerDict() + if isinstance(loss, paddle.nn.Layer): + self.loss["Default"] = loss + elif isinstance(loss, dict): + for task_key in loss: + assert isinstance( + loss[task_key], paddle.nn.Layer + ), f"{task_key} in loss_dict is not a paddle.nn.Layer!" + self.loss[task_key] = loss[task_key] + self.inference_only = self.loss is None + + def share_params(self, shared_links, resume=False): + """ + Share the parameters of classes following rules defined in shared_links during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
+ """ + supported_types = ["descriptor", "fitting_net"] + for shared_item in shared_links: + class_name = shared_links[shared_item]["type"] + shared_base = shared_links[shared_item]["links"][0] + class_type_base = shared_base["shared_type"] + model_key_base = shared_base["model_key"] + shared_level_base = shared_base["shared_level"] + if "descriptor" in class_type_base: + if class_type_base == "descriptor": + base_class = self.model[model_key_base].get_descriptor() + elif "hybrid" in class_type_base: + hybrid_index = int(class_type_base.split("_")[-1]) + base_class = ( + self.model[model_key_base] + .get_descriptor() + .descriptor_list[hybrid_index] + ) + else: + raise RuntimeError(f"Unknown class_type {class_type_base}!") + for link_item in shared_links[shared_item]["links"][1:]: + class_type_link = link_item["shared_type"] + model_key_link = link_item["model_key"] + shared_level_link = int(link_item["shared_level"]) + assert ( + shared_level_link >= shared_level_base + ), "The shared_links must be sorted by shared_level!" + assert ( + "descriptor" in class_type_link + ), f"Class type mismatched: {class_type_base} vs {class_type_link}!" + if class_type_link == "descriptor": + link_class = self.model[model_key_link].get_descriptor() + elif "hybrid" in class_type_link: + hybrid_index = int(class_type_link.split("_")[-1]) + link_class = ( + self.model[model_key_link] + .get_descriptor() + .descriptor_list[hybrid_index] + ) + else: + raise RuntimeError(f"Unknown class_type {class_type_link}!") + link_class.share_params( + base_class, shared_level_link, resume=resume + ) + log.warning( + f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!" + ) + else: + if hasattr(self.model[model_key_base], class_type_base): + base_class = self.model[model_key_base].__getattr__(class_type_base) + for link_item in shared_links[shared_item]["links"][1:]: + class_type_link = link_item["shared_type"] + model_key_link = link_item["model_key"] + shared_level_link = int(link_item["shared_level"]) + assert ( + shared_level_link >= shared_level_base + ), "The shared_links must be sorted by shared_level!" + assert ( + class_type_base == class_type_link + ), f"Class type mismatched: {class_type_base} vs {class_type_link}!" + link_class = self.model[model_key_link].__getattr__( + class_type_link + ) + link_class.share_params( + base_class, shared_level_link, resume=resume + ) + log.warning( + f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!" + ) + + def forward( + self, + coord, + atype, + spin: paddle.Tensor | None = None, + box: paddle.Tensor | None = None, + cur_lr: paddle.Tensor | None = None, + label: paddle.Tensor | None = None, + task_key: paddle.Tensor | None = None, + inference_only=False, + do_atomic_virial=False, + fparam: paddle.Tensor | None = None, + aparam: paddle.Tensor | None = None, + ): + if not self.multi_task: + task_key = "Default" + else: + assert ( + task_key is not None + ), f"Multitask model must specify the inference task! Supported tasks are {list(self.model.keys())}." 
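+        # Gather the registered inputs for the selected task; "spin" is only passed
+        # through when the chosen model reports has_spin.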
+ input_dict = { + "coord": coord, + "atype": atype, + "box": box, + "do_atomic_virial": do_atomic_virial, + "fparam": fparam, + "aparam": aparam, + } + has_spin = getattr(self.model[task_key], "has_spin", False) + if callable(has_spin): + has_spin = has_spin() + if has_spin: + input_dict["spin"] = spin + + if self.inference_only or inference_only: + model_pred = self.model[task_key](**input_dict) + return model_pred, None, None + else: + natoms = atype.shape[-1] + model_pred, loss, more_loss = self.loss[task_key]( + input_dict, + self.model[task_key], + label, + natoms=natoms, + learning_rate=cur_lr, + ) + return model_pred, loss, more_loss + + def load_state_dict( + self, + state_dict: _StateDict, + ) -> tuple[list[str], list[str]]: + self.set_extra_state(state_dict.pop("_extra_state")) + return super().set_state_dict(state_dict) + + def set_state_dict( + self, + state_dict: _StateDict, + ) -> tuple[list[str], list[str]]: + return self.load_state_dict(state_dict) + + def state_dict(self): + state_dict = super().state_dict() + extra_state = self.get_extra_state() + state_dict.update({"_extra_state": extra_state}) + return state_dict + + def set_extra_state(self, extra_state: dict): + self.model_params = extra_state["model_params"] + self.train_infos = extra_state["train_infos"] + return None + + def get_extra_state(self) -> dict: + extra_state = { + "model_params": self.model_params, + "train_infos": self.train_infos, + } + return extra_state diff --git a/deepmd/pd/utils/__init__.py b/deepmd/pd/utils/__init__.py new file mode 100644 index 0000000000..7e1043eda4 --- /dev/null +++ b/deepmd/pd/utils/__init__.py @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +from .exclude_mask import ( + AtomExcludeMask, + PairExcludeMask, +) + +__all__ = [ + "PairExcludeMask", + "AtomExcludeMask", +] diff --git a/deepmd/pd/utils/ase_calc.py b/deepmd/pd/utils/ase_calc.py new file mode 100644 index 0000000000..6bcb9cdc5e --- /dev/null +++ b/deepmd/pd/utils/ase_calc.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.calculator import DP as DPCalculator + +__all__ = [ + "DPCalculator", +] diff --git a/deepmd/pd/utils/auto_batch_size.py b/deepmd/pd/utils/auto_batch_size.py new file mode 100644 index 0000000000..8cdb5ddea2 --- /dev/null +++ b/deepmd/pd/utils/auto_batch_size.py @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase + + +class AutoBatchSize(AutoBatchSizeBase): + """Auto batch size. + + Parameters + ---------- + initial_batch_size : int, default: 1024 + initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE + is not set + factor : float, default: 2. + increased factor + + """ + + def __init__( + self, + initial_batch_size: int = 1024, + factor: float = 2.0, + ): + super().__init__( + initial_batch_size=initial_batch_size, + factor=factor, + ) + + def is_gpu_available(self) -> bool: + """Check if GPU is available. + + Returns + ------- + bool + True if GPU is available + """ + return paddle.device.cuda.device_count() > 0 + + def is_oom_error(self, e: Exception) -> bool: + """Check if the exception is an OOM error. 
+ + Parameters + ---------- + e : Exception + Exception + """ + # several sources think CUSOLVER_STATUS_INTERNAL_ERROR is another out-of-memory error, + # such as https://github.com/JuliaGPU/CUDA.jl/issues/1924 + # (the meaningless error message should be considered as a bug in cusolver) + if isinstance(e, RuntimeError) and ( + "CUDA out of memory." in e.args[0] + or "CUDA driver error: out of memory" in e.args[0] + or "cusolver error: CUSOLVER_STATUS_INTERNAL_ERROR" in e.args[0] + ): + # Release all unoccupied cached memory + # paddle.device.cuda.empty_cache() + return True + return False diff --git a/deepmd/pd/utils/cache.py b/deepmd/pd/utils/cache.py new file mode 100644 index 0000000000..c40c4050b7 --- /dev/null +++ b/deepmd/pd/utils/cache.py @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy as copy_lib +import functools + + +def lru_cache(maxsize=16, typed=False, copy=False, deepcopy=False): + if deepcopy: + + def decorator(f): + cached_func = functools.lru_cache(maxsize, typed)(f) + + @functools.wraps(f) + def wrapper(*args, **kwargs): + return copy_lib.deepcopy(cached_func(*args, **kwargs)) + + return wrapper + + elif copy: + + def decorator(f): + cached_func = functools.lru_cache(maxsize, typed)(f) + + @functools.wraps(f) + def wrapper(*args, **kwargs): + return copy_lib.copy(cached_func(*args, **kwargs)) + + return wrapper + + else: + decorator = functools.lru_cache(maxsize, typed) + return decorator diff --git a/deepmd/pd/utils/dataloader.py b/deepmd/pd/utils/dataloader.py new file mode 100644 index 0000000000..7a2bf4fe9c --- /dev/null +++ b/deepmd/pd/utils/dataloader.py @@ -0,0 +1,339 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +import os +import queue +import time +from collections.abc import ( + Iterator, +) +from multiprocessing.dummy import ( + Pool, +) +from threading import ( + Thread, +) + +import h5py +import numpy as np +import paddle +import paddle.distributed as dist + +# import paddle.multiprocessing +from paddle.io import ( + BatchSampler, + DataLoader, + Dataset, + DistributedBatchSampler, + WeightedRandomSampler, +) +from paddle.io.dataloader.collate import ( + default_collate_fn, +) + +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.dataset import ( + DeepmdDataSetForLoader, +) +from deepmd.utils.data import ( + DataRequirementItem, +) +from deepmd.utils.data_system import ( + print_summary, + prob_sys_size_ext, + process_sys_probs, +) + +log = logging.getLogger(__name__) +# paddle.multiprocessing.set_sharing_strategy("file_system") + + +def setup_seed(seed): + paddle.seed(seed) + os.environ["FLAGS_cudnn_deterministic"] = "True" + + +class DpLoaderSet(Dataset): + """A dataset for storing DataLoaders to multiple Systems. + + Parameters + ---------- + sys_path + Path to the data system + batch_size + Max frame count in a batch. + type_map + Gives the name of different atom types + seed + Random seed for dataloader + shuffle + If the data are shuffled (Only effective in serial mode. 
Always shuffle in distributed data parallelism) + """ + + def __init__( + self, + systems, + batch_size, + type_map, + seed=None, + shuffle=True, + ): + if seed is not None: + setup_seed(seed) + if isinstance(systems, str): + with h5py.File(systems) as file: + systems = [os.path.join(systems, item) for item in file.keys()] + + self.systems: list[DeepmdDataSetForLoader] = [] + if len(systems) >= 100: + log.info(f"Constructing DataLoaders from {len(systems)} systems") + + def construct_dataset(system): + return DeepmdDataSetForLoader( + system=system, + type_map=type_map, + ) + + MAX_PROCESSES_NUM = 4 + processes = min( + os.cpu_count() + // ( + dist.get_world_size() + if dist.is_available() and dist.is_initialized() + else 1 + ), + MAX_PROCESSES_NUM, + ) + with Pool(processes) as pool: + self.systems = pool.map(construct_dataset, systems) + + self.sampler_list: list[DistributedBatchSampler] = [] + self.index = [] + self.total_batch = 0 + + self.dataloaders = [] + self.batch_sizes = [] + if isinstance(batch_size, str): + if batch_size == "auto": + rule = 32 + elif batch_size.startswith("auto:"): + rule = int(batch_size.split(":")[1]) + else: + rule = None + log.error("Unsupported batch size type") + for ii in self.systems: + ni = ii._natoms + bsi = rule // ni + if bsi * ni < rule: + bsi += 1 + self.batch_sizes.append(bsi) + elif isinstance(batch_size, list): + self.batch_sizes = batch_size + else: + self.batch_sizes = batch_size * np.ones(len(systems), dtype=int) + assert len(self.systems) == len(self.batch_sizes) + for system, batch_size in zip(self.systems, self.batch_sizes): + if dist.is_available() and dist.is_initialized(): + system_batch_sampler = DistributedBatchSampler( + system, + shuffle=( + (not (dist.is_available() and dist.is_initialized())) + and shuffle + ), + batch_size=int(batch_size), + ) + self.sampler_list.append(system_batch_sampler) + else: + system_batch_sampler = BatchSampler( + system, + shuffle=( + (not (dist.is_available() and dist.is_initialized())) + and shuffle + ), + batch_size=int(batch_size), + ) + self.sampler_list.append(system_batch_sampler) + system_dataloader = DataLoader( + dataset=system, + num_workers=0, # Should be 0 to avoid too many threads forked + batch_sampler=system_batch_sampler, + collate_fn=collate_batch, + use_buffer_reader=False, + places=["cpu"], + ) + self.dataloaders.append(system_dataloader) + self.index.append(len(system_dataloader)) + self.total_batch += len(system_dataloader) + + class LazyIter: + """Lazy iterator to prevent fetching data when iter(item).""" + + def __init__(self, item): + self.item = item + + def __iter__(self): + # directly return + return self + + def __next__(self): + if not isinstance(self.item, Iterator): + # make iterator here lazily + self.item = iter(self.item) + return next(self.item) + + self.iters = [] + for item in self.dataloaders: + self.iters.append(LazyIter(item)) + + def set_noise(self, noise_settings): + # noise_settings['noise_type'] # "trunc_normal", "normal", "uniform" + # noise_settings['noise'] # float, default 1.0 + # noise_settings['noise_mode'] # "prob", "fix_num" + # noise_settings['mask_num'] # if "fix_num", int + # noise_settings['mask_prob'] # if "prob", float + # noise_settings['same_mask'] # coord and type same mask? 
+ for system in self.systems: + system.set_noise(noise_settings) + + def __len__(self): + return len(self.dataloaders) + + def __getitem__(self, idx): + # log.warning(str(paddle.distributed.get_rank())+" idx: "+str(idx)+" index: "+str(self.index[idx])) + try: + batch = next(self.iters[idx]) + except StopIteration: + self.iters[idx] = iter(self.dataloaders[idx]) + batch = next(self.iters[idx]) + batch["sid"] = idx + return batch + + def add_data_requirement(self, data_requirement: list[DataRequirementItem]): + """Add data requirement for each system in multiple systems.""" + for system in self.systems: + system.add_data_requirement(data_requirement) + + def print_summary( + self, + name: str, + prob: list[float], + ): + print_summary( + name, + len(self.systems), + [ss.system for ss in self.systems], + [ss._natoms for ss in self.systems], + self.batch_sizes, + [ + ss._data_system.get_sys_numb_batch(self.batch_sizes[ii]) + for ii, ss in enumerate(self.systems) + ], + prob, + [ss._data_system.pbc for ss in self.systems], + ) + + +_sentinel = object() +QUEUESIZE = 32 + + +class BackgroundConsumer(Thread): + def __init__(self, queue, source, max_len): + Thread.__init__(self) + self._queue = queue + self._source = source # Main DL iterator + self._max_len = max_len # + + def run(self): + for item in self._source: + self._queue.put(item) # Blocking if the queue is full + + # Signal the consumer we are done. + self._queue.put(_sentinel) + + +class BufferedIterator: + def __init__(self, iterable): + self._queue = queue.Queue(QUEUESIZE) + self._iterable = iterable + self._consumer = None + + self.start_time = time.time() + self.warning_time = None + self.total = len(iterable) + + def _create_consumer(self): + self._consumer = BackgroundConsumer(self._queue, self._iterable, self.total) + self._consumer.daemon = True + self._consumer.start() + + def __iter__(self): + return self + + def __len__(self): + return self.total + + def __next__(self): + # Create consumer if not created yet + if self._consumer is None: + self._create_consumer() + # Notify the user if there is a data loading bottleneck + if self._queue.qsize() < min(2, max(1, self._queue.maxsize // 2)): + if time.time() - self.start_time > 5 * 60: + if ( + self.warning_time is None + or time.time() - self.warning_time > 15 * 60 + ): + log.warning( + "Data loading buffer is empty or nearly empty. This may " + "indicate a data loading bottleneck, and increasing the " + "number of workers (--num-workers) may help." 
+ ) + self.warning_time = time.time() + + # Get next example + item = self._queue.get() + if isinstance(item, Exception): + raise item + if item is _sentinel: + raise StopIteration + return item + + +def collate_batch(batch): + example = batch[0] + result = {} + for key in example.keys(): + if "find_" in key: + result[key] = batch[0][key] + else: + if batch[0][key] is None: + result[key] = None + elif key == "fid": + result[key] = [d[key] for d in batch] + elif key == "type": + continue + else: + result[key] = default_collate_fn([d[key] for d in batch]) + return result + + +def get_weighted_sampler(training_data, prob_style, sys_prob=False): + if sys_prob is False: + if prob_style == "prob_uniform": + prob_v = 1.0 / float(training_data.__len__()) + probs = [prob_v for ii in range(training_data.__len__())] + else: # prob_sys_size;A:B:p1;C:D:p2 or prob_sys_size = prob_sys_size;0:nsys:1.0 + if prob_style == "prob_sys_size": + style = f"prob_sys_size;0:{len(training_data)}:1.0" + else: + style = prob_style + probs = prob_sys_size_ext(style, len(training_data), training_data.index) + else: + probs = process_sys_probs(prob_style, training_data.index) + log.debug("Generated weighted sampler with prob array: " + str(probs)) + # training_data.total_batch is the size of one epoch, you can increase it to avoid too many rebuilding of iteraters + len_sampler = training_data.total_batch * max(env.NUM_WORKERS, 1) + sampler = WeightedRandomSampler(probs, len_sampler, replacement=True) + return sampler diff --git a/deepmd/pd/utils/dataset.py b/deepmd/pd/utils/dataset.py new file mode 100644 index 0000000000..1f0533d8fc --- /dev/null +++ b/deepmd/pd/utils/dataset.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + + +from typing import ( + Optional, +) + +from paddle.io import ( + Dataset, +) + +from deepmd.utils.data import ( + DataRequirementItem, + DeepmdData, +) + + +class DeepmdDataSetForLoader(Dataset): + def __init__(self, system: str, type_map: Optional[list[str]] = None): + """Construct DeePMD-style dataset containing frames cross different systems. + + Args: + - systems: Paths to systems. + - type_map: Atom types. 
+ """ + self.system = system + self._type_map = type_map + self._data_system = DeepmdData(sys_path=system, type_map=self._type_map) + self.mixed_type = self._data_system.mixed_type + self._ntypes = self._data_system.get_ntypes() + self._natoms = self._data_system.get_natoms() + self._natoms_vec = self._data_system.get_natoms_vec(self._ntypes) + + def __len__(self): + return self._data_system.nframes + + def __getitem__(self, index): + """Get a frame from the selected system.""" + b_data = self._data_system.get_item_paddle(index) + b_data["natoms"] = self._natoms_vec + return b_data + + def add_data_requirement(self, data_requirement: list[DataRequirementItem]): + """Add data requirement for this data system.""" + for data_item in data_requirement: + self._data_system.add( + data_item["key"], + data_item["ndof"], + atomic=data_item["atomic"], + must=data_item["must"], + high_prec=data_item["high_prec"], + type_sel=data_item["type_sel"], + repeat=data_item["repeat"], + default=data_item["default"], + dtype=data_item["dtype"], + output_natoms_for_type_sel=data_item["output_natoms_for_type_sel"], + ) diff --git a/deepmd/pd/utils/decomp.py b/deepmd/pd/utils/decomp.py new file mode 100644 index 0000000000..25eac1b6d5 --- /dev/null +++ b/deepmd/pd/utils/decomp.py @@ -0,0 +1,247 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +# This file is used to implement some paddle functions with composite API, +# so as to support high-order differentation when double-backward is needed. +# For example: [norm] --decomposition--> [multiply, power, sum] +# This file will be removed when implmented functions are decomposed into primitive +# function in Paddle framework in the future. + +from __future__ import ( + annotations, +) + +import paddle + +__all__ = [ + "softmax", + "norm", + "take_along_axis", + "scatter_reduce", + "sec", + "masked_add_", +] + + +# decomposition for forward function +def softmax_decomp(x: paddle.Tensor, axis: int = -1) -> paddle.Tensor: + """Forward decompsition function of softmax. + + Parameters + ---------- + x : paddle.Tensor + Input. + axis : int, defaults: -1. + A dimension along which softmax will be computed. + + Returns + ------- + paddle.Tensor + Computed output. + """ + x_max = paddle.max(x, axis=axis, keepdim=True) + x = x - x_max + return paddle.exp(x) / paddle.sum(paddle.exp(x), axis=axis, keepdim=True) + + +def norm_decomp( + x: paddle.Tensor, p: float = 2, axis: bool = -1, keepdim: bool = False +) -> paddle.Tensor: + """Forward decompsition function of norm. + + Parameters + ---------- + x : paddle.Tensor + Input + p : float, default: 2 + Order of norm + axis : bool, default: -1 + Dimensions over which to compute the vector or matrix norm + keepdim : bool, default: False + If set to True, the reduced dimensions are retained in the result as dimensions + with size one + + Returns + ------- + paddle.Tensor + A real-valued tensor, even when A is complex. + """ + if p == 2 or p == 2.0: + # clip for negative indexing, or 1/(0^(k-1)) will cause inf in backward + return (x * x).sum(axis=axis, keepdim=keepdim).clip(1e-12) ** 0.5 + return (x**p).sum(axis=axis, keepdim=keepdim) ** (1 / p) + + +def take_along_axis_decomp( + x: paddle.Tensor, indices: paddle.Tensor, axis: int, broadcast: bool = True +) -> paddle.Tensor: + """Forward decompsition function of take_along_axis. + + Parameters + ---------- + x : paddle.Tensor + The input tensor. + indices : paddle.Tensor + Indices to take along each 1d slice of array. + axis : int + The axis to take 1d slices along. 
+ broadcast : bool, default: True + Whether the indices broadcast. + + Returns + ------- + paddle.Tensor + Computed output. + """ + # manually contruct indices for gather_nd(ind_gather_nd.ndim == indices.ndim + 1, + # the lsat 1 represents the number of dimension(s) of indices) + ind_gather_nd = paddle.stack( + paddle.meshgrid(*[paddle.arange(v) for v in indices.shape], indexing="ij"), + axis=-1, + ) + ind_gather_nd[..., axis] = indices + # compute output using constructed indices via gather_nd + out = paddle.gather_nd(x, ind_gather_nd) + return out + + +def scatter_reduce_decomp( + input: paddle.Tensor, + axis: int, + index: paddle.Tensor, + src: paddle.Tensor, + reduce: str, +) -> paddle.Tensor: + """Forward decompsition function of scatter_reduce. + + Parameters + ---------- + input : paddle.Tensor + Input tensor. + axis : int + The axis along which to index. + index : paddle.Tensor + The indices of elements to scatter and reduce. + src : paddle.Tensor + The source elements to scatter and reduce. + reduce : str + The reduction operation to apply for non-unique indices. + Supported modes: ("sum", "prod", "mean", "amax", "amin"). + + Returns + ------- + paddle.Tensor + Computed output. + """ + # reduce: "sum", "prod", "mean", "amax", "amin" + if reduce == "sum": + input.put_along_axis_(indices=index, values=src, axis=axis, reduce="add") + elif reduce == "mean": + input.put_along_axis_(indices=index, values=src, axis=axis, reduce="add") + dst_div = paddle.ones_like(input).put_along_axis( + indices=index, + values=paddle.to_tensor(1.0, dtype=input.dtype), + axis=axis, + reduce="add", + ) + input = input / dst_div + elif reduce == "prod": + input = input.put_along_axis(indices=index, values=src, axis=axis, reduce="mul") + else: + raise NotImplementedError("only support mode in ['sum', 'prod', 'mean']!") + return input + + +def sec(length: int, size: int) -> list[int]: + """Auxiliary function for decomposed functions. + + If length is not divisible by size, the last chunk will be smaller. + + Parameters + ---------- + length : int + Length to be chunked. + size : int + Chunk size. + + Returns + ------- + list[int] + Chunked output list. + """ + assert length > 0 + assert size > 0 + if length % size == 0: + return [size] * (length // size) + return [size] * (length // size) + [length % size] + + +def masked_add__decomp( + x: paddle.Tensor, mask: paddle.Tensor, v: paddle.Tensor +) -> paddle.Tensor: + """Forward decompsition function of masked_add_(inplace operator). + + Parameters + ---------- + x : paddle.Tensor + Input tensor. + mask : paddle.Tensor + Mask tensor. + v : paddle.Tensor + Value to add. + + Returns + ------- + paddle.Tensor + Computed output. + """ + assert mask.dtype == paddle.bool, f"mask must be bool type, but got {mask.dtype}" + # indices is bool mask + mask_coord = paddle.concat( + paddle.nonzero(mask, as_tuple=True), + axis=1, + ) # [nz, dim] + if not paddle.is_tensor(v): + v = paddle.full([mask_coord.shape[0]], v, dtype=x.dtype) + t = paddle.scatter_nd_add( + x, + mask_coord, + v, + ) + paddle.assign(t, x) # inplace update + return x + + +def normalize_decomp( + x: paddle.Tensor, + p: float = 2, + axis: int = 1, + epsilon: float = 1e-12, +) -> paddle.Tensor: + """Forward decompsition function of normalize. + + Parameters + ---------- + x : paddle.Tensor + Input tensor. 
+ p : float, optional + Order of the norm, default: 2 + axis : int, optional + Axis on which to perform normalization, default: 1 + epsilon : float, optional + Epislon value, default: 1e-12 + + Returns + ------- + paddle.Tensor + Computed output. + """ + return x / (norm(x, p=p, axis=axis, keepdim=True).clip(min=epsilon)) + + +# alias for decomposed functions for convinience +normalize = normalize_decomp +masked_add_ = masked_add__decomp +scatter_reduce = scatter_reduce_decomp +take_along_axis = take_along_axis_decomp +norm = norm_decomp +softmax = softmax_decomp diff --git a/deepmd/pd/utils/dp_random.py b/deepmd/pd/utils/dp_random.py new file mode 100644 index 0000000000..e81488c506 --- /dev/null +++ b/deepmd/pd/utils/dp_random.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.utils.random import ( + choice, + random, + seed, + shuffle, +) + +__all__ = [ + "choice", + "random", + "seed", + "shuffle", +] diff --git a/deepmd/pd/utils/env.py b/deepmd/pd/utils/env.py new file mode 100644 index 0000000000..e9593d4c50 --- /dev/null +++ b/deepmd/pd/utils/env.py @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +import os + +import numpy as np +import paddle + +from deepmd.common import ( + VALID_PRECISION, +) +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, + get_default_nthreads, + set_default_nthreads, +) + +SAMPLER_RECORD = os.environ.get("SAMPLER_RECORD", False) +try: + # only linux + ncpus = len(os.sched_getaffinity(0)) +except AttributeError: + ncpus = os.cpu_count() +NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(0, ncpus))) +# Make sure DDP uses correct device if applicable +LOCAL_RANK = paddle.distributed.get_rank() + +if os.environ.get("DEVICE") == "cpu" or paddle.device.cuda.device_count() <= 0: + DEVICE = "cpu" +else: + DEVICE = f"gpu:{LOCAL_RANK}" + +paddle.device.set_device(DEVICE) + +JIT = False +CACHE_PER_SYS = 5 # keep at most so many sets per sys in memory +ENERGY_BIAS_TRAINABLE = True + +PRECISION_DICT = { + "float16": paddle.float16, + "float32": paddle.float32, + "float64": paddle.float64, + "half": paddle.float16, + "single": paddle.float32, + "double": paddle.float64, + "int32": paddle.int32, + "int64": paddle.int64, + "bfloat16": paddle.bfloat16, + "bool": paddle.bool, +} +GLOBAL_PD_FLOAT_PRECISION = PRECISION_DICT[np.dtype(GLOBAL_NP_FLOAT_PRECISION).name] +GLOBAL_PD_ENER_FLOAT_PRECISION = PRECISION_DICT[ + np.dtype(GLOBAL_ENER_FLOAT_PRECISION).name +] +PRECISION_DICT["default"] = GLOBAL_PD_FLOAT_PRECISION +assert VALID_PRECISION.issubset(PRECISION_DICT.keys()) +# cannot automatically generated +RESERVED_PRECISON_DICT = { + paddle.float16: "float16", + paddle.float32: "float32", + paddle.float64: "float64", + paddle.int32: "int32", + paddle.int64: "int64", + paddle.bfloat16: "bfloat16", + paddle.bool: "bool", +} +assert set(PRECISION_DICT.values()) == set(RESERVED_PRECISON_DICT.keys()) +DEFAULT_PRECISION = "float64" + +# throw warnings if threads not set +set_default_nthreads() +inter_nthreads, intra_nthreads = get_default_nthreads() +# if inter_nthreads > 0: # the behavior of 0 is not documented +# paddle.set_num_interop_threads(inter_nthreads) +# if intra_nthreads > 0: +# paddle.framework.core.set_num_threads(intra_nthreads) + + +def enable_prim(enable: bool = True): + """Enable running program in primitive C++ API in eager/static mode.""" + if enable: + from paddle.framework import ( + core, + ) + + core.set_prim_eager_enabled(True) + core._set_prim_all_enabled(True) + 
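+        # Usage sketch (illustrative only): a training entry point would
+        # typically switch decomposition on once, before any graph that
+        # needs double backward is built, e.g.
+        #
+        #     from deepmd.pd.utils.env import enable_prim
+        #     enable_prim(True)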
log = logging.getLogger(__name__) + log.info("Enable prim in eager and static mode.") + + +__all__ = [ + "GLOBAL_ENER_FLOAT_PRECISION", + "GLOBAL_NP_FLOAT_PRECISION", + "GLOBAL_PD_FLOAT_PRECISION", + "GLOBAL_PD_ENER_FLOAT_PRECISION", + "DEFAULT_PRECISION", + "PRECISION_DICT", + "RESERVED_PRECISON_DICT", + "SAMPLER_RECORD", + "NUM_WORKERS", + "DEVICE", + "JIT", + "CACHE_PER_SYS", + "ENERGY_BIAS_TRAINABLE", + "LOCAL_RANK", +] diff --git a/deepmd/pd/utils/env_mat_stat.py b/deepmd/pd/utils/env_mat_stat.py new file mode 100644 index 0000000000..1cc67ecfee --- /dev/null +++ b/deepmd/pd/utils/env_mat_stat.py @@ -0,0 +1,235 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Iterator, +) +from typing import ( + TYPE_CHECKING, + Union, +) + +import numpy as np +import paddle + +from deepmd.common import ( + get_hash, +) +from deepmd.pd.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.utils.env_mat_stat import EnvMatStat as BaseEnvMatStat +from deepmd.utils.env_mat_stat import ( + StatItem, +) + +if TYPE_CHECKING: + from deepmd.pd.model.descriptor import ( + DescriptorBlock, + ) + + +class EnvMatStat(BaseEnvMatStat): + def compute_stat(self, env_mat: dict[str, paddle.Tensor]) -> dict[str, StatItem]: + """Compute the statistics of the environment matrix for a single system. + + Parameters + ---------- + env_mat : paddle.Tensor + The environment matrix. + + Returns + ------- + Dict[str, StatItem] + The statistics of the environment matrix. + """ + stats = {} + for kk, vv in env_mat.items(): + stats[kk] = StatItem( + number=vv.numel().item(), + sum=vv.sum().item() if vv.numel().item() != 0 else paddle.zeros([]), + squared_sum=paddle.square(vv).sum().item() + if vv.numel().item() != 0 + else paddle.zeros([]), + ) + return stats + + +class EnvMatStatSe(EnvMatStat): + """Environmental matrix statistics for the se_a/se_r environemntal matrix. + + Parameters + ---------- + descriptor : DescriptorBlock + The descriptor of the model. + """ + + def __init__(self, descriptor: "DescriptorBlock"): + super().__init__() + self.descriptor = descriptor + self.last_dim = ( + self.descriptor.ndescrpt // self.descriptor.nnei + ) # se_r=1, se_a=4 + + def iter( + self, data: list[dict[str, Union[paddle.Tensor, list[tuple[int, int]]]]] + ) -> Iterator[dict[str, StatItem]]: + """Get the iterator of the environment matrix. + + Parameters + ---------- + data : List[Dict[str, Union[paddle.Tensor, List[Tuple[int, int]]]]] + The data. + + Yields + ------ + Dict[str, StatItem] + The statistics of the environment matrix. + """ + zero_mean = paddle.zeros( + [self.descriptor.get_ntypes(), self.descriptor.get_nsel(), self.last_dim], + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ).to(env.DEVICE) + one_stddev = paddle.ones( + [self.descriptor.get_ntypes(), self.descriptor.get_nsel(), self.last_dim], + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ).to(env.DEVICE) + if self.last_dim == 4: + radial_only = False + elif self.last_dim == 1: + radial_only = True + else: + raise ValueError( + "last_dim should be 1 for raial-only or 4 for full descriptor." 
+ ) + for system in data: + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], + ) + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + self.descriptor.get_rcut(), + self.descriptor.get_sel(), + mixed_types=self.descriptor.mixed_types(), + box=box, + ) + env_mat, _, _ = prod_env_mat( + extended_coord, + nlist, + atype, + zero_mean, + one_stddev, + self.descriptor.get_rcut(), + self.descriptor.get_rcut_smth(), + radial_only, + protection=self.descriptor.get_env_protection(), + ) + # apply excluded_types + exclude_mask = self.descriptor.emask(nlist, extended_atype) + env_mat *= exclude_mask.unsqueeze(-1).astype(env_mat.dtype) + # reshape to nframes * nloc at the atom level, + # so nframes/mixed_type do not matter + env_mat = env_mat.reshape( + [ + coord.shape[0] * coord.shape[1], + self.descriptor.get_nsel(), + self.last_dim, + ] + ) + atype = atype.reshape([coord.shape[0] * coord.shape[1]]) + # (1, nloc) eq (ntypes, 1), so broadcast is possible + # shape: (ntypes, nloc) + type_idx = paddle.equal( + atype.reshape([1, -1]), + paddle.arange(self.descriptor.get_ntypes(), dtype=atype.dtype) + .to(device=env.DEVICE) + .reshape([-1, 1]), + ) + if "pair_exclude_types" in system: + # shape: (1, nloc, nnei) + exclude_mask = PairExcludeMask( + self.descriptor.get_ntypes(), system["pair_exclude_types"] + )(nlist, extended_atype).reshape( + [1, coord.shape[0] * coord.shape[1], -1] + ) + # shape: (ntypes, nloc, nnei) + type_idx = paddle.logical_and(type_idx.unsqueeze(-1), exclude_mask) + for type_i in range(self.descriptor.get_ntypes()): + dd = env_mat[type_idx[type_i]] + dd = dd.reshape([-1, self.last_dim]) # typen_atoms * unmasked_nnei, 4 + env_mats = {} + env_mats[f"r_{type_i}"] = dd[:, :1] + if self.last_dim == 4: + env_mats[f"a_{type_i}"] = dd[:, 1:] + yield self.compute_stat(env_mats) + + def get_hash(self) -> str: + """Get the hash of the environment matrix. + + Returns + ------- + str + The hash of the environment matrix. 
+ """ + dscpt_type = "se_a" if self.last_dim == 4 else "se_r" + return get_hash( + { + "type": dscpt_type, + "ntypes": self.descriptor.get_ntypes(), + "rcut": round(self.descriptor.get_rcut(), 2), + "rcut_smth": round(self.descriptor.rcut_smth, 2), + "nsel": self.descriptor.get_nsel(), + "sel": self.descriptor.get_sel(), + "mixed_types": self.descriptor.mixed_types(), + } + ) + + def __call__(self): + avgs = self.get_avg() + stds = self.get_std() + + all_davg = [] + all_dstd = [] + + for type_i in range(self.descriptor.get_ntypes()): + if self.last_dim == 4: + davgunit = [[avgs[f"r_{type_i}"], 0, 0, 0]] + dstdunit = [ + [ + stds[f"r_{type_i}"], + stds[f"a_{type_i}"], + stds[f"a_{type_i}"], + stds[f"a_{type_i}"], + ] + ] + elif self.last_dim == 1: + davgunit = [[avgs[f"r_{type_i}"]]] + dstdunit = [ + [ + stds[f"r_{type_i}"], + ] + ] + davg = np.tile(davgunit, [self.descriptor.get_nsel(), 1]) + dstd = np.tile(dstdunit, [self.descriptor.get_nsel(), 1]) + all_davg.append(davg) + all_dstd.append(dstd) + + mean = np.stack(all_davg) + stddev = np.stack(all_dstd) + return mean, stddev diff --git a/deepmd/pd/utils/exclude_mask.py b/deepmd/pd/utils/exclude_mask.py new file mode 100644 index 0000000000..088ac186a8 --- /dev/null +++ b/deepmd/pd/utils/exclude_mask.py @@ -0,0 +1,164 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import numpy as np +import paddle + +from deepmd.pd.utils import ( + decomp, +) +from deepmd.pd.utils.utils import ( + to_paddle_tensor, +) + + +class AtomExcludeMask(paddle.nn.Layer): + """Computes the type exclusion mask for atoms.""" + + def __init__( + self, + ntypes: int, + exclude_types: list[int] = [], + ): + super().__init__() + self.reinit(ntypes, exclude_types) + + def reinit( + self, + ntypes: int, + exclude_types: list[int] = [], + ): + self.ntypes = ntypes + self.exclude_types = exclude_types + self.type_mask = np.array( + [1 if tt_i not in self.exclude_types else 0 for tt_i in range(ntypes)], + dtype=np.int32, + ) + self.type_mask = to_paddle_tensor(self.type_mask).reshape([-1]) + + def get_exclude_types(self): + return self.exclude_types + + def get_type_mask(self): + return self.type_mask + + def forward( + self, + atype: paddle.Tensor, + ) -> paddle.Tensor: + """Compute type exclusion mask for atoms. + + Parameters + ---------- + atype + The extended atom types. shape: nf x natom + + Returns + ------- + mask + The type exclusion mask for atoms. shape: nf x natom + Element [ff,ii] being 0 if type(ii) is excluded, + otherwise being 1. 
+ + """ + nf, natom = atype.shape + return self.type_mask[atype].reshape([nf, natom]).to(atype.place) + + +class PairExcludeMask(paddle.nn.Layer): + """Computes the type exclusion mask for atom pairs.""" + + def __init__( + self, + ntypes: int, + exclude_types: list[tuple[int, int]] = [], + ): + super().__init__() + self.reinit(ntypes, exclude_types) + + def reinit( + self, + ntypes: int, + exclude_types: list[tuple[int, int]] = [], + ): + self.ntypes = ntypes + self._exclude_types: set[tuple[int, int]] = set() + for tt in exclude_types: + assert len(tt) == 2 + self._exclude_types.add((tt[0], tt[1])) + self._exclude_types.add((tt[1], tt[0])) + # ntypes + 1 for nlist masks + self.type_mask = np.array( + [ + [ + 1 if (tt_i, tt_j) not in self._exclude_types else 0 + for tt_i in range(ntypes + 1) + ] + for tt_j in range(ntypes + 1) + ], + dtype=np.int32, + ) + # (ntypes+1 x ntypes+1) + self.type_mask = to_paddle_tensor(self.type_mask).reshape([-1]) + self.no_exclusion = len(self._exclude_types) == 0 + + def get_exclude_types(self): + return self._exclude_types + + # may have a better place for this method... + def forward( + self, + nlist: paddle.Tensor, + atype_ext: paddle.Tensor, + ) -> paddle.Tensor: + """Compute type exclusion mask. + + Parameters + ---------- + nlist + The neighbor list. shape: nf x nloc x nnei + atype_ext + The extended aotm types. shape: nf x nall + + Returns + ------- + mask + The type exclusion mask of shape: nf x nloc x nnei. + Element [ff,ii,jj] being 0 if type(ii), type(nlist[ff,ii,jj]) is excluded, + otherwise being 1. + + """ + if self.no_exclusion: + # safely return 1 if nothing is excluded. + return paddle.ones_like(nlist, dtype=paddle.int32).to(device=nlist.place) + nf, nloc, nnei = nlist.shape + nall = atype_ext.shape[1] + # add virtual atom of type ntypes. 
nf x nall+1 + ae = paddle.concat( + [ + atype_ext, + self.ntypes + * paddle.ones([nf, 1], dtype=atype_ext.dtype).to( + device=atype_ext.place + ), + ], + axis=-1, + ) + type_i = atype_ext[:, :nloc].reshape([nf, nloc]) * (self.ntypes + 1) + # nf x nloc x nnei + index = paddle.where(nlist == -1, nall, nlist).reshape([nf, nloc * nnei]) + # type_j = paddle.take_along_axis(ae, axis=1, indices=index).reshape( + # [nf, nloc, nnei] + # ) + type_j = decomp.take_along_axis(ae, axis=1, indices=index).reshape( + [nf, nloc, nnei] + ) + type_ij = type_i[:, :, None] + type_j + # nf x (nloc x nnei) + type_ij = type_ij.reshape([nf, nloc * nnei]) + mask = ( + self.type_mask[type_ij] + .reshape([nf, nloc, nnei]) + .to(atype_ext.place) + .astype("bool") + ) + return mask diff --git a/deepmd/pd/utils/finetune.py b/deepmd/pd/utils/finetune.py new file mode 100644 index 0000000000..edac72d9c9 --- /dev/null +++ b/deepmd/pd/utils/finetune.py @@ -0,0 +1,200 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from copy import ( + deepcopy, +) + +import paddle + +from deepmd.utils.finetune import ( + FinetuneRuleItem, +) + +log = logging.getLogger(__name__) + + +def get_finetune_rule_single( + _single_param_target, + _model_param_pretrained, + from_multitask=False, + model_branch="Default", + model_branch_from="", + change_model_params=False, +): + single_config = deepcopy(_single_param_target) + new_fitting = False + model_branch_chosen = "Default" + + if not from_multitask: + single_config_chosen = deepcopy(_model_param_pretrained) + if model_branch_from == "RANDOM": + # not ["", "RANDOM"], because single-from-single finetune uses pretrained fitting in default + new_fitting = True + else: + model_dict_params = _model_param_pretrained["model_dict"] + if model_branch_from in ["", "RANDOM"]: + model_branch_chosen = next(iter(model_dict_params.keys())) + new_fitting = True + log.warning( + "The fitting net will be re-init instead of using that in the pretrained model! " + "The bias_adjust_mode will be set-by-statistic!" + ) + else: + model_branch_chosen = model_branch_from + assert model_branch_chosen in model_dict_params, ( + f"No model branch named '{model_branch_chosen}'! " + f"Available ones are {list(model_dict_params.keys())}." + ) + single_config_chosen = deepcopy(model_dict_params[model_branch_chosen]) + old_type_map, new_type_map = ( + single_config_chosen["type_map"], + single_config["type_map"], + ) + finetune_rule = FinetuneRuleItem( + p_type_map=old_type_map, + type_map=new_type_map, + model_branch=model_branch_chosen, + random_fitting=new_fitting, + ) + if change_model_params: + trainable_param = { + "descriptor": single_config.get("descriptor", {}).get("trainable", True), + "fitting_net": single_config.get("fitting_net", {}).get("trainable", True), + } + single_config["descriptor"] = single_config_chosen["descriptor"] + if not new_fitting: + single_config["fitting_net"] = single_config_chosen["fitting_net"] + log.info( + f"Change the '{model_branch}' model configurations according to the model branch " + f"'{model_branch_chosen}' in the pretrained one..." 
+ ) + for net_type in trainable_param: + if net_type in single_config: + single_config[net_type]["trainable"] = trainable_param[net_type] + else: + single_config[net_type] = {"trainable": trainable_param[net_type]} + return single_config, finetune_rule + + +def get_finetune_rules( + finetune_model, model_config, model_branch="", change_model_params=True +): + """ + Get fine-tuning rules and (optionally) change the model_params according to the pretrained one. + + This function gets the fine-tuning rules and (optionally) changes input in different modes as follows: + 1. Single-task fine-tuning from a single-task pretrained model: + - The model will be fine-tuned based on the pretrained model. + - (Optional) Updates the model parameters based on the pretrained model. + 2. Single-task fine-tuning from a multi-task pretrained model: + - The model will be fine-tuned based on the selected branch in the pretrained model. + The chosen branch can be defined from the command-line or `finetune_head` input parameter. + If not defined, model parameters in the fitting network will be randomly initialized. + - (Optional) Updates the model parameters based on the selected branch in the pretrained model. + 3. Multi-task fine-tuning from a single-task pretrained model: + - The model in each branch will be fine-tuned or resumed based on the single branch ('Default') in the pretrained model. + The chosen branches can be defined from the `finetune_head` input parameter of each branch. + - If `finetune_head` is defined as 'Default', + it will be fine-tuned based on the single branch ('Default') in the pretrained model. + - If `finetune_head` is not defined and the model_key is 'Default', + it will resume from the single branch ('Default') in the pretrained model without fine-tuning. + - If `finetune_head` is not defined and the model_key is not 'Default', + it will be fine-tuned based on the single branch ('Default') in the pretrained model, + while model parameters in the fitting network of the branch will be randomly initialized. + - (Optional) Updates model parameters in each branch based on the single branch ('Default') in the pretrained model. + 4. Multi-task fine-tuning from a multi-task pretrained model: + - The model in each branch will be fine-tuned or resumed based on the chosen branches in the pretrained model. + The chosen branches can be defined from the `finetune_head` input parameter of each branch. + - If `finetune_head` is defined as one of the branches in the pretrained model, + it will be fine-tuned based on the chosen branch in the pretrained model. + - If `finetune_head` is not defined and the model_key is the same as one of those in the pretrained model, + it will resume from the model_key branch in the pretrained model without fine-tuning. + - If `finetune_head` is not defined and a new model_key is used, + it will be fine-tuned based on the chosen branch in the pretrained model, + while model parameters in the fitting network of the branch will be randomly initialized. + - (Optional) Updates model parameters in each branch based on the chosen branches in the pretrained model. + + Parameters + ---------- + finetune_model + The pretrained model. + model_config + The fine-tuning input parameters. + model_branch + The model branch chosen in command-line mode, only for single-task fine-tuning. + change_model_params + Whether to change the model parameters according to the pretrained one. + + Returns + ------- + model_config: + Updated model parameters. 
+ finetune_links: + Fine-tuning rules in a dict format, with `model_branch`: FinetuneRuleItem pairs. + """ + multi_task = "model_dict" in model_config + state_dict = paddle.load(finetune_model) + if "model" in state_dict: + state_dict = state_dict["model"] + last_model_params = state_dict["_extra_state"]["model_params"] + finetune_from_multi_task = "model_dict" in last_model_params + finetune_links = {} + if not multi_task: + # use command-line first + if model_branch == "" and "finetune_head" in model_config: + model_branch = model_config["finetune_head"] + model_config, finetune_rule = get_finetune_rule_single( + model_config, + last_model_params, + from_multitask=finetune_from_multi_task, + model_branch="Default", + model_branch_from=model_branch, + change_model_params=change_model_params, + ) + finetune_links["Default"] = finetune_rule + else: + assert model_branch == "", ( + "Multi-task fine-tuning does not support command-line branches chosen!" + "Please define the 'finetune_head' in each model params!" + ) + target_keys = model_config["model_dict"].keys() + if not finetune_from_multi_task: + pretrained_keys = ["Default"] + else: + pretrained_keys = last_model_params["model_dict"].keys() + for model_key in target_keys: + resuming = False + if ( + "finetune_head" in model_config["model_dict"][model_key] + and model_config["model_dict"][model_key]["finetune_head"] != "RANDOM" + ): + pretrained_key = model_config["model_dict"][model_key]["finetune_head"] + assert pretrained_key in pretrained_keys, ( + f"'{pretrained_key}' head chosen to finetune not exist in the pretrained model!" + f"Available heads are: {list(pretrained_keys)}" + ) + model_branch_from = pretrained_key + elif ( + "finetune_head" not in model_config["model_dict"][model_key] + and model_key in pretrained_keys + ): + # not do anything if not defined "finetune_head" in heads that exist in the pretrained model + # this will just do resuming + model_branch_from = model_key + resuming = True + else: + # if not defined "finetune_head" in new heads or "finetune_head" is "RANDOM", the fitting net will bre randomly initialized + model_branch_from = "RANDOM" + model_config["model_dict"][model_key], finetune_rule = ( + get_finetune_rule_single( + model_config["model_dict"][model_key], + last_model_params, + from_multitask=finetune_from_multi_task, + model_branch=model_key, + model_branch_from=model_branch_from, + change_model_params=change_model_params, + ) + ) + finetune_links[model_key] = finetune_rule + finetune_links[model_key].resuming = resuming + return model_config, finetune_links diff --git a/deepmd/pd/utils/learning_rate.py b/deepmd/pd/utils/learning_rate.py new file mode 100644 index 0000000000..94c657abd4 --- /dev/null +++ b/deepmd/pd/utils/learning_rate.py @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np + + +class LearningRateExp: + def __init__( + self, + start_lr, + stop_lr, + decay_steps, + stop_steps, + decay_rate=None, + **kwargs, + ): + """ + Construct an exponential-decayed learning rate. + + Parameters + ---------- + start_lr + The learning rate at the start of the training. + stop_lr + The desired learning rate at the end of the training. + When decay_rate is explicitly set, this value will serve as + the minimum learning rate during training. In other words, + if the learning rate decays below stop_lr, stop_lr will be applied instead. + decay_steps + The learning rate is decaying every this number of training steps. 
+ stop_steps + The total training steps for learning rate scheduler. + decay_rate + The decay rate for the learning rate. + If provided, the decay rate will be set instead of + calculating it through interpolation between start_lr and stop_lr. + """ + self.start_lr = start_lr + default_ds = 100 if stop_steps // 10 > 100 else stop_steps // 100 + 1 + self.decay_steps = decay_steps + if self.decay_steps >= stop_steps: + self.decay_steps = default_ds + self.decay_rate = np.exp( + np.log(stop_lr / self.start_lr) / (stop_steps / self.decay_steps) + ) + if decay_rate is not None: + self.decay_rate = decay_rate + self.min_lr = stop_lr + + def value(self, step): + """Get the learning rate at the given step.""" + step_lr = self.start_lr * np.power(self.decay_rate, step // self.decay_steps) + if step_lr < self.min_lr: + step_lr = self.min_lr + return step_lr diff --git a/deepmd/pd/utils/multi_task.py b/deepmd/pd/utils/multi_task.py new file mode 100644 index 0000000000..680dc53c79 --- /dev/null +++ b/deepmd/pd/utils/multi_task.py @@ -0,0 +1,162 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from copy import ( + deepcopy, +) + +from deepmd.pd.model.descriptor import ( + BaseDescriptor, +) +from deepmd.pd.model.task import ( + BaseFitting, +) + + +def preprocess_shared_params(model_config): + """Preprocess the model params for multitask model, and generate the links dict for further sharing. + + Args: + model_config: Model params of multitask model. + + Returns + ------- + model_config: Preprocessed model params of multitask model. + Those string names are replaced with real params in `shared_dict` of model params. + shared_links: Dict of link infos for further sharing. + Each item, whose key must be in `shared_dict`, is a dict with following keys: + - "type": The real class type of this item. + - "links": List of shared settings, each sub-item is a dict with following keys: + - "model_key": Model key in the `model_dict` to share this item. + - "shared_type": Type of this shard item. + - "shared_level": Shared level (int) of this item in this model. + Lower for more params to share, 0 means to share all params in this item. + This list are sorted by "shared_level". + For example, if one has `model_config` like this: + "model": { + "shared_dict": { + "my_type_map": ["foo", "bar"], + "my_des1": { + "type": "se_e2_a", + "neuron": [10, 20, 40] + }, + }, + "model_dict": { + "model_1": { + "type_map": "my_type_map", + "descriptor": "my_des1", + "fitting_net": { + "neuron": [100, 100, 100] + } + }, + "model_2": { + "type_map": "my_type_map", + "descriptor": "my_des1", + "fitting_net": { + "neuron": [100, 100, 100] + } + } + "model_3": { + "type_map": "my_type_map", + "descriptor": "my_des1:1", + "fitting_net": { + "neuron": [100, 100, 100] + } + } + } + } + The above config will init three model branches named `model_1` and `model_2` and `model_3`, + in which: + - `model_2` and `model_3` will have the same `type_map` as that in `model_1`. + - `model_2` will share all the parameters of `descriptor` with `model_1`, + while `model_3` will share part of parameters of `descriptor` with `model_1` + on human-defined share-level `1` (default is `0`, meaning share all the parameters). + - `model_1`, `model_2` and `model_3` have three different `fitting_net`s. 
+ The returned `model_config` will automatically fulfill the input `model_config` as if there's no sharing, + and the `shared_links` will keep all the sharing information with looking: + { + 'my_des1': { + 'type': 'DescrptSeA', + 'links': [ + {'model_key': 'model_1', + 'shared_type': 'descriptor', + 'shared_level': 0}, + {'model_key': 'model_2', + 'shared_type': 'descriptor', + 'shared_level': 0}, + {'model_key': 'model_3', + 'shared_type': 'descriptor', + 'shared_level': 1} + ] + } + } + + """ + assert "model_dict" in model_config, "only multi-task model can use this method!" + supported_types = ["type_map", "descriptor", "fitting_net"] + shared_dict = model_config.get("shared_dict", {}) + shared_links = {} + type_map_keys = [] + + def replace_one_item(params_dict, key_type, key_in_dict, suffix="", index=None): + shared_type = key_type + shared_key = key_in_dict + shared_level = 0 + if ":" in key_in_dict: + shared_key = key_in_dict.split(":")[0] + shared_level = int(key_in_dict.split(":")[1]) + assert ( + shared_key in shared_dict + ), f"Appointed {shared_type} {shared_key} are not in the shared_dict! Please check the input params." + if index is None: + params_dict[shared_type] = deepcopy(shared_dict[shared_key]) + else: + params_dict[index] = deepcopy(shared_dict[shared_key]) + if shared_type == "type_map": + if key_in_dict not in type_map_keys: + type_map_keys.append(key_in_dict) + else: + if shared_key not in shared_links: + class_name = get_class_name(shared_type, shared_dict[shared_key]) + shared_links[shared_key] = {"type": class_name, "links": []} + link_item = { + "model_key": model_key, + "shared_type": shared_type + suffix, + "shared_level": shared_level, + } + shared_links[shared_key]["links"].append(link_item) + + for model_key in model_config["model_dict"]: + model_params_item = model_config["model_dict"][model_key] + for item_key in model_params_item: + if item_key in supported_types: + item_params = model_params_item[item_key] + if isinstance(item_params, str): + replace_one_item(model_params_item, item_key, item_params) + elif item_params.get("type", "") == "hybrid": + for ii, hybrid_item in enumerate(item_params["list"]): + if isinstance(hybrid_item, str): + replace_one_item( + model_params_item[item_key]["list"], + item_key, + hybrid_item, + suffix=f"_hybrid_{ii}", + index=ii, + ) + for shared_key in shared_links: + shared_links[shared_key]["links"] = sorted( + shared_links[shared_key]["links"], + key=lambda x: x["shared_level"] + - ("spin" in model_config["model_dict"][x["model_key"]]) * 100, + ) + # little trick to make spin models in the front to be the base models, + # because its type embeddings are more general. + assert len(type_map_keys) == 1, "Multitask model must have only one type_map!" 
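+    # Illustrative sketch (mirroring the docstring example above): after this
+    # point every string value such as "my_des1" or "my_des1:1" in model_dict
+    # has been replaced by a deep copy of shared_dict["my_des1"], and the
+    # matching shared_links entry is sorted by shared_level, e.g.
+    #
+    #     model_config, shared_links = preprocess_shared_params(model_config)
+    #     shared_links["my_des1"]["links"][0]["shared_level"]  # 0, shares all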
+ return model_config, shared_links + + +def get_class_name(item_key, item_params): + if item_key == "descriptor": + return BaseDescriptor.get_class_by_type(item_params.get("type", "se_e2_a")) + elif item_key == "fitting_net": + return BaseFitting.get_class_by_type(item_params.get("type", "ener")) + else: + raise RuntimeError(f"Unknown class_name type {item_key}") diff --git a/deepmd/pd/utils/neighbor_stat.py b/deepmd/pd/utils/neighbor_stat.py new file mode 100644 index 0000000000..a1e60459ca --- /dev/null +++ b/deepmd/pd/utils/neighbor_stat.py @@ -0,0 +1,197 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Iterator, +) +from typing import ( + Optional, +) + +import numpy as np +import paddle + +from deepmd.pd.utils.auto_batch_size import ( + AutoBatchSize, +) +from deepmd.pd.utils.env import ( + DEVICE, +) +from deepmd.pd.utils.nlist import ( + extend_coord_with_ghosts, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat + + +class NeighborStatOP(paddle.nn.Layer): + """Class for getting neighbor statics data information. + + Parameters + ---------- + ntypes + The num of atom types + rcut + The cut-off radius + mixed_types : bool, optional + If True, treat neighbors of all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_types: bool, + ) -> None: + super().__init__() + self.rcut = rcut + self.ntypes = ntypes + self.mixed_types = mixed_types + + def forward( + self, + coord: paddle.Tensor, + atype: paddle.Tensor, + cell: Optional[paddle.Tensor], + ) -> tuple[paddle.Tensor, paddle.Tensor]: + """Calculate the neareest neighbor distance between atoms, maximum nbor size of + atoms and the output data range of the environment matrix. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. 
+ + Returns + ------- + paddle.Tensor + The minimal squared distance between two atoms, in the shape of (nframes,) + paddle.Tensor + The maximal number of neighbors + """ + nframes = coord.shape[0] + coord = coord.reshape([nframes, -1, 3]) + nloc = coord.shape[1] + coord = coord.reshape([nframes, nloc * 3]) + extend_coord, extend_atype, _ = extend_coord_with_ghosts( + coord, atype, cell, self.rcut + ) + + coord1 = extend_coord.reshape([nframes, -1]) + nall = coord1.shape[1] // 3 + coord0 = coord1[:, : nloc * 3] + diff: paddle.Tensor = coord1.reshape([nframes, -1, 3]).unsqueeze( + 1 + ) - coord0.reshape([nframes, -1, 3]).unsqueeze(2) + assert list(diff.shape) == [nframes, nloc, nall, 3] + # remove the diagonal elements + mask = paddle.eye(nloc, nall).to(dtype=paddle.bool, device=diff.place) + # diff[:, mask] = float("inf") + # diff.masked_fill_( + # paddle.broadcast_to(mask.unsqueeze([0, -1]), diff.shape), + # paddle.to_tensor(float("inf")), + # ) + diff[paddle.broadcast_to(mask.unsqueeze([0, -1]), diff.shape)] = float("inf") + rr2 = paddle.sum(paddle.square(diff), axis=-1) + min_rr2 = paddle.min(rr2, axis=-1) + # count the number of neighbors + if not self.mixed_types: + mask = rr2 < self.rcut**2 + nnei = paddle.zeros((nframes, nloc, self.ntypes), dtype=paddle.int64) + for ii in range(self.ntypes): + nnei[:, :, ii] = paddle.sum( + mask & ((extend_atype == ii)[:, None, :]), axis=-1 + ) + else: + mask = rr2 < self.rcut**2 + # virtual types (<0) are not counted + nnei = paddle.sum( + mask & ((extend_atype >= 0).unsqueeze(1)), axis=-1 + ).reshape([nframes, nloc, 1]) + max_nnei = paddle.max(nnei, axis=1) + return min_rr2, max_nnei + + +class NeighborStat(BaseNeighborStat): + """Neighbor statistics using pure NumPy. + + Parameters + ---------- + ntypes : int + The num of atom types + rcut : float + The cut-off radius + mixed_type : bool, optional, default=False + Treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_type: bool = False, + ) -> None: + super().__init__(ntypes, rcut, mixed_type) + op = NeighborStatOP(ntypes, rcut, mixed_type) + # self.op = paddle.jit.to_static(op) + self.op = op + self.auto_batch_size = AutoBatchSize() + + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system + """ + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii] + data_set_data = data_set._load_set(jj) + minrr2, max_nnei = self.auto_batch_size.execute_all( + self._execute, + data_set_data["coord"].shape[0], + data_set.get_natoms(), + data_set_data["coord"], + data_set_data["type"], + data_set_data["box"] if data_set.pbc else None, + ) + yield np.max(max_nnei, axis=0), np.min(minrr2), jj + + def _execute( + self, + coord: np.ndarray, + atype: np.ndarray, + cell: Optional[np.ndarray], + ): + """Execute the operation. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. 
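+
+        Returns
+        -------
+        np.ndarray
+            The minimal squared distance between two atoms.
+        np.ndarray
+            The maximal number of neighbors.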
+ """ + with paddle.no_grad(): + minrr2, max_nnei = self.op( + paddle.to_tensor(coord, place=DEVICE), + paddle.to_tensor(atype, place=DEVICE), + paddle.to_tensor(cell, place=DEVICE) if cell is not None else None, + ) + minrr2 = minrr2.numpy() + max_nnei = max_nnei.numpy() + return minrr2, max_nnei diff --git a/deepmd/pd/utils/nlist.py b/deepmd/pd/utils/nlist.py new file mode 100644 index 0000000000..851ff5293d --- /dev/null +++ b/deepmd/pd/utils/nlist.py @@ -0,0 +1,534 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, + Union, +) + +import paddle + +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.pd.utils.region import ( + normalize_coord, + to_face_distance, +) + + +def extend_input_and_build_neighbor_list( + coord, + atype, + rcut: float, + sel: list[int], + mixed_types: bool = False, + box: Optional[paddle.Tensor] = None, +): + nframes, nloc = atype.shape[:2] + if box is not None: + box_gpu = box.to(coord.place) + coord_normalized = normalize_coord( + coord.reshape([nframes, nloc, 3]), + box_gpu.reshape([nframes, 3, 3]), + ) + else: + box_gpu = None + coord_normalized = coord.clone() + extended_coord, extended_atype, mapping = extend_coord_with_ghosts( + coord_normalized, atype, box_gpu, rcut, box + ) + nlist = build_neighbor_list( + extended_coord, + extended_atype, + nloc, + rcut, + sel, + distinguish_types=(not mixed_types), + ) + extended_coord = extended_coord.reshape([nframes, -1, 3]) + return extended_coord, extended_atype, mapping, nlist + + +def build_neighbor_list( + coord: paddle.Tensor, + atype: paddle.Tensor, + nloc: int, + rcut: float, + sel: Union[int, list[int]], + distinguish_types: bool = True, +) -> paddle.Tensor: + """Build neightbor list for a single frame. keeps nsel neighbors. + + Parameters + ---------- + coord : paddle.Tensor + exptended coordinates of shape [batch_size, nall x 3] + atype : paddle.Tensor + extended atomic types of shape [batch_size, nall] + if type < 0 the atom is treat as virtual atoms. + nloc : int + number of local atoms. + rcut : float + cut-off radius + sel : int or List[int] + maximal number of neighbors (of each type). + if distinguish_types==True, nsel should be list and + the length of nsel should be equal to number of + types. + distinguish_types : bool + distinguish different types. + + Returns + ------- + neighbor_list : paddle.Tensor + Neighbor list of shape [batch_size, nloc, nsel], the neighbors + are stored in an ascending order. If the number of + neighbors is less than nsel, the positions are masked + with -1. The neighbor list of an atom looks like + |------ nsel ------| + xx xx xx xx -1 -1 -1 + if distinguish_types==True and we have two types + |---- nsel[0] -----| |---- nsel[1] -----| + xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1 + For virtual atoms all neighboring positions are filled with -1. + + """ + batch_size = coord.shape[0] + coord = coord.reshape([batch_size, -1]) + nall = coord.shape[1] // 3 + # fill virtual atoms with large coords so they are not neighbors of any + # real atom. 
+ if coord.numel() > 0: + xmax = paddle.max(coord) + 2.0 * rcut + else: + xmax = paddle.zeros([], dtype=coord.dtype).to(device=coord.place) + 2.0 * rcut + # nf x nall + is_vir = atype < 0 + coord1 = paddle.where( + is_vir[:, :, None], xmax, coord.reshape([batch_size, nall, 3]) + ).reshape([batch_size, nall * 3]) + if isinstance(sel, int): + sel = [sel] + # nloc x 3 + coord0 = coord1[:, : nloc * 3] + # nloc x nall x 3 + diff = coord1.reshape([batch_size, -1, 3]).unsqueeze(1) - coord0.reshape( + [batch_size, -1, 3] + ).unsqueeze(2) + if paddle.in_dynamic_mode(): + assert list(diff.shape) == [batch_size, nloc, nall, 3] + # nloc x nall + # rr = paddle.linalg.norm(diff, axis=-1) + rr = decomp.norm(diff, axis=-1) + # if central atom has two zero distances, sorting sometimes can not exclude itself + rr = rr - paddle.eye(nloc, nall, dtype=rr.dtype).to(device=rr.place).unsqueeze(0) + rr, nlist = paddle.sort(rr, axis=-1), paddle.argsort(rr, axis=-1) + # nloc x (nall-1) + rr = rr[:, :, 1:] + nlist = nlist[:, :, 1:] + t = _trim_mask_distinguish_nlist( + is_vir, atype, rr, nlist, rcut, sel, distinguish_types + ) + return t + + +def _trim_mask_distinguish_nlist( + is_vir_cntl: paddle.Tensor, + atype_neig: paddle.Tensor, + rr: paddle.Tensor, + nlist: paddle.Tensor, + rcut: float, + sel: list[int], + distinguish_types: bool, +) -> paddle.Tensor: + """Trim the size of nlist, mask if any central atom is virtual, distinguish types if necessary.""" + nsel = sum(sel) + # nloc x nsel + batch_size, nloc, nnei = rr.shape + if paddle.in_dynamic_mode(): + assert batch_size == is_vir_cntl.shape[0] + if nsel <= nnei: + rr = rr[:, :, :nsel] + nlist = nlist[:, :, :nsel] + else: + rr = paddle.concat( + [ + rr, + paddle.ones([batch_size, nloc, nsel - nnei]).to( + device=rr.place, dtype=rr.dtype + ) + + rcut, + ], # pylint: disable=no-explicit-dtype + axis=-1, + ) + nlist = paddle.concat( + [ + nlist, + paddle.ones([batch_size, nloc, nsel - nnei], dtype=nlist.dtype).to( + device=rr.place + ), + ], + axis=-1, + ) + if paddle.in_dynamic_mode(): + assert list(nlist.shape) == [batch_size, nloc, nsel] + nlist = paddle.where( + paddle.logical_or((rr > rcut), is_vir_cntl[:, :nloc, None]), -1, nlist + ) + if distinguish_types: + return nlist_distinguish_types(nlist, atype_neig, sel) + else: + return nlist + + +def build_directional_neighbor_list( + coord_cntl: paddle.Tensor, + atype_cntl: paddle.Tensor, + coord_neig: paddle.Tensor, + atype_neig: paddle.Tensor, + rcut: float, + sel: Union[int, list[int]], + distinguish_types: bool = True, +) -> paddle.Tensor: + """Build directional neighbor list. + + With each central atom, all the neighbor atoms in the cut-off radius will + be recorded in the neighbor list. The maximum neighbors is nsel. If the real + number of neighbors is larger than nsel, the neighbors will be sorted with the + distance and the first nsel neighbors are kept. + + Important: the central and neighboring atoms are assume to be different atoms. + + Parameters + ---------- + coord_central : paddle.Tensor + coordinates of central atoms. assumed to be local atoms. + shape [batch_size, nloc_central x 3] + atype_central : paddle.Tensor + atomic types of central atoms. shape [batch_size, nloc_central] + if type < 0 the atom is treated as virtual atoms. + coord_neighbor : paddle.Tensor + extended coordinates of neighbors atoms. shape [batch_size, nall_neighbor x 3] + atype_central : paddle.Tensor + extended atomic types of neighbors atoms. 
shape [batch_size, nall_neighbor] + if type < 0 the atom is treated as virtual atoms. + rcut : float + cut-off radius + sel : int or List[int] + maximal number of neighbors (of each type). + if distinguish_types==True, nsel should be list and + the length of nsel should be equal to number of + types. + distinguish_types : bool + distinguish different types. + + Returns + ------- + neighbor_list : paddle.Tensor + Neighbor list of shape [batch_size, nloc_central, nsel], the neighbors + are stored in an ascending order. If the number of neighbors is less than nsel, + the positions are masked with -1. The neighbor list of an atom looks like + |------ nsel ------| + xx xx xx xx -1 -1 -1 + if distinguish_types==True and we have two types + |---- nsel[0] -----| |---- nsel[1] -----| + xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1 + For virtual atoms all neighboring positions are filled with -1. + """ + batch_size = coord_cntl.shape[0] + coord_cntl = coord_cntl.reshape([batch_size, -1]) + nloc_cntl = coord_cntl.shape[1] // 3 + coord_neig = coord_neig.reshape([batch_size, -1]) + nall_neig = coord_neig.shape[1] // 3 + # fill virtual atoms with large coords so they are not neighbors of any + # real atom. + if coord_neig.numel() > 0: + xmax = paddle.max(coord_cntl) + 2.0 * rcut + else: + xmax = ( + paddle.zeros([1], dtype=coord_neig.dtype, device=coord_neig.place) + + 2.0 * rcut + ) + # nf x nloc + is_vir_cntl = atype_cntl < 0 + # nf x nall + is_vir_neig = atype_neig < 0 + # nf x nloc x 3 + coord_cntl = coord_cntl.reshape([batch_size, nloc_cntl, 3]) + # nf x nall x 3 + coord_neig = paddle.where( + is_vir_neig[:, :, None], xmax, coord_neig.reshape([batch_size, nall_neig, 3]) + ).reshape([batch_size, nall_neig, 3]) + # nsel + if isinstance(sel, int): + sel = [sel] + # nloc x nall x 3 + diff = coord_neig[:, None, :, :] - coord_cntl[:, :, None, :] + if paddle.in_dynamic_mode(): + assert list(diff.shape) == [batch_size, nloc_cntl, nall_neig, 3] + # nloc x nall + # rr = paddle.linalg.norm(diff, axis=-1) + rr = decomp.norm(diff, axis=-1) + rr, nlist = paddle.sort(rr, axis=-1), paddle.argsort(rr, axis=-1) + + # We assume that the central and neighbor atoms are diffferent, + # thus we do not need to exclude self-neighbors. + # # if central atom has two zero distances, sorting sometimes can not exclude itself + # rr -= paddle.eye(nloc_cntl, nall_neig, dtype=rr.dtype, device=rr.place).unsqueeze(0) + # rr, nlist = paddle.sort(rr, axis=-1) + # # nloc x (nall-1) + # rr = rr[:, :, 1:] + # nlist = nlist[:, :, 1:] + + return _trim_mask_distinguish_nlist( + is_vir_cntl, atype_neig, rr, nlist, rcut, sel, distinguish_types + ) + + +def nlist_distinguish_types( + nlist: paddle.Tensor, + atype: paddle.Tensor, + sel: list[int], +): + """Given a nlist that does not distinguish atom types, return a nlist that + distinguish atom types. 
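+
+    The result is the per-type concatenation: for each type ii the first
+    sel[ii] neighbors of that type are kept (nearest first) and padded with
+    -1, so the returned list has shape nf x nloc x sum(sel).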
+ + """ + nf, nloc, nnei = nlist.shape + ret_nlist = [] + # nloc x nall + tmp_atype = paddle.tile(atype.unsqueeze(1), [1, nloc, 1]) + mask = nlist == -1 + # nloc x s(nsel) + # tnlist = paddle.take_along_axis( + # tmp_atype, + # axis=2, + # indices=nlist.masked_fill(mask, 0), + # ) + tnlist = decomp.take_along_axis( + tmp_atype, + axis=2, + indices=nlist.masked_fill(mask, 0), + ) + tnlist = tnlist.masked_fill(mask, -1) + snsel = tnlist.shape[2] + for ii, ss in enumerate(sel): + # nloc x s(nsel) + # to int because bool cannot be sort on GPU + pick_mask = (tnlist == ii).to(paddle.int64) + # nloc x s(nsel), stable sort, nearer neighbors first + pick_mask, imap = ( + paddle.sort(pick_mask, axis=-1, descending=True, stable=True), + paddle.argsort(pick_mask, axis=-1, descending=True, stable=True), + ) + # nloc x s(nsel) + # inlist = paddle.take_along_axis(nlist, axis=2, indices=imap) + inlist = decomp.take_along_axis(nlist, axis=2, indices=imap) + inlist = inlist.masked_fill(~(pick_mask.to(paddle.bool)), -1) + # nloc x nsel[ii] + ret_nlist.append(paddle.split(inlist, [ss, snsel - ss], axis=-1)[0]) + return paddle.concat(ret_nlist, axis=-1) + + +# build_neighbor_list = paddle.vmap( +# build_neighbor_list_lower, +# in_dims=(0,0,None,None,None), +# out_dims=(0), +# ) + + +def get_multiple_nlist_key( + rcut: float, + nsel: int, +) -> str: + return str(rcut) + "_" + str(nsel) + + +def build_multiple_neighbor_list( + coord: paddle.Tensor, + nlist: paddle.Tensor, + rcuts: list[float], + nsels: list[int], +) -> dict[str, paddle.Tensor]: + """Input one neighbor list, and produce multiple neighbor lists with + different cutoff radius and numbers of selection out of it. The + required rcuts and nsels should be smaller or equal to the input nlist. + + Parameters + ---------- + coord : paddle.Tensor + exptended coordinates of shape [batch_size, nall x 3] + nlist : paddle.Tensor + Neighbor list of shape [batch_size, nloc, nsel], the neighbors + should be stored in an ascending order. + rcuts : List[float] + list of cut-off radius in ascending order. + nsels : List[int] + maximal number of neighbors in ascending order. + + Returns + ------- + nlist_dict : Dict[str, paddle.Tensor] + A dict of nlists, key given by get_multiple_nlist_key(rc, nsel) + value being the corresponding nlist. 
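+
+    Examples
+    --------
+    A minimal sketch (assuming nlist was built with the largest requested
+    cutoff and selection, here 4.0 and 20):
+
+    >>> nlists = build_multiple_neighbor_list(coord, nlist, [2.0, 4.0], [10, 20])
+    >>> sub_nlist = nlists[get_multiple_nlist_key(2.0, 10)]  # nb x nloc x 10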
+ + """ + if paddle.in_dynamic_mode(): + assert len(rcuts) == len(nsels) + if len(rcuts) == 0: + return {} + nb, nloc, nsel = nlist.shape + if nsel < nsels[-1]: + pad = -paddle.ones( + [nb, nloc, nsels[-1] - nsel], + dtype=nlist.dtype, + ).to(device=nlist.place) + # nb x nloc x nsel + nlist = paddle.concat([nlist, pad], axis=-1) + if paddle.is_tensor(nsel): + nsel = paddle.to_tensor(nsels[-1], dtype=nsel.dtype) + else: + nsel = nsels[-1] + + # nb x nall x 3 + coord1 = coord.reshape([nb, -1, 3]) + nall = coord1.shape[1] + # nb x nloc x 3 + coord0 = coord1[:, :nloc, :] + nlist_mask = nlist == -1 + # nb x (nloc x nsel) x 3 + index = ( + nlist.masked_fill(nlist_mask, 0) + .reshape([nb, nloc * nsel]) + .unsqueeze(-1) + .expand([-1, -1, 3]) + ) + # nb x nloc x nsel x 3 + # coord2 = paddle.take_along_axis(coord1, axis=1, index=index).reshape( + # [nb, nloc, nsel, 3] + # ) + coord2 = decomp.take_along_axis(coord1, axis=1, indices=index).reshape( + [nb, nloc, nsel, 3] + ) + # nb x nloc x nsel x 3 + diff = coord2 - coord0[:, :, None, :] + # nb x nloc x nsel + # rr = paddle.linalg.norm(diff, axis=-1) + rr = decomp.norm(diff, axis=-1) + rr.masked_fill(nlist_mask, float("inf")) + nlist0 = nlist + ret = {} + for rc, ns in zip(rcuts[::-1], nsels[::-1]): + nlist0 = nlist0[:, :, :ns].masked_fill(rr[:, :, :ns] > rc, -1) + ret[get_multiple_nlist_key(rc, ns)] = nlist0 + return ret + + +def extend_coord_with_ghosts( + coord: paddle.Tensor, + atype: paddle.Tensor, + cell: Optional[paddle.Tensor], + rcut: float, + cell_cpu: Optional[paddle.Tensor] = None, +) -> tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]: + """Extend the coordinates of the atoms by appending peridoc images. + The number of images is large enough to ensure all the neighbors + within rcut are appended. + + Parameters + ---------- + coord : paddle.Tensor + original coordinates of shape [-1, nloc*3]. + atype : paddle.Tensor + atom type of shape [-1, nloc]. + cell : paddle.Tensor + simulation cell tensor of shape [-1, 9]. + rcut : float + the cutoff radius + cell_cpu : paddle.Tensor + cell on cpu for performance + + Returns + ------- + extended_coord: paddle.Tensor + extended coordinates of shape [-1, nall*3]. + extended_atype: paddle.Tensor + extended atom type of shape [-1, nall]. 
+ index_mapping: paddle.Tensor + maping extended index to the local index + + """ + device = coord.place + nf, nloc = atype.shape[:2] + aidx = paddle.tile(paddle.arange(nloc).to(device=device).unsqueeze(0), [nf, 1]) # pylint: disable=no-explicit-dtype + if cell is None: + nall = nloc + extend_coord = coord.clone() + extend_atype = atype.clone() + extend_aidx = aidx.clone() + else: + coord = coord.reshape([nf, nloc, 3]) + cell = cell.reshape([nf, 3, 3]) + cell_cpu = cell_cpu.reshape([nf, 3, 3]) if cell_cpu is not None else cell + # nf x 3 + to_face = to_face_distance(cell_cpu) + # nf x 3 + # *2: ghost copies on + and - directions + # +1: central cell + nbuff = paddle.ceil(rcut / to_face) + INT64_MIN = -9223372036854775808 + nbuff = paddle.where( + paddle.isinf(nbuff), + paddle.full_like(nbuff, INT64_MIN, dtype=paddle.int64), + nbuff.astype(paddle.int64), + ) + # 3 + nbuff = paddle.amax(nbuff, axis=0) # faster than paddle.max + nbuff_cpu = nbuff.cpu() + xi = ( + paddle.arange(-nbuff_cpu[0], nbuff_cpu[0] + 1, 1).to( + dtype=env.GLOBAL_PD_FLOAT_PRECISION + ) + # .cpu() + ) # pylint: disable=no-explicit-dtype + yi = ( + paddle.arange(-nbuff_cpu[1], nbuff_cpu[1] + 1, 1).to( + dtype=env.GLOBAL_PD_FLOAT_PRECISION + ) + # .cpu() + ) # pylint: disable=no-explicit-dtype + zi = ( + paddle.arange(-nbuff_cpu[2], nbuff_cpu[2] + 1, 1).to( + dtype=env.GLOBAL_PD_FLOAT_PRECISION + ) + # .cpu() + ) # pylint: disable=no-explicit-dtype + eye_3 = ( + paddle.eye(3, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + dtype=env.GLOBAL_PD_FLOAT_PRECISION + ) + # .cpu() + ) + xyz = xi.reshape([-1, 1, 1, 1]) * eye_3[0] + xyz = xyz + yi.reshape([1, -1, 1, 1]) * eye_3[1] + xyz = xyz + zi.reshape([1, 1, -1, 1]) * eye_3[2] + xyz = xyz.reshape([-1, 3]) + # xyz = xyz.to(device=device) + # ns x 3 + # shift_idx = xyz[paddle.argsort(paddle.norm(xyz, axis=1))] + shift_idx = xyz[paddle.argsort(decomp.norm(xyz, axis=1))] + ns, _ = shift_idx.shape + nall = ns * nloc + # nf x ns x 3 + shift_vec = paddle.einsum("sd,fdk->fsk", shift_idx, cell) + # nf x ns x nloc x 3 + extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :] + # nf x ns x nloc + extend_atype = paddle.tile(atype.unsqueeze(-2), [1, ns, 1]) + # nf x ns x nloc + extend_aidx = paddle.tile(aidx.unsqueeze(-2), [1, ns, 1]) + return ( + extend_coord.reshape([nf, nall * 3]).to(device), + extend_atype.reshape([nf, nall]).to(device), + extend_aidx.reshape([nf, nall]).to(device), + ) diff --git a/deepmd/pd/utils/no_use_init.py b/deepmd/pd/utils/no_use_init.py new file mode 100644 index 0000000000..9f363d6db0 --- /dev/null +++ b/deepmd/pd/utils/no_use_init.py @@ -0,0 +1,515 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +""" +The initialization method under this module is aligned with pytorch initialization. +If you need to use the initialization method of PaddlePaddle, please refer to +[paddle.nn.initializer](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/nn/initializer). + +This code is based on [torch.nn.init](https://github.com/pytorch/pytorch/blob/main/torch/nn/init.py) +Ths copyright of pytorch/pytorch is a BSD-style license, as found in the LICENSE file. 
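+
+A quick usage sketch (illustrative only, with the initializers below imported
+from this module):
+
+    import math
+    import paddle
+
+    layer = paddle.nn.Linear(16, 32)
+    kaiming_uniform_(layer.weight, a=math.sqrt(5))
+    zeros_(layer.bias)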
+""" + +from __future__ import ( + annotations, +) + +import math +import warnings + +import numpy as np +import paddle +from paddle import ( + nn, +) +from typing_extensions import ( + Literal, +) + +__all__ = [ + "uniform_", + "normal_", + "trunc_normal_", + "glorot_normal_", + "constant_", + "ones_", + "zeros_", + "xavier_uniform_", + "xavier_normal_", + "kaiming_uniform_", + "kaiming_normal_", + "linear_init_", + "conv_init_", +] + + +def _no_grad_uniform_(tensor, a, b): + with paddle.no_grad(): + tensor.set_value( + paddle.uniform(shape=tensor.shape, dtype=tensor.dtype, min=a, max=b) + ) + return tensor + + +def _no_grad_normal_(tensor, mean=0.0, std=1.0): + with paddle.no_grad(): + tensor.set_value(paddle.normal(mean=mean, std=std, shape=tensor.shape)) + return tensor + + +def _no_grad_trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0): + # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + f"mean({mean}) is more than 2 std({std}) from [a, b]([{a}, {b}]) in _no_grad_trunc_normal_. " + "The distribution of values may be incorrect." + ) + + with paddle.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + _tensor = paddle.uniform( + shape=tensor.shape, dtype=tensor.dtype, min=2 * l - 1, max=2 * u - 1 + ) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + _tensor.erfinv_() + + # Transform to proper mean, std + _tensor = paddle.multiply( + _tensor, paddle.to_tensor(std * math.sqrt(2.0), tensor.dtype) + ) + _tensor = paddle.add(_tensor, paddle.to_tensor(mean, tensor.dtype)) + + # Clamp to ensure it"s in the proper range + _tensor = paddle.clip(_tensor, min=a, max=b) + tensor.set_value(_tensor) + return tensor + + +def _no_grad_fill_(tensor, value=0.0): + with paddle.no_grad(): + tensor.set_value(paddle.full_like(tensor, value, dtype=tensor.dtype)) + return tensor + + +def uniform_(tensor: paddle.Tensor, a: float, b: float) -> paddle.Tensor: + """Modify tensor inplace using uniform_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + a (float): Min value. + b (float): Max value. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.uniform_(param, -1, 1) + """ + return _no_grad_uniform_(tensor, a, b) + + +def normal_( + tensor: paddle.Tensor, mean: float = 0.0, std: float = 1.0 +) -> paddle.Tensor: + """Modify tensor inplace using normal_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + mean (float, optional): Mean value. Defaults to 0.0. + std (float, optional): Std value. Defaults to 1.0. + + Returns + ------- + paddle.Tensor: Initialized tensor. 
+ + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.normal_(param, 0, 1) + """ + return _no_grad_normal_(tensor, mean, std) + + +def trunc_normal_( + tensor: paddle.Tensor, + mean: float = 0.0, + std: float = 1.0, + a: float = -2.0, + b: float = 2.0, +) -> paddle.Tensor: + """Modify tensor inplace using trunc_normal_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + mean (float, optional): The mean of the normal distribution. Defaults to 0.0. + std (float, optional): The standard deviation of the normal distribution. Defaults to 1.0. + a (float, optional): The minimum cutoff value. Defaults to -2.0. + b (float, optional): The maximum cutoff value. Defaults to 2.0. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.trunc_normal_(param, 0.0, 1.0) + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) + + +def constant_(tensor: paddle.Tensor, value: float = 0.0) -> paddle.Tensor: + """Modify tensor inplace using constant_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + value (float, optional): Value to fill tensor. Defaults to 0.0. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.constant_(param, 2) + """ + return _no_grad_fill_(tensor, value) + + +def ones_(tensor: paddle.Tensor) -> paddle.Tensor: + """Modify tensor inplace using ones_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.ones_(param) + """ + return _no_grad_fill_(tensor, 1) + + +def zeros_(tensor: paddle.Tensor) -> paddle.Tensor: + """Modify tensor inplace using zeros_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.zeros_(param) + """ + return _no_grad_fill_(tensor, 0) + + +def _calculate_fan_in_and_fan_out(tensor, reverse=False): + """ + Calculate (fan_in, _fan_out) for tensor. + + Args: + tensor (paddle.Tensor): paddle.Tensor. + reverse (bool): Tensor data format order, False by default as [fout, fin, ...]. + e.g. : conv.weight [cout, cin, kh, kw] is False; linear.weight [cin, cout] + is True. + + Return: + Tuple[float, float]: (fan_in, fan_out). + """ + if tensor.ndim < 2: + raise ValueError( + f"tensor.ndim should be no less than 2, but got {tensor.ndim}." + ) + + if reverse: + num_input_fmaps, num_output_fmaps = tensor.shape[0], tensor.shape[1] + else: + num_input_fmaps, num_output_fmaps = tensor.shape[1], tensor.shape[0] + + receptive_field_size = 1 + if tensor.ndim > 2: + receptive_field_size = np.prod(tensor.shape[2:]) + + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + + return fan_in, fan_out + + +def xavier_uniform_( + tensor: paddle.Tensor, gain: float = 1.0, reverse: bool = False +) -> paddle.Tensor: + """Modify tensor inplace using xavier_uniform_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. 
+ gain (float, optional): Hyperparameter. Defaults to 1.0. + reverse (bool, optional): Tensor data format order, False by default as + [fout, fin, ...].. Defaults to False. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.xavier_uniform_(param) + """ + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse) + std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) + k = math.sqrt(3.0) * std + return _no_grad_uniform_(tensor, -k, k) + + +def xavier_normal_( + tensor: paddle.Tensor, gain: float = 1.0, reverse: bool = False +) -> paddle.Tensor: + """Modify tensor inplace using xavier_normal_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + gain (float, optional): Hyperparameter. Defaults to 1.0. + reverse (bool, optional): Tensor data format order, False by + default as [fout, fin, ...]. Defaults to False. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.xavier_normal_(param) + """ + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse) + std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) + return _no_grad_normal_(tensor, 0, std) + + +# reference: https://pytorch.org/docs/stable/_modules/torch/nn/init.html +def _calculate_correct_fan(tensor, mode, reverse=False): + mode = mode.lower() + valid_modes = ["fan_in", "fan_out"] + if mode not in valid_modes: + raise ValueError(f"Mode {mode} not supported, please use one of {valid_modes}") + + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse) + + return fan_in if mode == "fan_in" else fan_out + + +def _calculate_gain(nonlinearity, param=None): + linear_fns = [ + "linear", + "conv1d", + "conv2d", + "conv3d", + "conv_transpose1d", + "conv_transpose2d", + "conv_transpose3d", + ] + if nonlinearity in linear_fns or nonlinearity == "sigmoid": + return 1 + elif nonlinearity == "tanh": + return 5.0 / 3 + elif nonlinearity == "relu": + return math.sqrt(2.0) + elif nonlinearity == "leaky_relu": + if param is None: + negative_slope = 0.01 + elif ( + not isinstance(param, bool) + and isinstance(param, int) + or isinstance(param, float) + ): + # True/False are instances of int, hence check above + negative_slope = param + else: + raise ValueError(f"negative_slope {param} not a valid number") + return math.sqrt(2.0 / (1 + negative_slope**2)) + elif nonlinearity == "selu": + return 3.0 / 4 + else: + raise ValueError(f"Unsupported nonlinearity {nonlinearity}") + + +def kaiming_uniform_( + tensor: paddle.Tensor, + a: float = 0, + mode: Literal["fan_in", "fan_out"] = "fan_in", + nonlinearity: str = "leaky_relu", + reverse: bool = False, +) -> paddle.Tensor: + """Modify tensor inplace using kaiming_uniform method. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + a (float, optional): The negative slope of the rectifier used after this layer. + Defaults to 0. + mode (Literal["fan_in", "fan_out"], optional): + ["fan_in", "fan_out"]. Defaults to "fan_in". + nonlinearity (str, optional): Nonlinearity method name. Defaults to "leaky_relu". + reverse (bool, optional): Tensor data format order, False by default as + [fout, fin, ...].. Defaults to False. + + Returns + ------- + paddle.Tensor: Initialized tensor. 
+ + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.kaiming_uniform_(param) + """ + fan = _calculate_correct_fan(tensor, mode, reverse) + gain = _calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + k = math.sqrt(3.0) * std + return _no_grad_uniform_(tensor, -k, k) + + +def kaiming_normal_( + tensor: paddle.Tensor, + a: float = 0, + mode: Literal["fan_in", "fan_out"] = "fan_in", + nonlinearity: str = "leaky_relu", + reverse: bool = False, +) -> paddle.Tensor: + """Modify tensor inplace using kaiming_normal_. + + Args: + tensor (paddle.Tensor): Paddle Tensor. + a (float, optional): The negative slope of the rectifier used after this layer. + Defaults to 0. + mode (Literal["fan_in", "fan_out"], optional): Either + 'fan_in' (default) or 'fan_out'. Defaults to "fan_in". + nonlinearity (str, optional): Nonlinearity method name. Defaults to "leaky_relu". + reverse (bool, optional): Tensor data format order. Defaults to False. + + Returns + ------- + paddle.Tensor: Initialized tensor. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.kaiming_normal_(param) + """ + fan = _calculate_correct_fan(tensor, mode, reverse) + gain = _calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + return _no_grad_normal_(tensor, 0, std) + + +def linear_init_(module: nn.Layer) -> None: + """Initialize module's weight and bias as it is a linear layer. + + Args: + module (nn.Layer): Linear Layer to be initialized. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> layer = paddle.nn.Linear(128, 256) + >>> ppsci.utils.initializer.linear_init_(layer) + """ + kaiming_uniform_(module.weight, a=math.sqrt(5)) + if module.bias is not None: + fan_in, _ = _calculate_fan_in_and_fan_out(module.weight, reverse=True) + bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0 + uniform_(module.bias, -bound, bound) + + +def conv_init_(module: nn.Layer) -> None: + """Initialize module's weight and bias as it is a conv layer. + + Args: + module (nn.Layer): Convolution Layer to be initialized. + + Examples + -------- + >>> import paddle + >>> import ppsci + >>> layer = paddle.nn.Conv2D(4, 16, 2) + >>> ppsci.utils.initializer.conv_init_(layer) + """ + kaiming_uniform_(module.weight, a=math.sqrt(5)) + if module.bias is not None: + fan_in, _ = _calculate_fan_in_and_fan_out(module.weight, reverse=False) + if fan_in != 0: + bound = 1 / math.sqrt(fan_in) + uniform_(module.bias, -bound, bound) + + +def glorot_normal_(tensor: paddle.Tensor) -> paddle.Tensor: + """Modify tensor inplace using jax-style glorot_normal. + + Args: + tensor (paddle.Tensor): Paddle Tensor/Paramter. + + Returns + ------- + paddle.Tensor: Initialized tensor. 
+ + Examples + -------- + >>> import paddle + >>> import ppsci + >>> param = paddle.empty((128, 256), "float32") + >>> param = ppsci.utils.initializer.glorot_normal_(param) + """ + assert ( + tensor.ndim == 2 + ), f"glorot_normal_ only support 2D tensor now, but got ndim={tensor.ndim}" + fin, fout = tensor.shape + var = 2.0 / (fin + fout) + stddev = math.sqrt(var) * 0.87962566103423978 + trunc_normal_(tensor) + tensor.set_value(tensor * stddev) + return tensor diff --git a/deepmd/pd/utils/plugin.py b/deepmd/pd/utils/plugin.py new file mode 100644 index 0000000000..aa901c06e8 --- /dev/null +++ b/deepmd/pd/utils/plugin.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Base of plugin systems.""" + +from deepmd.utils.plugin import ( + Plugin, + PluginVariant, + VariantABCMeta, + VariantMeta, +) + +__all__ = [ + "Plugin", + "VariantMeta", + "VariantABCMeta", + "PluginVariant", +] diff --git a/deepmd/pd/utils/preprocess.py b/deepmd/pd/utils/preprocess.py new file mode 100644 index 0000000000..052d9941f8 --- /dev/null +++ b/deepmd/pd/utils/preprocess.py @@ -0,0 +1,314 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from typing import ( + Union, +) + +import paddle + +from deepmd.pd.utils import ( + decomp, + env, +) + +log = logging.getLogger(__name__) + + +class Region3D: + def __init__(self, boxt): + """Construct a simulation box.""" + boxt = boxt.reshape([3, 3]) + self.boxt = boxt # convert physical coordinates to internal ones + self.rec_boxt = paddle.linalg.inv( + self.boxt + ) # convert internal coordinates to physical ones + + self.volume = paddle.linalg.det(self.boxt) # compute the volume + + # boxt = boxt.permute(1, 0) + c_yz = paddle.cross(boxt[1], boxt[2]) + # self._h2yz = self.volume / paddle.linalg.norm(c_yz) + self._h2yz = self.volume / decomp.norm(c_yz) + c_zx = paddle.cross(boxt[2], boxt[0]) + # self._h2zx = self.volume / paddle.linalg.norm(c_zx) + self._h2zx = self.volume / decomp.norm(c_zx) + c_xy = paddle.cross(boxt[0], boxt[1]) + # self._h2xy = self.volume / paddle.linalg.norm(c_xy) + self._h2xy = self.volume / decomp.norm(c_xy) + + def phys2inter(self, coord): + """Convert physical coordinates to internal ones.""" + return coord @ self.rec_boxt + + def inter2phys(self, coord): + """Convert internal coordinates to physical ones.""" + return coord @ self.boxt + + def get_face_distance(self): + """Return face distinces to each surface of YZ, ZX, XY.""" + return paddle.stack([self._h2yz, self._h2zx, self._h2xy]) + + +def normalize_coord(coord, region: Region3D, nloc: int): + """Move outer atoms into region by mirror. + + Args: + - coord: shape is [nloc*3] + """ + tmp_coord = coord.clone() + inter_cood = paddle.remainder(region.phys2inter(tmp_coord), 1.0) + tmp_coord = region.inter2phys(inter_cood) + return tmp_coord + + +def compute_serial_cid(cell_offset, ncell): + """Tell the sequential cell ID in its 3D space. 
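+
+    The 3D cell offset is flattened as
+    ``offset_x * ncell_y * ncell_z + offset_y * ncell_z + offset_z``.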
+ + Args: + - cell_offset: shape is [3] + - ncell: shape is [3] + """ + cell_offset[:, 0] *= ncell[1] * ncell[2] + cell_offset[:, 1] *= ncell[2] + return cell_offset.sum(-1) + + +def compute_pbc_shift(cell_offset, ncell): + """Tell shift count to move the atom into region.""" + shift = paddle.zeros_like(cell_offset) + shift = shift + (cell_offset < 0) * -( + paddle.floor(paddle.divide(cell_offset, ncell)) + ) + shift = shift + (cell_offset >= ncell) * -( + paddle.floor(paddle.divide((cell_offset - ncell), ncell)) + 1 + ) + assert paddle.all(cell_offset + shift * ncell >= 0) + assert paddle.all(cell_offset + shift * ncell < ncell) + return shift + + +def build_inside_clist(coord, region: Region3D, ncell): + """Build cell list on atoms inside region. + + Args: + - coord: shape is [nloc*3] + - ncell: shape is [3] + """ + loc_ncell = int(paddle.prod(ncell)) # num of local cells + nloc = coord.numel() // 3 # num of local atoms + inter_cell_size = 1.0 / ncell + + inter_cood = region.phys2inter(coord.reshape([-1, 3])) + cell_offset = paddle.floor(inter_cood / inter_cell_size).to(paddle.int64) + # numerical error brought by conversion from phys to inter back and force + # may lead to negative value + cell_offset[cell_offset < 0] = 0 + delta = cell_offset - ncell + a2c = compute_serial_cid(cell_offset, ncell) # cell id of atoms + arange = paddle.arange(0, loc_ncell, 1) # pylint: disable=no-explicit-dtype,no-explicit-device + cellid = a2c == arange.unsqueeze(-1) # one hot cellid + c2a = cellid.nonzero() + lst = [] + cnt = 0 + bincount = paddle.bincount(a2c, minlength=loc_ncell) + for i in range(loc_ncell): + n = bincount[i] + lst.append(c2a[cnt : cnt + n, 1]) + cnt += n + return a2c, lst + + +def append_neighbors(coord, region: Region3D, atype, rcut: float): + """Make ghost atoms who are valid neighbors. 
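+
+    Ghost atoms are periodic images of local atoms that lie within ``rcut`` of
+    the region boundary; they are appended after the local atoms.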
+ + Args: + - coord: shape is [nloc*3] + - atype: shape is [nloc] + """ + to_face = region.get_face_distance() + + # compute num and size of local cells + ncell = paddle.floor(to_face / rcut).to(paddle.int64) + ncell[ncell == 0] = 1 + cell_size = to_face / ncell + ngcell = ( + paddle.floor(rcut / cell_size).to(paddle.int64) + 1 + ) # num of cells out of local, which contain ghost atoms + + # add ghost atoms + a2c, c2a = build_inside_clist(coord, region, ncell) + xi = paddle.arange(-ngcell[0], ncell[0] + ngcell[0], 1) # pylint: disable=no-explicit-dtype,no-explicit-device + yi = paddle.arange(-ngcell[1], ncell[1] + ngcell[1], 1) # pylint: disable=no-explicit-dtype,no-explicit-device + zi = paddle.arange(-ngcell[2], ncell[2] + ngcell[2], 1) # pylint: disable=no-explicit-dtype,no-explicit-device + xyz = xi.reshape([-1, 1, 1, 1]) * paddle.to_tensor([1, 0, 0], dtype=paddle.int64) # pylint: disable=no-explicit-device + xyz = xyz + yi.reshape([1, -1, 1, 1]) * paddle.to_tensor( + [0, 1, 0], dtype=paddle.int64 + ) # pylint: disable=no-explicit-device + xyz = xyz + zi.reshape([1, 1, -1, 1]) * paddle.to_tensor( + [0, 0, 1], dtype=paddle.int64 + ) # pylint: disable=no-explicit-device + xyz = xyz.reshape([-1, 3]) + mask_a = (xyz >= 0).all(axis=-1) + mask_b = (xyz < ncell).all(axis=-1) + mask = ~paddle.logical_and(mask_a, mask_b) + xyz = xyz[mask] # cell coord + shift = compute_pbc_shift(xyz, ncell) + coord_shift = region.inter2phys(shift.to(env.GLOBAL_PD_FLOAT_PRECISION)) + mirrored = shift * ncell + xyz + cid = compute_serial_cid(mirrored, ncell) + + n_atoms = coord.shape[0] + aid = [c2a[ci] + i * n_atoms for i, ci in enumerate(cid)] + aid = paddle.concat(aid) + tmp = paddle.trunc(paddle.divide(aid, n_atoms)) + aid = aid % n_atoms + tmp_coord = coord[aid] - coord_shift[tmp] + tmp_atype = atype[aid] + + # merge local and ghost atoms + merged_coord = paddle.concat([coord, tmp_coord]) + merged_coord_shift = paddle.concat([paddle.zeros_like(coord), coord_shift[tmp]]) + merged_atype = paddle.concat([atype, tmp_atype]) + merged_mapping = paddle.concat([paddle.arange(atype.numel()), aid]) # pylint: disable=no-explicit-dtype,no-explicit-device + return merged_coord_shift, merged_atype, merged_mapping + + +def build_neighbor_list( + nloc: int, coord, atype, rcut: float, sec, mapping, type_split=True, min_check=False +): + """For each atom inside region, build its neighbor list. 
+ + Args: + - coord: shape is [nall*3] + - atype: shape is [nall] + """ + nall = coord.numel() // 3 + coord = coord.astype(paddle.get_default_dtype()) + nlist = [[] for _ in range(nloc)] + coord_l = coord.reshape([-1, 1, 3])[:nloc] + coord_r = coord.reshape([1, -1, 3]) + distance = coord_l - coord_r + # distance = paddle.linalg.norm(distance, axis=-1) + distance = decomp.norm(distance, axis=-1) + DISTANCE_INF = distance.max().detach() + rcut + distance[:nloc, :nloc] += paddle.eye(nloc, dtype=paddle.bool) * DISTANCE_INF # pylint: disable=no-explicit-device + if min_check: + if distance.min().abs() < 1e-6: + raise RuntimeError("Atom dist too close!") + if not type_split: + sec = sec[-1:] + lst = [] + nlist = paddle.zeros((nloc, sec[-1].item())).long() - 1 # pylint: disable=no-explicit-dtype,no-explicit-device + nlist_loc = paddle.zeros((nloc, sec[-1].item())).long() - 1 # pylint: disable=no-explicit-dtype,no-explicit-device + nlist_type = paddle.zeros((nloc, sec[-1].item())).long() - 1 # pylint: disable=no-explicit-dtype,no-explicit-device + for i, nnei in enumerate(sec): + if i > 0: + nnei = nnei - sec[i - 1] + if not type_split: + tmp = distance + else: + mask = atype.unsqueeze(0) == i + tmp = distance + (~mask) * DISTANCE_INF + if tmp.shape[1] >= nnei: + _sorted, indices = paddle.topk(tmp, nnei, axis=1, largest=False) + else: + # when nnei > nall + indices = paddle.zeros((nloc, nnei)).long() - 1 # pylint: disable=no-explicit-dtype,no-explicit-device + _sorted = paddle.ones((nloc, nnei)).long() * DISTANCE_INF # pylint: disable=no-explicit-dtype,no-explicit-device + _sorted_nnei, indices_nnei = paddle.topk( + tmp, tmp.shape[1], axis=1, largest=False + ) + _sorted[:, : tmp.shape[1]] = _sorted_nnei + indices[:, : tmp.shape[1]] = indices_nnei + mask = (_sorted < rcut).to(paddle.int64) + indices_loc = mapping[indices] + indices = indices * mask + -1 * (1 - mask) # -1 for padding + indices_loc = indices_loc * mask + -1 * (1 - mask) # -1 for padding + if i == 0: + start = 0 + else: + start = sec[i - 1] + end = min(sec[i], start + indices.shape[1]) + nlist[:, start:end] = indices[:, :nnei] + nlist_loc[:, start:end] = indices_loc[:, :nnei] + nlist_type[:, start:end] = atype[indices[:, :nnei]] * mask + -1 * (1 - mask) + return nlist, nlist_loc, nlist_type + + +def compute_smooth_weight(distance, rmin: float, rmax: float): + """Compute smooth weight for descriptor elements.""" + if rmin >= rmax: + raise ValueError("rmin should be less than rmax.") + min_mask = distance <= rmin + max_mask = distance >= rmax + mid_mask = paddle.logical_not(paddle.logical_or(min_mask, max_mask)) + uu = (distance - rmin) / (rmax - rmin) + vv = uu * uu * uu * (-6 * uu * uu + 15 * uu - 10) + 1 + return vv * mid_mask.astype(vv.dtype) + min_mask.astype(vv.dtype) + + +def make_env_mat( + coord, + atype, + region, + rcut: Union[float, list], + sec, + pbc=True, + type_split=True, + min_check=False, +): + """Based on atom coordinates, return environment matrix. 
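+
+    Parameters
+    ----------
+    coord
+        local atomic coordinates, shape is [nloc*3]
+    atype
+        atom types, shape is [nloc]
+    region
+        the simulation box (a `Region3D`), used when `pbc` is True
+    rcut
+        cutoff radius; a list of cutoff radii for hybrid descriptors
+    sec
+        cumulative number of selected neighbors per atom type
+        (a list of such arrays when `rcut` is a list)
+    pbc
+        whether to add ghost atoms under periodic boundary conditions
+    type_split
+        whether the neighbor list is split by atom type
+    min_check
+        whether to raise an error when two atoms are too close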
+ + Returns + ------- + nlist: nlist, [nloc, nnei] + merged_coord_shift: shift on nall atoms, [nall, 3] + merged_mapping: mapping from nall index to nloc index, [nall] + """ + # move outer atoms into cell + hybrid = isinstance(rcut, list) + _rcut = rcut + if hybrid: + _rcut = max(rcut) + if pbc: + merged_coord_shift, merged_atype, merged_mapping = append_neighbors( + coord, region, atype, _rcut + ) + merged_coord = coord[merged_mapping] - merged_coord_shift + if merged_coord.shape[0] <= coord.shape[0]: + log.warning("No ghost atom is added for system ") + else: + merged_coord_shift = paddle.zeros_like(coord) + merged_atype = atype.clone() + merged_mapping = paddle.arange(atype.numel()) # pylint: disable=no-explicit-dtype,no-explicit-device + merged_coord = coord.clone() + + # build nlist + if not hybrid: + nlist, nlist_loc, nlist_type = build_neighbor_list( + coord.shape[0], + merged_coord, + merged_atype, + rcut, + sec, + merged_mapping, + type_split=type_split, + min_check=min_check, + ) + else: + nlist, nlist_loc, nlist_type = [], [], [] + for ii, single_rcut in enumerate(rcut): + nlist_tmp, nlist_loc_tmp, nlist_type_tmp = build_neighbor_list( + coord.shape[0], + merged_coord, + merged_atype, + single_rcut, + sec[ii], + merged_mapping, + type_split=type_split, + min_check=min_check, + ) + nlist.append(nlist_tmp) + nlist_loc.append(nlist_loc_tmp) + nlist_type.append(nlist_type_tmp) + return nlist, nlist_loc, nlist_type, merged_coord_shift, merged_mapping diff --git a/deepmd/pd/utils/region.py b/deepmd/pd/utils/region.py new file mode 100644 index 0000000000..160a4d124e --- /dev/null +++ b/deepmd/pd/utils/region.py @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import paddle + +from deepmd.pd.utils import ( + decomp, +) + + +def phys2inter( + coord: paddle.Tensor, + cell: paddle.Tensor, +) -> paddle.Tensor: + """Convert physical coordinates to internal(direct) coordinates. + + Parameters + ---------- + coord : paddle.Tensor + physical coordinates of shape [*, na, 3]. + cell : paddle.Tensor + simulation cell tensor of shape [*, 3, 3]. + + Returns + ------- + inter_coord: paddle.Tensor + the internal coordinates + + """ + if paddle.in_dynamic_mode(): + try: + rec_cell = paddle.linalg.inv(cell) + except Exception as e: + rec_cell = paddle.full_like(cell, float("nan")) + rec_cell.stop_gradient = cell.stop_gradient + else: + rec_cell = paddle.linalg.inv(cell) + return paddle.matmul(coord, rec_cell) + + +def inter2phys( + coord: paddle.Tensor, + cell: paddle.Tensor, +) -> paddle.Tensor: + """Convert internal(direct) coordinates to physical coordinates. + + Parameters + ---------- + coord : paddle.Tensor + internal coordinates of shape [*, na, 3]. + cell : paddle.Tensor + simulation cell tensor of shape [*, 3, 3]. + + Returns + ------- + phys_coord: paddle.Tensor + the physical coordinates + + """ + return paddle.matmul(coord, cell) + + +def to_face_distance( + cell: paddle.Tensor, +) -> paddle.Tensor: + """Compute the to-face-distance of the simulation cell. + + Parameters + ---------- + cell : paddle.Tensor + simulation cell tensor of shape [*, 3, 3]. 
+ + Returns + ------- + dist: paddle.Tensor + the to face distances of shape [*, 3] + + """ + cshape = cell.shape + dist = b_to_face_distance(cell.reshape([-1, 3, 3])) + return dist.reshape(list(cshape[:-2]) + [3]) # noqa:RUF005 + + +def _to_face_distance(cell): + volume = paddle.linalg.det(cell) + c_yz = paddle.cross(cell[1], cell[2]) + # _h2yz = volume / paddle.linalg.norm(c_yz) + _h2yz = volume / decomp.norm(c_yz) + c_zx = paddle.cross(cell[2], cell[0]) + # _h2zx = volume / paddle.linalg.norm(c_zx) + _h2zx = volume / decomp.norm(c_zx) + c_xy = paddle.cross(cell[0], cell[1]) + # _h2xy = volume / paddle.linalg.norm(c_xy) + _h2xy = volume / decomp.norm(c_xy) + return paddle.stack([_h2yz, _h2zx, _h2xy]) + + +def b_to_face_distance(cell): + volume = paddle.linalg.det(cell) + c_yz = paddle.cross(cell[:, 1], cell[:, 2], axis=-1) + # _h2yz = volume / paddle.linalg.norm(c_yz, axis=-1) + _h2yz = volume / decomp.norm(c_yz, axis=-1) + c_zx = paddle.cross(cell[:, 2], cell[:, 0], axis=-1) + # _h2zx = volume / paddle.linalg.norm(c_zx, axis=-1) + _h2zx = volume / decomp.norm(c_zx, axis=-1) + c_xy = paddle.cross(cell[:, 0], cell[:, 1], axis=-1) + # _h2xy = volume / paddle.linalg.norm(c_xy, axis=-1) + _h2xy = volume / decomp.norm(c_xy, axis=-1) + return paddle.stack([_h2yz, _h2zx, _h2xy], axis=1) + + +# b_to_face_distance = paddle.vmap( +# _to_face_distance, in_dims=(0), out_dims=(0)) + + +def normalize_coord( + coord: paddle.Tensor, + cell: paddle.Tensor, +) -> paddle.Tensor: + """Apply PBC according to the atomic coordinates. + + Parameters + ---------- + coord : paddle.Tensor + orignal coordinates of shape [*, na, 3]. + + Returns + ------- + wrapped_coord: paddle.Tensor + wrapped coordinates of shape [*, na, 3]. + + """ + icoord = phys2inter(coord, cell) + icoord = paddle.remainder(icoord, paddle.to_tensor(1.0)) + return inter2phys(icoord, cell) diff --git a/deepmd/pd/utils/serialization.py b/deepmd/pd/utils/serialization.py new file mode 100644 index 0000000000..e33d7ea5d0 --- /dev/null +++ b/deepmd/pd/utils/serialization.py @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.model.model.model import ( + BaseModel, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) + + +def serialize_from_file(model_file: str) -> dict: + """Serialize the model file to a dictionary. + + Parameters + ---------- + model_file : str + The model file to be serialized. + + Returns + ------- + dict + The serialized model data. + """ + if model_file.endswith(".pd"): + state_dict = paddle.load(model_file) + if "model" in state_dict: + state_dict = state_dict["model"] + model_def_script = state_dict["_extra_state"]["model_params"] + model = get_model(model_def_script) + modelwrapper = ModelWrapper(model) + modelwrapper.set_state_dict(state_dict) + model = modelwrapper.model["Default"] + else: + raise ValueError("Paddle backend only supports converting .pd file") + + model_dict = model.serialize() + data = { + "backend": "Paddle", + "pt_version": paddle.version.commit, + "model": model_dict, + "model_def_script": model_def_script, + "@variables": {}, + } + if model.get_min_nbor_dist() is not None: + data["@variables"]["min_nbor_dist"] = model.get_min_nbor_dist() + return data + + +def deserialize_to_file(model_file: str, data: dict) -> None: + """Deserialize the dictionary to a model file. + + Parameters + ---------- + model_file : str + The model file to be saved. + data : dict + The dictionary to be deserialized. 
+ """ + if not model_file.endswith(".pd"): + raise ValueError("Paddle backend only supports converting .pd file") + model = BaseModel.deserialize(data["model"]) + if "min_nbor_dist" in data.get("@variables", {}): + model.min_nbor_dist = paddle.to_tensor( + float(data["@variables"]["min_nbor_dist"]) + ) + paddle.set_flags( + { + "FLAGS_save_cf_stack_op": 1, + "FLAGS_prim_enable_dynamic": 1, + "FLAGS_enable_pir_api": 1, + } + ) + model = paddle.jit.to_static(model) + paddle.jit.save(model, model_file) diff --git a/deepmd/pd/utils/stat.py b/deepmd/pd/utils/stat.py new file mode 100644 index 0000000000..3ecd695038 --- /dev/null +++ b/deepmd/pd/utils/stat.py @@ -0,0 +1,604 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from collections import ( + defaultdict, +) +from typing import ( + Callable, + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, +) +from deepmd.pd.utils import ( + AtomExcludeMask, +) +from deepmd.pd.utils.auto_batch_size import ( + AutoBatchSize, +) +from deepmd.pd.utils.utils import ( + dict_to_device, + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.out_stat import ( + compute_stats_from_atomic, + compute_stats_from_redu, +) +from deepmd.utils.path import ( + DPPath, +) + +log = logging.getLogger(__name__) + + +def make_stat_input(datasets, dataloaders, nbatches): + """Pack data for statistics. + + Args: + - dataset: A list of dataset to analyze. + - nbatches: Batch count for collecting stats. + + Returns + ------- + - a list of dicts, each of which contains data from a system + """ + lst = [] + log.info(f"Packing data for statistics from {len(datasets)} systems") + for i in range(len(datasets)): + sys_stat = {} + + iterator = iter(dataloaders[i]) + numb_batches = min(nbatches, len(dataloaders[i])) + for _ in range(numb_batches): + try: + stat_data = next(iterator) + except StopIteration: + iterator = iter(dataloaders[i]) + stat_data = next(iterator) + for dd in stat_data: + if stat_data[dd] is None: + sys_stat[dd] = None + elif isinstance(stat_data[dd], paddle.Tensor): + if dd not in sys_stat: + sys_stat[dd] = [] + sys_stat[dd].append(stat_data[dd]) + elif isinstance(stat_data[dd], np.float32): + sys_stat[dd] = stat_data[dd] + else: + pass + + for key in sys_stat: + if isinstance(sys_stat[key], np.float32): + pass + elif sys_stat[key] is None or sys_stat[key][0] is None: + sys_stat[key] = None + elif isinstance(stat_data[dd], paddle.Tensor): + sys_stat[key] = paddle.concat(sys_stat[key], axis=0) + dict_to_device(sys_stat) + lst.append(sys_stat) + return lst + + +def _restore_from_file( + stat_file_path: DPPath, + keys: list[str] = ["energy"], +) -> Optional[dict]: + if stat_file_path is None: + return None, None + stat_files = [stat_file_path / f"bias_atom_{kk}" for kk in keys] + if all(not (ii.is_file()) for ii in stat_files): + return None, None + stat_files = [stat_file_path / f"std_atom_{kk}" for kk in keys] + if all(not (ii.is_file()) for ii in stat_files): + return None, None + + ret_bias = {} + ret_std = {} + for kk in keys: + fp = stat_file_path / f"bias_atom_{kk}" + # only read the key that exists + if fp.is_file(): + ret_bias[kk] = fp.load_numpy() + for kk in keys: + fp = stat_file_path / f"std_atom_{kk}" + # only read the key that exists + if fp.is_file(): + ret_std[kk] = fp.load_numpy() + return ret_bias, ret_std + + +def _save_to_file( + stat_file_path: DPPath, + bias_out: dict, + std_out: dict, +): + assert stat_file_path is not None + 
stat_file_path.mkdir(exist_ok=True, parents=True) + for kk, vv in bias_out.items(): + fp = stat_file_path / f"bias_atom_{kk}" + fp.save_numpy(vv) + for kk, vv in std_out.items(): + fp = stat_file_path / f"std_atom_{kk}" + fp.save_numpy(vv) + + +def _post_process_stat( + out_bias, + out_std, +): + """Post process the statistics. + + For global statistics, we do not have the std for each type of atoms, + thus fake the output std by ones for all the types. + + """ + new_std = {} + for kk, vv in out_bias.items(): + new_std[kk] = np.ones_like(vv) + return out_bias, new_std + + +def _compute_model_predict( + sampled: Union[Callable[[], list[dict]], list[dict]], + keys: list[str], + model_forward: Callable[..., paddle.Tensor], +): + auto_batch_size = AutoBatchSize() + model_predict = {kk: [] for kk in keys} + for system in sampled: + nframes = system["coord"].shape[0] + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], + ) + fparam = system.get("fparam", None) + aparam = system.get("aparam", None) + + def model_forward_auto_batch_size(*args, **kwargs): + return auto_batch_size.execute_all( + model_forward, + nframes, + system["atype"].shape[-1], + *args, + **kwargs, + ) + + sample_predict = model_forward_auto_batch_size( + coord, atype, box, fparam=fparam, aparam=aparam + ) + for kk in keys: + model_predict[kk].append( + to_numpy_array( + sample_predict[kk] # nf x nloc x odims + ) + ) + return model_predict + + +def _make_preset_out_bias( + ntypes: int, + ibias: list[Optional[np.array]], +) -> Optional[np.array]: + """Make preset out bias. + + output: + a np array of shape [ntypes, *(odim0, odim1, ...)] is any item is not None + None if all items are None. + """ + if len(ibias) != ntypes: + raise ValueError("the length of preset bias list should be ntypes") + if all(ii is None for ii in ibias): + return None + for refb in ibias: + if refb is not None: + break + refb = np.array(refb) + nbias = [ + np.full_like(refb, np.nan, dtype=np.float64) if ii is None else ii + for ii in ibias + ] + return np.array(nbias) + + +def _fill_stat_with_global( + atomic_stat: Union[np.ndarray, None], + global_stat: np.ndarray, +): + """This function is used to fill atomic stat with global stat. + + Parameters + ---------- + atomic_stat : Union[np.ndarray, None] + The atomic stat. + global_stat : np.ndarray + The global stat. + if the atomic stat is None, use global stat. + if the atomic stat is not None, but has nan values (missing atypes), fill with global stat. + """ + if atomic_stat is None: + return global_stat + else: + atomic_stat = atomic_stat.reshape(global_stat.shape) + return np.nan_to_num( + np.where( + np.isnan(atomic_stat) & ~np.isnan(global_stat), global_stat, atomic_stat + ) + ) + + +def compute_output_stats( + merged: Union[Callable[[], list[dict]], list[dict]], + ntypes: int, + keys: Union[str, list[str]] = ["energy"], + stat_file_path: Optional[DPPath] = None, + rcond: Optional[float] = None, + preset_bias: Optional[dict[str, list[Optional[np.ndarray]]]] = None, + model_forward: Optional[Callable[..., paddle.Tensor]] = None, + atomic_output: Optional[FittingOutputDef] = None, +): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. 
+        - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+            only when needed. Since the sampling process can be slow and memory-intensive,
+            the lazy function helps by only sampling once.
+    ntypes : int
+        The number of atom types.
+    stat_file_path : DPPath, optional
+        The path to the stat file.
+    rcond : float, optional
+        The condition number for the regression of atomic energy.
+    preset_bias : Dict[str, List[Optional[np.ndarray]]], optional
+        Specifying atomic energy contribution in vacuum. Given by key:value pairs.
+        The value is a list specifying the bias. The elements can be None or np.array of output shape.
+        For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.]
+        The `set_davg_zero` key in the descriptor should be set.
+    model_forward : Callable[..., paddle.Tensor], optional
+        The wrapped forward function of atomic model.
+        If not None, the model will be utilized to generate the original energy prediction,
+        which will be subtracted from the energy label of the data.
+        The difference will then be used to calculate the delta complement energy bias for each type.
+    atomic_output : FittingOutputDef, optional
+        The output of atomic model.
+    """
+    # try to restore the bias from stat file
+    bias_atom_e, std_atom_e = _restore_from_file(stat_file_path, keys)
+
+    # failed to restore the bias from stat file. compute
+    if bias_atom_e is None:
+        # only get data once, sampled is a list of dict[str, paddle.Tensor]
+        sampled = merged() if callable(merged) else merged
+        if model_forward is not None:
+            model_pred = _compute_model_predict(sampled, keys, model_forward)
+        else:
+            model_pred = None
+
+        # remove the keys that are not in the sample
+        keys = [keys] if isinstance(keys, str) else keys
+        assert isinstance(keys, list)
+        new_keys = [
+            ii
+            for ii in keys
+            if (ii in sampled[0].keys()) or ("atom_" + ii in sampled[0].keys())
+        ]
+        del keys
+        keys = new_keys
+        # split system based on label
+        atomic_sampled_idx = defaultdict(list)
+        global_sampled_idx = defaultdict(list)
+
+        for kk in keys:
+            for idx, system in enumerate(sampled):
+                if (("find_atom_" + kk) in system) and (
+                    system["find_atom_" + kk] > 0.0
+                ):
+                    atomic_sampled_idx[kk].append(idx)
+                elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
+                    global_sampled_idx[kk].append(idx)
+
+                else:
+                    continue
+
+        # use index to gather model predictions for the corresponding systems.
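+        # the model predictions are partitioned in the same way as the labels:
+        # model_pred_g keeps per-frame predictions summed over atoms for systems
+        # providing global labels, while model_pred_a keeps per-atom predictions
+        # for systems providing atomic labels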
+ + model_pred_g = ( + { + kk: [ + np.sum(vv[idx], axis=1) for idx in global_sampled_idx[kk] + ] # sum atomic dim + for kk, vv in model_pred.items() + } + if model_pred + else None + ) + model_pred_a = ( + { + kk: [vv[idx] for idx in atomic_sampled_idx[kk]] + for kk, vv in model_pred.items() + } + if model_pred + else None + ) + + # concat all frames within those systems + model_pred_g = ( + { + kk: np.concatenate(model_pred_g[kk]) + for kk in model_pred_g.keys() + if len(model_pred_g[kk]) > 0 + } + if model_pred + else None + ) + model_pred_a = ( + { + kk: np.concatenate(model_pred_a[kk]) + for kk in model_pred_a.keys() + if len(model_pred_a[kk]) > 0 + } + if model_pred + else None + ) + + # compute stat + bias_atom_g, std_atom_g = compute_output_stats_global( + sampled, + ntypes, + keys, + rcond, + preset_bias, + model_pred_g, + atomic_output, + ) + bias_atom_a, std_atom_a = compute_output_stats_atomic( + sampled, + ntypes, + keys, + model_pred_a, + ) + + # merge global/atomic bias + bias_atom_e, std_atom_e = {}, {} + for kk in keys: + # use atomic bias whenever available + if kk in bias_atom_a: + bias_atom_e[kk] = bias_atom_a[kk] + std_atom_e[kk] = std_atom_a[kk] + else: + bias_atom_e[kk] = None + std_atom_e[kk] = None + # use global bias to fill missing atomic bias + if kk in bias_atom_g: + bias_atom_e[kk] = _fill_stat_with_global( + bias_atom_e[kk], bias_atom_g[kk] + ) + std_atom_e[kk] = _fill_stat_with_global(std_atom_e[kk], std_atom_g[kk]) + if (bias_atom_e[kk] is None) or (std_atom_e[kk] is None): + raise RuntimeError("Fail to compute stat.") + + if stat_file_path is not None: + _save_to_file(stat_file_path, bias_atom_e, std_atom_e) + + bias_atom_e = {kk: to_paddle_tensor(vv) for kk, vv in bias_atom_e.items()} + std_atom_e = {kk: to_paddle_tensor(vv) for kk, vv in std_atom_e.items()} + return bias_atom_e, std_atom_e + + +def compute_output_stats_global( + sampled: list[dict], + ntypes: int, + keys: list[str], + rcond: Optional[float] = None, + preset_bias: Optional[dict[str, list[Optional[paddle.Tensor]]]] = None, + model_pred: Optional[dict[str, np.ndarray]] = None, + atomic_output: Optional[FittingOutputDef] = None, +): + """This function only handle stat computation from reduced global labels.""" + # return directly if model predict is empty for global + if model_pred == {}: + return {}, {} + + # get label dict from sample; for each key, only picking the system with global labels. 
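+    # a system contributes to the global statistics of a key `kk` only if it
+    # actually provides that label, i.e. `find_{kk} > 0` in the sampled data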
+ outputs = { + kk: [ + system[kk] + for system in sampled + if kk in system and system.get(f"find_{kk}", 0) > 0 + ] + for kk in keys + } + + data_mixed_type = "real_natoms_vec" in sampled[0] + natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" + for system in sampled: + if "atom_exclude_types" in system: + type_mask = AtomExcludeMask( + ntypes, system["atom_exclude_types"] + ).get_type_mask() + system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) + + input_natoms = { + kk: [ + item[natoms_key] + for item in sampled + if kk in item and item.get(f"find_{kk}", 0) > 0 + ] + for kk in keys + } + # shape: (nframes, ndim) + merged_output = { + kk: to_numpy_array(paddle.concat(outputs[kk])) + for kk in keys + if len(outputs[kk]) > 0 + } + # shape: (nframes, ntypes) + + merged_natoms = { + kk: to_numpy_array(paddle.concat(input_natoms[kk])[:, 2:]) + for kk in keys + if len(input_natoms[kk]) > 0 + } + nf = {kk: merged_natoms[kk].shape[0] for kk in keys if kk in merged_natoms} + if preset_bias is not None: + assigned_atom_ener = { + kk: _make_preset_out_bias(ntypes, preset_bias[kk]) + if kk in preset_bias.keys() + else None + for kk in keys + } + else: + assigned_atom_ener = {kk: None for kk in keys} + + if model_pred is None: + stats_input = merged_output + else: + # subtract the model bias and output the delta bias + + stats_input = { + kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output + } + + bias_atom_e = {} + std_atom_e = {} + for kk in keys: + if kk in stats_input: + if atomic_output is not None and atomic_output.get_data()[kk].intensive: + task_dim = stats_input[kk].shape[1] + assert merged_natoms[kk].shape == (nf[kk], ntypes) + stats_input[kk] = ( + merged_natoms[kk].sum(axis=1).reshape([-1, 1]) * stats_input[kk] + ) + assert stats_input[kk].shape == (nf[kk], task_dim) + bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_redu( + stats_input[kk], + merged_natoms[kk], + assigned_bias=assigned_atom_ener[kk], + rcond=rcond, + ) + else: + # this key does not have global labels, skip it. + continue + bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) + + # unbias_e is only used for print rmse + + if model_pred is None: + unbias_e = { + kk: merged_natoms[kk] @ bias_atom_e[kk].reshape([ntypes, -1]) + for kk in bias_atom_e.keys() + } + else: + unbias_e = { + kk: model_pred[kk].reshape([nf[kk], -1]) + + merged_natoms[kk] @ bias_atom_e[kk].reshape([ntypes, -1]) + for kk in bias_atom_e.keys() + } + atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in bias_atom_e.keys()} + + def rmse(x): + return np.sqrt(np.mean(np.square(x))) + + for kk in bias_atom_e.keys(): + rmse_ae = rmse( + ( + unbias_e[kk].reshape([nf[kk], -1]).astype(merged_output[kk].dtype) + - merged_output[kk].reshape([nf[kk], -1]) + ) + / atom_numbs[kk][:, None].astype(merged_output[kk].dtype) + ) + log.info( + f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." + ) + return bias_atom_e, std_atom_e + + +def compute_output_stats_atomic( + sampled: list[dict], + ntypes: int, + keys: list[str], + model_pred: Optional[dict[str, np.ndarray]] = None, +): + # get label dict from sample; for each key, only picking the system with atomic labels. 
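+    # atomic labels are stored under the "atom_" prefix; the corresponding
+    # `atype` arrays are collected so the statistics can be resolved per atom type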
+ outputs = { + kk: [ + system["atom_" + kk] + for system in sampled + if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + ] + for kk in keys + } + natoms = { + kk: [ + system["atype"] + for system in sampled + if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + ] + for kk in keys + } + # shape: (nframes, nloc, ndim) + merged_output = { + kk: to_numpy_array(paddle.concat(outputs[kk])) + for kk in keys + if len(outputs[kk]) > 0 + } + merged_natoms = { + kk: to_numpy_array(paddle.concat(natoms[kk])) + for kk in keys + if len(natoms[kk]) > 0 + } + # reshape merged data to [nf, nloc, ndim] + merged_output = { + kk: merged_output[kk].reshape((*merged_natoms[kk].shape, -1)) + for kk in merged_output + } + + if model_pred is None: + stats_input = merged_output + else: + # subtract the model bias and output the delta bias + stats_input = { + kk: merged_output[kk] - model_pred[kk].reshape(merged_output[kk].shape) + for kk in keys + if kk in merged_output + } + + bias_atom_e = {} + std_atom_e = {} + + for kk in keys: + if kk in stats_input: + bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_atomic( + stats_input[kk], + merged_natoms[kk], + ) + # correction for missing types + missing_types = ntypes - merged_natoms[kk].max() - 1 + if missing_types > 0: + assert ( + bias_atom_e[kk].dtype is std_atom_e[kk].dtype + ), "bias and std should be of the same dtypes" + nan_padding = np.empty( + (missing_types, bias_atom_e[kk].shape[1]), + dtype=bias_atom_e[kk].dtype, + ) + nan_padding.fill(np.nan) + bias_atom_e[kk] = np.concatenate([bias_atom_e[kk], nan_padding], axis=0) + std_atom_e[kk] = np.concatenate([std_atom_e[kk], nan_padding], axis=0) + else: + # this key does not have atomic labels, skip it. + continue + return bias_atom_e, std_atom_e diff --git a/deepmd/pd/utils/update_sel.py b/deepmd/pd/utils/update_sel.py new file mode 100644 index 0000000000..32b8d66c73 --- /dev/null +++ b/deepmd/pd/utils/update_sel.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +from deepmd.pd.utils.neighbor_stat import ( + NeighborStat, +) +from deepmd.utils.update_sel import ( + BaseUpdateSel, +) + + +class UpdateSel(BaseUpdateSel): + @property + def neighbor_stat(self) -> type[NeighborStat]: + return NeighborStat diff --git a/deepmd/pd/utils/utils.py b/deepmd/pd/utils/utils.py new file mode 100644 index 0000000000..b38ad9e887 --- /dev/null +++ b/deepmd/pd/utils/utils.py @@ -0,0 +1,176 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from __future__ import ( + annotations, +) + +from typing import ( + overload, +) + +import ml_dtypes +import numpy as np +import paddle +import paddle.nn.functional as F + +from deepmd.dpmodel.common import PRECISION_DICT as NP_PRECISION_DICT +from deepmd.pd.model.network.init import ( + PaddleGenerator, +) + +from .env import ( + DEVICE, +) +from .env import PRECISION_DICT as PD_PRECISION_DICT + + +class ActivationFn(paddle.nn.Layer): + def __init__(self, activation: str | None): + super().__init__() + self.activation: str = activation if activation is not None else "linear" + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + """Returns the tensor after applying activation function corresponding to `activation`.""" + if self.activation.lower() == "relu": + return F.relu(x) + elif self.activation.lower() == "gelu" or self.activation.lower() == "gelu_tf": + return F.gelu(x, approximate=True) + elif self.activation.lower() == "tanh": + return paddle.tanh(x) + elif self.activation.lower() == "relu6": + return F.relu6(x) + elif 
self.activation.lower() == "softplus": + return F.softplus(x) + elif self.activation.lower() == "sigmoid": + return F.sigmoid(x) + elif self.activation.lower() == "linear" or self.activation.lower() == "none": + return x + else: + raise RuntimeError(f"activation function {self.activation} not supported") + + +@overload +def to_numpy_array(xx: paddle.Tensor) -> np.ndarray: ... + + +@overload +def to_numpy_array(xx: None) -> None: ... + + +def to_numpy_array( + xx, +): + if xx is None: + return None + assert xx is not None + # Create a reverse mapping of PD_PRECISION_DICT + reverse_precision_dict = {v: k for k, v in PD_PRECISION_DICT.items()} + # Use the reverse mapping to find keys with the desired value + prec = reverse_precision_dict.get(xx.dtype, None) + prec = NP_PRECISION_DICT.get(prec, np.float64) + if prec is None: + raise ValueError(f"unknown precision {xx.dtype}") + if isinstance(xx, np.ndarray): + return xx.astype(prec) + if xx.dtype == paddle.bfloat16: + xx = xx.astype(paddle.get_default_dtype()) + return xx.numpy().astype(prec) + + +@overload +def to_paddle_tensor(xx: np.ndarray) -> paddle.Tensor: ... + + +@overload +def to_paddle_tensor(xx: None) -> None: ... + + +def to_paddle_tensor( + xx, +): + if xx is None: + return None + assert xx is not None + if not isinstance(xx, np.ndarray): + return xx + # Create a reverse mapping of NP_PRECISION_DICT + reverse_precision_dict = {v: k for k, v in NP_PRECISION_DICT.items()} + # Use the reverse mapping to find keys with the desired value + prec = reverse_precision_dict.get(xx.dtype.type, None) + prec = PD_PRECISION_DICT.get(prec, None) + if prec is None: + raise ValueError(f"unknown precision {xx.dtype}") + if xx.dtype == ml_dtypes.bfloat16: + xx = xx.astype(np.float32) + return paddle.to_tensor(xx, dtype=prec, place=DEVICE) + + +def dict_to_device(sample_dict): + for key in sample_dict: + if isinstance(sample_dict[key], list): + sample_dict[key] = [item.to(DEVICE) for item in sample_dict[key]] + if isinstance(sample_dict[key], np.float32): + sample_dict[key] = ( + paddle.ones(1, dtype=paddle.float32).to(device=DEVICE) + * sample_dict[key] + ) + else: + if sample_dict[key] is not None: + sample_dict[key] = sample_dict[key].to(DEVICE) + + +# https://github.com/numpy/numpy/blob/a4cddb60489f821a1a4dffc16cd5c69755d43bdb/numpy/random/bit_generator.pyx#L58-L63 +INIT_A = 0x43B0D7E5 +MULT_A = 0x931E8875 +MIX_MULT_L = 0xCA01F9DD +MIX_MULT_R = 0x4973F715 +XSHIFT = 16 + + +def hashmix(value: int, hash_const: list[int]): + value ^= INIT_A + hash_const[0] *= MULT_A + value *= INIT_A + # prevent overflow + hash_const[0] &= 0xFFFF_FFFF_FFFF_FFFF + value &= 0xFFFF_FFFF_FFFF_FFFF + value ^= value >> XSHIFT + return value + + +def mix(x: int, y: int): + result = MIX_MULT_L * x - MIX_MULT_R * y + # prevent overflow + result &= 0xFFFF_FFFF_FFFF_FFFF + result ^= result >> XSHIFT + return result + + +def mix_entropy(entropy_array: list[int]) -> int: + # https://github.com/numpy/numpy/blob/a4cddb60489f821a1a4dffc16cd5c69755d43bdb/numpy/random/bit_generator.pyx#L341-L374 + hash_const = [INIT_A] + mixer = hashmix(entropy_array[0], hash_const) + for i_src in range(1, len(entropy_array)): + mixer = mix(mixer, hashmix(entropy_array[i_src], hash_const)) + return mixer + + +def get_generator( + seed: int | list[int] | None = None, +) -> PaddleGenerator | None: + if seed is not None: + if isinstance(seed, list): + seed = mix_entropy(seed) + if DEVICE == "cpu": + generator = paddle.framework.core.default_cpu_generator() + elif DEVICE == "gpu": + generator = 
paddle.framework.core.default_cuda_generator(0) + elif DEVICE.startswith("gpu:"): + generator = paddle.framework.core.default_cuda_generator( + int(DEVICE.split("gpu:")[1]) + ) + else: + raise ValueError("DEVICE should be cpu or gpu or gpu:x") + generator.manual_seed(seed) + return generator + else: + return None diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index 259fe93bdb..edfadc2e7e 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -180,7 +180,11 @@ def execute_with_batch_size( *[ ( vv[start_index:end_index, ...] - if array_api_compat.is_array_api_obj(vv) and vv.ndim > 1 + if ( + array_api_compat.is_array_api_obj(vv) + or str(vv.__class__) == "" + ) + and vv.ndim > 1 else vv ) for vv in args @@ -188,7 +192,11 @@ def execute_with_batch_size( **{ kk: ( vv[start_index:end_index, ...] - if array_api_compat.is_array_api_obj(vv) and vv.ndim > 1 + if ( + array_api_compat.is_array_api_obj(vv) + or str(vv.__class__) == "" + ) + and vv.ndim > 1 else vv ) for kk, vv in kwargs.items() @@ -227,6 +235,14 @@ def concate_result(r): if array_api_compat.is_array_api_obj(r[0]): xp = array_api_compat.array_namespace(r[0]) ret = xp.concat(r, axis=0) + elif str(r[0].__class__) == "": + try: + import paddle + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "The 'paddlepaddle' is required but not installed." + ) from e + ret = paddle.concat(r, axis=0) else: raise RuntimeError(f"Unexpected result type {type(r[0])}") return ret diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 7d58d65578..eda91fe9a0 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -247,6 +247,21 @@ def get_item_torch(self, index: int) -> dict: frame["fid"] = index return frame + def get_item_paddle(self, index: int) -> dict: + """Get a single frame data . The frame is picked from the data system by index. The index is coded across all the sets. + + Parameters + ---------- + index + index of the frame + """ + i = bisect.bisect_right(self.prefix_sum, index) + frames = self._load_set(self.dirs[i]) + frame = self._get_subdata(frames, index - self.prefix_sum[i]) + frame = self.reformat_data_torch(frame) + frame["fid"] = index + return frame + def get_batch(self, batch_size: int) -> dict: """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system. @@ -478,7 +493,7 @@ def reformat_data_torch(self, data): pass else: if kk in data and self.data_dict[kk]["atomic"]: - data[kk] = data[kk].reshape(-1, self.data_dict[kk]["ndof"]) + data[kk] = data[kk].reshape((-1, self.data_dict[kk]["ndof"])) data["atype"] = data["type"] if not self.pbc: data["box"] = None diff --git a/doc/backend.md b/doc/backend.md index cf99eea9cb..cce3a35164 100644 --- a/doc/backend.md +++ b/doc/backend.md @@ -32,6 +32,13 @@ While `.pth` and `.pt` are the same in the PyTorch package, they have different Both `.xlo` and `.jax` are customized format extensions defined in DeePMD-kit, since JAX has no convention for file extensions. Currently, this backend is developed actively, and has no support for training and the C++ interface. +### Paddle {{ paddle_icon }} + +- Model filename extension: `.json` and `.pdiparams` +- Checkpoint filename extension: `.pd` + +[Paddle](https://www.paddlepaddle.org.cn/) 3.0 or above is required. + ### DP {{ dpmodel_icon }} :::{note} @@ -54,7 +61,7 @@ NumPy 1.21 or above is required. ### Training -When training and freezing a model, you can use `dp --tf` or `dp --pt` in the command line to switch the backend. 
+When training and freezing a model, you can use `dp --tf`, `dp --pt` or `dp --pd` in the command line to switch the backend. ### Inference @@ -66,5 +73,5 @@ For example, when the model filename ends with `.pb` (the ProtoBuf file), DeePMD If a model is supported by two backends, one can use [`dp convert-backend`](./cli.rst) to convert the model file between these two backends. :::{warning} -Currently, only the `se_e2_a` model fully supports the backend conversion between TensorFlow {{ tensorflow_icon }} and PyTorch {{ pytorch_icon }}. +Currently, only the `se_e2_a` model fully supports the backend conversion between TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }} and Paddle {{ paddle_icon }}. ::: diff --git a/doc/freeze/freeze.md b/doc/freeze/freeze.md index c3800917a6..1455023dcd 100644 --- a/doc/freeze/freeze.md +++ b/doc/freeze/freeze.md @@ -32,3 +32,23 @@ $ dp --pt freeze -o model_branch1.pth --head CHOSEN_BRANCH ``` The output model is called `model_branch1.pth`, which is the specifically frozen model with the `CHOSEN_BRANCH` head. + +::: + +:::{tab-item} Paddle {{ paddle_icon }} + +```bash +$ dp --pd freeze -o model +DEEPMD INFO Paddle inference model has been exported to: model.json and model.pdiparams +``` + +in the folder where the model is trained. The output model is called `model.json` and `model.pdiparams`. + +In [multi-task mode](../train/multi-task-training-pt.md), you need to choose one available heads (e.g. `CHOSEN_BRANCH`) by `--head` +to specify which model branch you want to freeze: + +```bash +$ dp --pd freeze -o model_branch1 --head CHOSEN_BRANCH +``` + +The output model is called `model_branch1.json`, which is the specifically frozen model with the `CHOSEN_BRANCH` head. diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 4a0a104b7e..e13005f912 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -93,6 +93,21 @@ One can also [use conda](https://docs.deepmodeling.org/faq/conda.html) to instal ::: +:::{tab-item} Paddle {{ paddle_icon }} + +To install Paddle, run + +```sh +# cu123 +python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/ +# cu118 +python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ +# cpu +python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ +``` + +::: + :::: It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by @@ -119,7 +134,7 @@ One should remember to activate the virtual environment every time he/she uses D Check the compiler version on your machine -``` +```bash gcc --version ``` @@ -141,6 +156,12 @@ Note that PyTorch may have specific requirements for the compiler version to sup ::: +:::{tab-item} Paddle {{ paddle_icon }} + +You can set the environment variable `export DP_ENABLE_PADDLE=1` to enable customized C++ OPs in the Paddle backend. + +::: + :::: Execute @@ -188,6 +209,13 @@ The path to the ROCM toolkit directory. If `ROCM_ROOT` is not set, it will look {{ pytorch_icon }} Enable customized C++ OPs for the PyTorch backend. PyTorch can still run without customized C++ OPs, but features will be limited. ::: +:::{envvar} DP_ENABLE_PADDLE + +**Choices**: `0`, `1`; **Default**: `0` + +{{ paddle_icon }} Enable customized C++ OPs for the Paddle backend. Paddle can still run without customized C++ OPs, but features will be limited. 
+::: + :::{envvar} TENSORFLOW_ROOT **Type**: Path; **Default**: Detected automatically @@ -202,6 +230,13 @@ The path to the ROCM toolkit directory. If `ROCM_ROOT` is not set, it will look {{ pytorch_icon }} The path to PyTorch Python library. If not given, by default, the installer only finds PyTorch under the user site-package directory (`site.getusersitepackages()`) or the system site-package directory (`sysconfig.get_path("purelib")`) due to the limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest PyTorch (or the environment variable `PYTORCH_VERSION` if given) from PyPI will be built against. ::: +:::{envvar} PADDLE_INFERENCE_DIR + +**Type**: Path; **Default**: None + +{{ paddle_icon }} The path to Paddle inference library, e.g. `/path/to/paddle_inference_install_dir`. If `DP_ENABLE_PADDLE` is enabled, it needs to be specified manually; otherwise, installation will fail. +::: + :::{envvar} DP_ENABLE_NATIVE_OPTIMIZATION **Choices**: `0`, `1`; **Default**: `0` @@ -229,7 +264,7 @@ Other [CMake environment variables](https://cmake.org/cmake/help/latest/manual/c To test the installation, one should first jump out of the source directory -``` +```bash cd /some/other/workspace ``` @@ -314,6 +349,11 @@ You can also download libtorch prebuilt library from the [PyTorch website](https ::: +:::{tab-item} Paddle {{ paddle_icon }} + +If you want to use C++ interface of Paddle, you need to compile the Paddle inference library(C++ interface) manually from the [linux-compile-by-make](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/compile/linux-compile-by-make.html), then use the `.so` and `.a` files in `Paddle/build/paddle_inference_install_dir/`. +::: + :::: ### Install DeePMD-kit's C++ interface @@ -367,6 +407,16 @@ cmake -DENABLE_PYTORCH=TRUE -DUSE_PT_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$de ::: +:::{tab-item} Paddle {{ paddle_icon }} + +I assume you have compiled the Paddle inference library(C++ interface) to `$PADDLE_INFERENCE_DIR`, then execute CMake + +```bash +cmake -DENABLE_PADDLE=ON -DPADDLE_INFERENCE_DIR=$PADDLE_INFERENCE_DIR -DCMAKE_INSTALL_PREFIX=$deepmd_root .. +``` + +::: + :::: One may add the following CMake variables to `cmake` using the [`-D =` option](https://cmake.org/cmake/help/latest/manual/cmake.1.html#cmdoption-cmake-D): @@ -387,6 +437,14 @@ One may add the following CMake variables to `cmake` using the [`-D == **Note** for developers: `torchrun` by default passes settings as environment variables [(list here)](https://pytorch.org/docs/stable/elastic/run.html#environment-variables). > To check forward, backward, and communication time, please set env var `TORCH_CPP_LOG_LEVEL=INFO TORCH_DISTRIBUTED_DEBUG=DETAIL`. More details can be found [here](https://pytorch.org/docs/stable/distributed.html#logging). + +## Paddle Implementation {{ paddle_icon }} + +Currently, parallel training in paddle version is implemented in the form of Paddle Distributed Data Parallelism [DDP](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/06_distributed_training/cluster_quick_start_collective_cn.html). +DeePMD-kit will decide whether to launch the training in parallel (distributed) mode or in serial mode depending on your execution command. + +### Dataloader and Dataset + +One of the major differences between two backends during training is that the Paddle version employs a multi-threaded data loading utility [DataLoader](https://paddle.org/docs/stable/data.html). 
+We utilize the Paddle framework and have designed and implemented a multiprocessing data processing and loading system called DpLoaderSet, based on Paddle's DataLoader and Dataset.
+
+First, we establish a DeepmdData class for each system, which is consistent with the TensorFlow version at this level. Then, we create a dataloader for each system, resulting in the same number of dataloaders as the number of systems. Next, we create a dataset for the dataloaders obtained in the previous step. This allows us to query the data for each system through this dataset, while the iteration pointers for each system are maintained by their respective dataloaders. Finally, a dataloader is created for the outermost dataset.
+
+We achieve custom sampling methods using a weighted sampler. The length of the sampler is set to total_batch_num \* num_workers. The parameter "num_workers" defines the number of threads involved in multi-threaded loading, which can be modified by setting the environment variable NUM_WORKERS (default: min(8, ncpus)).
+
+> **Note** The underlying dataloader uses a distributed sampler to ensure that each GPU receives batches with different content in parallel mode, and a sequential sampler in serial mode. In the TensorFlow version, Horovod shuffles the dataset using different random seeds for the same purpose.
+
+```mermaid
+flowchart LR
+
+  subgraph systems
+    subgraph system1
+        direction LR
+        frame1[frame 1]
+        frame2[frame 2]
+    end
+
+    subgraph system2
+        direction LR
+        frame3[frame 3]
+        frame4[frame 4]
+        frame5[frame 5]
+    end
+  end
+
+  subgraph dataset
+    dataset1[dataset 1]
+    dataset2[dataset 2]
+  end
+  system1 -- frames --> dataset1
+  system2 --> dataset2
+
+  subgraph distributed sampler
+    ds1[distributed sampler 1]
+    ds2[distributed sampler 2]
+  end
+  dataset1 --> ds1
+  dataset2 --> ds2
+
+  subgraph dataloader
+    dataloader1[dataloader 1]
+    dataloader2[dataloader 2]
+  end
+  ds1 -- mini batch --> dataloader1
+  ds2 --> dataloader2
+
+  subgraph index[index on Rank 0]
+    dl11[dataloader 1, entry 1]
+    dl21[dataloader 2, entry 1]
+    dl22[dataloader 2, entry 2]
+  end
+  dataloader1 --> dl11
+  dataloader2 --> dl21
+  dataloader2 --> dl22
+
+  index -- for each step, choose 1 system --> WeightedSampler
+  --> dploaderset --> bufferedq[buffered queue] --> model
+```
+
+### How to use
+
+We use [`paddle.distributed.fleet`](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/06_distributed_training/cluster_quick_start_collective_cn.html) to launch a DDP training session.
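+Under the hood this follows the standard fleet collective setup. The snippet below is only a minimal illustration of that pattern (with a placeholder model), not the actual DeePMD-kit trainer code:
+
+```python
+import paddle
+from paddle.distributed import fleet
+
+# join the collective communication group (one process per GPU when launched
+# with `paddle.distributed.launch`)
+fleet.init(is_collective=True)
+
+model = paddle.nn.Linear(16, 1)  # placeholder standing in for the DP model
+opt = paddle.optimizer.Adam(learning_rate=1e-3, parameters=model.parameters())
+
+# after wrapping, gradients are all-reduced across ranks automatically
+model = fleet.distributed_model(model)
+opt = fleet.distributed_optimizer(opt)
+```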
+
+To start training with multiple GPUs on one node, set the environment variable `CUDA_VISIBLE_DEVICES` to the list of GPUs you want to use:
+
+```bash
+# example for training with 4 GPUs on one node
+NUM_WORKERS=0 HDF5_USE_FILE_LOCKING=0 CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --gpus="0,1,2,3" dp --pd train input.json
+```
+
+Suppose you have 2 nodes, each with 4 GPUs, whose IP addresses are `192.168.1.2` and `192.168.1.3`; you can then use `paddle.distributed.launch` to launch a DDP training session:
+
+```bash
+# run on node 192.168.1.2
+NUM_WORKERS=0 HDF5_USE_FILE_LOCKING=0 python -m paddle.distributed.launch \
+    --gpus=0,1,2,3 \
+    --ips=192.168.1.2,192.168.1.3 \
+    dp --pd train input.json
+
+# then run on the other node, 192.168.1.3
+NUM_WORKERS=0 HDF5_USE_FILE_LOCKING=0 python -m paddle.distributed.launch \
+    --gpus=0,1,2,3 \
+    --ips=192.168.1.2,192.168.1.3 \
+    dp --pd train input.json
+```
+
+:::{note}
+If `NUM_WORKERS` is too large, the program may be terminated by the system;
+if it is too small, data reading may become slow. Try adjusting it to an appropriate value.
+:::
diff --git a/doc/train/tensorboard.md b/doc/train/tensorboard.md
index 32ecdd0ab2..3925ab3d3d 100644
--- a/doc/train/tensorboard.md
+++ b/doc/train/tensorboard.md
@@ -1,7 +1,7 @@
-# TensorBoard Usage {{ tensorflow_icon }} {{ pytorch_icon }}
+# TensorBoard Usage {{ tensorflow_icon }} {{ pytorch_icon }} {{ paddle_icon }}

 :::{note}
-**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, Paddle {{ paddle_icon }}
 :::

 TensorBoard provides the visualization and tooling needed for machine learning
diff --git a/doc/train/training.md b/doc/train/training.md
index 5e8f8db498..8f491cc7a8 100644
--- a/doc/train/training.md
+++ b/doc/train/training.md
@@ -26,6 +26,14 @@ $ dp --pt train input.json

 :::

+:::{tab-item} Paddle {{ paddle_icon }}
+
+```bash
+$ dp --pd train input.json
+```
+
+:::
+
 ::::

 where `input.json` is the name of the input script.
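The same run can also be started from Python through the `deepmd.main.main` entry point, which is what the `run_dp` test helper added later in this PR wraps; the `input.json` path below is assumed to exist in the working directory.

```python
# Start Paddle-backend training from Python instead of the `dp` CLI
# (equivalent to `dp --pd train input.json`).
from deepmd.main import main

main(["--pd", "train", "input.json"])
```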
diff --git a/pyproject.toml b/pyproject.toml index 1faacb973c..15036d155c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,12 +224,12 @@ replacement = '\1="https://github.com/deepmodeling/deepmd-kit/raw/master/\g<2>"' [tool.cibuildwheel] test-command = [ "python -m deepmd -h", - """python -c "import deepmd.tf;import deepmd.pt" """, + """python -c "import deepmd.tf;import deepmd.pt;import deepmd.pd" """, "dp -h", "dp_ipi", "pytest {project}/source/tests/tf/test_lammps.py" ] -test-extras = ["cpu", "test", "lmp", "ipi", "torch"] +test-extras = ["cpu", "test", "lmp", "ipi", "torch", "paddle"] build = ["cp311-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] # TODO: uncomment to use the latest image when CUDA 11 is deprecated @@ -248,6 +248,7 @@ PIP_PREFER_BINARY = "1" DP_LAMMPS_VERSION = "stable_29Aug2024_update1" DP_ENABLE_IPI = "1" DP_ENABLE_PYTORCH = "1" +DP_ENABLE_PADDLE = "1" # for unclear reason, when enabling PyTorch, OpenMP is found accidentally CMAKE_ARGS = "-DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=1" @@ -284,6 +285,7 @@ PIP_PREFER_BINARY = "1" DP_LAMMPS_VERSION = "stable_29Aug2024_update1" DP_ENABLE_IPI = "1" DP_ENABLE_PYTORCH = "1" +DP_ENABLE_PADDLE = "1" MPI_HOME = "/usr/lib64/mpich" PATH = "/usr/lib64/mpich/bin:$PATH" # use CPU version of torch for building, which should also work for GPU @@ -294,7 +296,7 @@ UV_EXTRA_INDEX_URL = "https://download.pytorch.org/whl/cpu" CMAKE_PREFIX_PATH="/opt/python/cp311-cp311/" [tool.cibuildwheel.windows] -test-extras = ["cpu", "torch"] +test-extras = ["cpu", "torch", "paddle"] test-command = [ "python -m deepmd -h", "dp -h", @@ -302,6 +304,7 @@ test-command = [ [tool.cibuildwheel.windows.environment] PIP_PREFER_BINARY = "1" DP_ENABLE_PYTORCH = "1" +DP_ENABLE_PADDLE = "1" # One can run `tox` or `tox -e gpu` # to run pytest in an isolated environment @@ -405,8 +408,10 @@ convention = "numpy" banned-module-level-imports = [ "deepmd.tf", "deepmd.pt", + "deepmd.pd", "tensorflow", "torch", + "paddle", ] [tool.ruff.lint.flake8-tidy-imports.banned-api] @@ -417,9 +422,12 @@ banned-module-level-imports = [ "deepmd/tf/**" = ["TID253"] "deepmd/pt/**" = ["TID253"] "deepmd/jax/**" = ["TID253"] +"deepmd/pd/**" = ["TID253"] "source/tests/tf/**" = ["TID253"] "source/tests/pt/**" = ["TID253"] +"source/tests/pd/**" = ["TID253"] "source/tests/universal/pt/**" = ["TID253"] +"source/tests/universal/pd/**" = ["TID253"] "source/ipi/tests/**" = ["TID253"] "source/lmp/tests/**" = ["TID253"] "**/*.ipynb" = ["T20"] # printing in a nb file is expected diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 805c6514e0..1f49093c0d 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -4,6 +4,7 @@ project(DeePMD) option(ENABLE_TENSORFLOW "Enable TensorFlow interface" OFF) option(ENABLE_PYTORCH "Enable PyTorch interface" OFF) +option(ENABLE_PADDLE "Enable Paddle interface" OFF) option(BUILD_TESTING "Build test and enable coverage" OFF) set(DEEPMD_C_ROOT "" @@ -21,6 +22,58 @@ if(NOT DEEPMD_C_ROOT) set_if_higher(CMAKE_CXX_STANDARD 14) endif() +if(ENABLE_PADDLE) + if(NOT DEFINED PADDLE_INFERENCE_DIR) + # message( FATAL_ERROR "Make sure PADDLE_INFERENCE_DIR is set when + # ENABLE_PADDLE=ON") + message(STATUS "PADDLE_INFERENCE_DIR is not defined. 
Downloading...") + set(DOWNLOAD_URL + "https://paddle-qa.bj.bcebos.com/paddle-pipeline/GITHUB_Docker_Compile_Test_Cuda118_cudnn860_Trt8531_D1/ce51e82e84fc97e0a55a162037f1554746159cad/paddle_inference.tgz" + ) + set(TGZ_FILE "${CMAKE_BINARY_DIR}/paddle_inference.tgz") + set(EXTRACTED_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir") + file(DOWNLOAD ${DOWNLOAD_URL} ${TGZ_FILE}) + message(STATUS "Downloading finished, extracting...") + execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xzvf ${TGZ_FILE} + OUTPUT_QUIET) + file(REMOVE ${TGZ_FILE}) + set(PADDLE_INFERENCE_DIR + ${EXTRACTED_DIR} + CACHE PATH + "Path to 'paddle_inference_install_dir' or 'paddle_inference'") + else() + message( + STATUS "PADDLE_INFERENCE_DIR is already defined: ${PADDLE_INFERENCE_DIR}") + endif() + + message(STATUS "Final PADDLE_INFERENCE_DIR is set to ${PADDLE_INFERENCE_DIR}") + + set(PADDLE_INFERENCE_DIR + ${PADDLE_INFERENCE_DIR} + CACHE PATH "Path to 'paddle_inference_install_dir' or 'paddle_inference'") + + # used in api_cc + set(PADDLE_LIBRARIES + "${PADDLE_INFERENCE_DIR}/paddle/lib/libpaddle_inference.so" + CACHE PATH "Path to libpaddle_inference.so") + + include_directories("${PADDLE_INFERENCE_DIR}/") + set(PADDLE_LIB_THIRD_PARTY_PATH + "${PADDLE_INFERENCE_DIR}/third_party/install/") + + include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include") + include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include") + include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include") + include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include") + + link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib") + link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib") + link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib") + link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib") + link_directories("${PADDLE_INFERENCE_DIR}/paddle/lib") + # if (USE_ROCM_TOOLKIT) add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) endif() +endif(ENABLE_PADDLE) + if(BUILD_TESTING) enable_testing() add_subdirectory(${CMAKE_SOURCE_DIR}/cmake/coverage_config coverage_config) @@ -255,8 +308,12 @@ if(NOT DEEPMD_C_ROOT) if(ENABLE_PYTORCH) message(STATUS "- PyTorch") endif() + if(ENABLE_PADDLE) + message(STATUS "- Paddle") + endif() if(NOT ENABLE_TENSORFLOW AND NOT ENABLE_PYTORCH + AND NOT ENABLE_PADDLE AND NOT BUILD_PY_IF) message(FATAL_ERROR "No backend is enabled.") endif() diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt index 228a6657d3..6239f88773 100644 --- a/source/api_cc/CMakeLists.txt +++ b/source/api_cc/CMakeLists.txt @@ -23,6 +23,14 @@ if(ENABLE_PYTORCH target_link_libraries(${libname} PRIVATE "${TORCH_LIBRARIES}") target_compile_definitions(${libname} PRIVATE BUILD_PYTORCH) endif() +if(ENABLE_PADDLE AND NOT BUILD_PY_IF) + target_link_libraries(${libname} PUBLIC "${PADDLE_LIBRARIES}") + target_compile_definitions(${libname} PUBLIC BUILD_PADDLE) + if(DP_VARIANT STREQUAL "rocm") + target_link_libraries(${libname} + PUBLIC "${hip_LIB_INSTALL_DIR}/libgalaxyhip.so") + endif() +endif() target_include_directories( ${libname} diff --git a/source/api_cc/include/DeepPotPD.h b/source/api_cc/include/DeepPotPD.h new file mode 100644 index 0000000000..dab5c6e6e3 --- /dev/null +++ b/source/api_cc/include/DeepPotPD.h @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#pragma once + +#include + +#include "DeepPot.h" + +namespace deepmd { +/** + * @brief Paddle implementation for Deep Potential. 
+ **/ +class DeepPotPD : public DeepPotBase { + public: + /** + * @brief DP constructor without initialization. + **/ + DeepPotPD(); + ~DeepPotPD(); + /** + * @brief DP constructor with initialization. + * @param[in] model The name of the frozen model file. + * @param[in] gpu_rank The GPU rank. Default is 0. + * @param[in] file_content The content of the model file. If it is not empty, + *DP will read from the string instead of the file. + **/ + DeepPotPD(const std::string& model, + const int& gpu_rank = 0, + const std::string& file_content = ""); + /** + * @brief Initialize the DP. + * @param[in] model The name of the frozen model file. + * @param[in] gpu_rank The GPU rank. Default is 0. + * @param[in] file_content The content of the model file. If it is not empty, + *DP will read from the string instead of the file. + **/ + void init(const std::string& model, + const int& gpu_rank = 0, + const std::string& file_content = ""); + + private: + /** + * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial + *by using this DP. + * @param[out] ener The system energy. + * @param[out] force The force on each atom. + * @param[out] virial The virial. + * @param[out] atom_energy The atomic energy. + * @param[out] atom_virial The atomic virial. + * @param[in] coord The coordinates of atoms. The array should be of size + *nframes x natoms x 3. + * @param[in] atype The atom types. The list should contain natoms ints. + * @param[in] box The cell of the region. The array should be of size nframes + *x 9. + * @param[in] fparam The frame parameter. The array can be of size : + * nframes x dim_fparam. + * dim_fparam. Then all frames are assumed to be provided with the same + *fparam. + * @param[in] aparam The atomic parameter The array can be of size : + * nframes x natoms x dim_aparam. + * natoms x dim_aparam. Then all frames are assumed to be provided with the + *same aparam. + * @param[in] atomic Whether to compute the atomic energy and virial. + **/ + template + void compute(ENERGYVTYPE& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + /** + * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial + *by using this DP. + * @param[out] ener The system energy. + * @param[out] force The force on each atom. + * @param[out] virial The virial. + * @param[out] atom_energy The atomic energy. + * @param[out] atom_virial The atomic virial. + * @param[in] coord The coordinates of atoms. The array should be of size + *nframes x natoms x 3. + * @param[in] atype The atom types. The list should contain natoms ints. + * @param[in] box The cell of the region. The array should be of size nframes + *x 9. + * @param[in] nghost The number of ghost atoms. + * @param[in] lmp_list The input neighbour list. + * @param[in] ago Update the internal neighbour list if ago is 0. + * @param[in] fparam The frame parameter. The array can be of size : + * nframes x dim_fparam. + * dim_fparam. Then all frames are assumed to be provided with the same + *fparam. + * @param[in] aparam The atomic parameter The array can be of size : + * nframes x natoms x dim_aparam. + * natoms x dim_aparam. Then all frames are assumed to be provided with the + *same aparam. + * @param[in] atomic Whether to compute the atomic energy and virial. 
+ **/ + template + void compute(ENERGYVTYPE& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& lmp_list, + const int& ago, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + /** + * @brief Evaluate the energy, force, and virial with the mixed type + *by using this DP. + * @param[out] ener The system energy. + * @param[out] force The force on each atom. + * @param[out] virial The virial. + * @param[in] nframes The number of frames. + * @param[in] coord The coordinates of atoms. The array should be of size + *nframes x natoms x 3. + * @param[in] atype The atom types. The array should be of size nframes x + *natoms. + * @param[in] box The cell of the region. The array should be of size nframes + *x 9. + * @param[in] fparam The frame parameter. The array can be of size : + * nframes x dim_fparam. + * dim_fparam. Then all frames are assumed to be provided with the same + *fparam. + * @param[in] aparam The atomic parameter The array can be of size : + * nframes x natoms x dim_aparam. + * natoms x dim_aparam. Then all frames are assumed to be provided with the + *same aparam. + * @param[in] atomic Whether to compute the atomic energy and virial. + **/ + template + void compute_mixed_type(ENERGYVTYPE& ener, + std::vector& force, + std::vector& virial, + const int& nframes, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + /** + * @brief Evaluate the energy, force, and virial with the mixed type + *by using this DP. + * @param[out] ener The system energy. + * @param[out] force The force on each atom. + * @param[out] virial The virial. + * @param[out] atom_energy The atomic energy. + * @param[out] atom_virial The atomic virial. + * @param[in] nframes The number of frames. + * @param[in] coord The coordinates of atoms. The array should be of size + *nframes x natoms x 3. + * @param[in] atype The atom types. The array should be of size nframes x + *natoms. + * @param[in] box The cell of the region. The array should be of size nframes + *x 9. + * @param[in] fparam The frame parameter. The array can be of size : + * nframes x dim_fparam. + * dim_fparam. Then all frames are assumed to be provided with the same + *fparam. + * @param[in] aparam The atomic parameter The array can be of size : + * nframes x natoms x dim_aparam. + * natoms x dim_aparam. Then all frames are assumed to be provided with the + *same aparam. + * @param[in] atomic Whether to compute the atomic energy and virial. + **/ + template + void compute_mixed_type(ENERGYVTYPE& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const int& nframes, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + + public: + /** + * @brief Get the cutoff radius. + * @return The cutoff radius. + **/ + double cutoff() const { + assert(inited); + return rcut; + }; + /** + * @brief Get the number of types. + * @return The number of types. + **/ + int numb_types() const { + assert(inited); + return ntypes; + }; + /** + * @brief Get the number of types with spin. + * @return The number of types with spin. 
+ **/ + int numb_types_spin() const { + assert(inited); + return ntypes_spin; + }; + /** + * @brief Get the dimension of the frame parameter. + * @return The dimension of the frame parameter. + **/ + int dim_fparam() const { + assert(inited); + return dfparam; + }; + /** + * @brief Get the dimension of the atomic parameter. + * @return The dimension of the atomic parameter. + **/ + int dim_aparam() const { + assert(inited); + return daparam; + }; + /** + * @brief Get the type map (element name of the atom types) of this model. + * @param[out] type_map The type map of this model. + **/ + void get_type_map(std::string& type_map); + + /** + * @brief Get the buffer of this model. + * @param[in] buffer_name Buffer name. + * @param[out] buffer_array Buffer array. + **/ + template + void get_buffer(const std::string& buffer_name, + std::vector& buffer_array); + + /** + * @brief Get the buffer of this model. + * @param[in] buffer_name Buffer name. + * @param[out] buffer_scalar Buffer scalar. + **/ + template + void get_buffer(const std::string& buffer_name, BUFFERTYPE& buffer_scalar); + + /** + * @brief Get whether the atom dimension of aparam is nall instead of fparam. + * @param[out] aparam_nall whether the atom dimension of aparam is nall + *instead of fparam. + **/ + bool is_aparam_nall() const { + assert(inited); + return aparam_nall; + }; + + // forward to template class + void computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + void computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + void computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& inlist, + const int& ago, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + void computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& inlist, + const int& ago, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + void computew_mixed_type(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const int& nframes, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + void computew_mixed_type(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const int& nframes, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + + private: + int num_intra_nthreads, num_inter_nthreads; + bool inited; + int ntypes; + int ntypes_spin; + int dfparam; + int daparam; + int aparam_nall; + // copy neighbor 
list info from host + std::shared_ptr config; + std::shared_ptr predictor; + double rcut; + NeighborListData nlist_data; + int max_num_neighbors; + int gpu_id; + // use int instead bool for problems may meets with vector + int do_message_passing; // 1:dpa2 model 0:others + bool gpu_enabled; + std::unique_ptr firstneigh_tensor; + // std::unordered_map comm_dict; +}; + +} // namespace deepmd diff --git a/source/api_cc/include/version.h.in b/source/api_cc/include/version.h.in index 26b0c1be48..4be0589a30 100644 --- a/source/api_cc/include/version.h.in +++ b/source/api_cc/include/version.h.in @@ -10,4 +10,5 @@ const std::string global_git_branch="@GIT_BRANCH@"; const std::string global_tf_include_dir="@TensorFlow_INCLUDE_DIRS@"; const std::string global_tf_lib="@TensorFlow_LIBRARY@"; const std::string global_pt_lib="@TORCH_LIBRARIES@"; +const std::string global_pd_lib="@PADDLE_LIBRARIES@"; const std::string global_model_version="@MODEL_VERSION@"; diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc index c184446288..42ce09a139 100644 --- a/source/api_cc/src/DeepPot.cc +++ b/source/api_cc/src/DeepPot.cc @@ -12,6 +12,9 @@ #ifdef BUILD_PYTORCH #include "DeepPotPT.h" #endif +#ifdef BUILD_PADDLE +#include "DeepPotPD.h" +#endif #include "device.h" using namespace deepmd; @@ -41,6 +44,9 @@ void DeepPot::init(const std::string& model, backend = deepmd::DPBackend::PyTorch; } else if (model.length() >= 3 && model.substr(model.length() - 3) == ".pb") { backend = deepmd::DPBackend::TensorFlow; + } else if ((model.length() >= 5 && + model.substr(model.length() - 5) == ".json")) { + backend = deepmd::DPBackend::Paddle; } else { throw deepmd::deepmd_exception("Unsupported model file format"); } @@ -57,7 +63,11 @@ void DeepPot::init(const std::string& model, throw deepmd::deepmd_exception("PyTorch backend is not built"); #endif } else if (deepmd::DPBackend::Paddle == backend) { - throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet"); +#ifdef BUILD_PADDLE + dp = std::make_shared(model, gpu_rank, file_content); +#else + throw deepmd::deepmd_exception("Paddle backend is not built"); +#endif } else { throw deepmd::deepmd_exception("Unknown file type"); } diff --git a/source/api_cc/src/DeepPotPD.cc b/source/api_cc/src/DeepPotPD.cc new file mode 100644 index 0000000000..df596e2306 --- /dev/null +++ b/source/api_cc/src/DeepPotPD.cc @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#ifdef BUILD_PADDLE +#include "DeepPotPD.h" + +#include +#include + +#include "common.h" +#include "device.h" +#include "errors.h" + +using namespace deepmd; + +std::vector createNlistTensor(const std::vector>& data) { + std::vector ret; + for (const auto& row : data) { + ret.insert(ret.end(), row.begin(), row.end()); + } + return ret; +} + +DeepPotPD::DeepPotPD() : inited(false) {} +DeepPotPD::DeepPotPD(const std::string& model, + const int& gpu_rank, + const std::string& file_content) + : inited(false) { + try { + init(model, gpu_rank, file_content); + } catch (...) { + throw; + } +} +void DeepPotPD::init(const std::string& model, + const int& gpu_rank, + const std::string& file_content) { + if (inited) { + std::cerr << "WARNING: deepmd-kit should not be initialized twice, do " + "nothing at the second call of initializer" + << std::endl; + return; + } + // NOTE: There is no custom operators need to be loaded now. + // deepmd::load_op_library(); + + // NOTE: Only support 1 GPU now. 
+ int gpu_num = 1; + if (gpu_num > 0) { + gpu_id = gpu_rank % gpu_num; + } else { + gpu_id = 0; + } + + // initialize inference config + config = std::make_shared(); + config->DisableGlogInfo(); + config->EnableNewExecutor(true); + config->EnableNewIR(true); + + // loading inference model + std::string pdmodel_path; + std::string pdiparams_path; + if (model.find(".json") != std::string::npos) { + pdmodel_path = model; + pdiparams_path = model; + pdiparams_path.replace(pdiparams_path.find(".json"), 5, + std::string(".pdiparams")); + } else if (model.find(".pdmodel") != std::string::npos) { + pdmodel_path = model; + pdiparams_path = model; + pdiparams_path.replace(pdiparams_path.find(".pdmodel"), 8, + std::string(".pdiparams")); + } else { + throw deepmd::deepmd_exception("Given inference model: " + model + + " do not exist, please check it."); + } + config->SetModel(pdmodel_path, pdiparams_path); + config->EnableUseGpu( + 4096, 0); // annotate it if use cpu, default use gpu with 4G mem + gpu_enabled = config->use_gpu(); + if (!gpu_enabled) { + config->DisableGpu(); + std::cout << "load model from: " << model << " to cpu " << std::endl; + } else { + std::cout << "load model from: " << model << " to gpu " << gpu_id + << std::endl; + } + + // NOTE: Both set to 1 now. + // get_env_nthreads(num_intra_nthreads, + // num_inter_nthreads); // need to be fixed as + // // DP_INTRA_OP_PARALLELISM_THREADS + // num_intra_nthreads = 1; + num_inter_nthreads = 1; + if (num_inter_nthreads) { + config->SetCpuMathLibraryNumThreads(num_inter_nthreads); + } + + predictor = paddle_infer::CreatePredictor(*config); + + // initialize hyper params from model buffers + ntypes_spin = 0; + DeepPotPD::get_buffer("buffer_has_message_passing", do_message_passing); + DeepPotPD::get_buffer("buffer_rcut", rcut); + DeepPotPD::get_buffer("buffer_ntypes", ntypes); + DeepPotPD::get_buffer("buffer_dfparam", dfparam); + DeepPotPD::get_buffer("buffer_daparam", daparam); + DeepPotPD::get_buffer("buffer_aparam_nall", aparam_nall); + inited = true; +} +DeepPotPD::~DeepPotPD() {} + +template +void DeepPotPD::compute(ENERGYVTYPE& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& lmp_list, + const int& ago, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + int natoms = atype.size(); + // select real atoms + std::vector dcoord, dforce, aparam_, datom_energy, datom_virial; + std::vector datype, fwd_map, bkw_map; + int nghost_real, nall_real, nloc_real; + int nall = natoms; + select_real_atoms_coord(dcoord, datype, aparam_, nghost_real, fwd_map, + bkw_map, nall_real, nloc_real, coord, atype, aparam, + nghost, ntypes, 1, daparam, nall, aparam_nall); + int nloc = nall_real - nghost_real; + int nframes = 1; + std::vector coord_wrapped = dcoord; + auto coord_wrapped_Tensor = predictor->GetInputHandle("coord"); + coord_wrapped_Tensor->Reshape({1, nall_real, 3}); + coord_wrapped_Tensor->CopyFromCpu(coord_wrapped.data()); + + auto atype_Tensor = predictor->GetInputHandle("atype"); + atype_Tensor->Reshape({1, nall_real}); + atype_Tensor->CopyFromCpu(datype.data()); + + if (ago == 0) { + nlist_data.copy_from_nlist(lmp_list); + nlist_data.shuffle_exclude_empty(fwd_map); + nlist_data.padding(); + if (do_message_passing == 1 && nghost > 0) { + throw deepmd::deepmd_exception( + "(do_message_passing == 1 && nghost > 0) is not supported yet."); + 
int nswap = lmp_list.nswap; + auto sendproc_tensor = predictor->GetInputHandle("sendproc"); + sendproc_tensor->Reshape({nswap}); + sendproc_tensor->CopyFromCpu(lmp_list.sendproc); + auto recvproc_tensor = predictor->GetInputHandle("recvproc"); + recvproc_tensor->Reshape({nswap}); + recvproc_tensor->CopyFromCpu(lmp_list.recvproc); + auto firstrecv_tensor = predictor->GetInputHandle("firstrecv"); + firstrecv_tensor->Reshape({nswap}); + firstrecv_tensor->CopyFromCpu(lmp_list.firstrecv); + auto recvnum_tensor = predictor->GetInputHandle("recvnum"); + recvnum_tensor->Reshape({nswap}); + recvnum_tensor->CopyFromCpu(lmp_list.recvnum); + auto sendnum_tensor = predictor->GetInputHandle("sendnum"); + sendnum_tensor->Reshape({nswap}); + sendnum_tensor->CopyFromCpu(lmp_list.sendnum); + auto communicator_tensor = predictor->GetInputHandle("communicator"); + communicator_tensor->Reshape({1}); + communicator_tensor->CopyFromCpu(static_cast(lmp_list.world)); + auto sendlist_tensor = predictor->GetInputHandle("sendlist"); + + int total_send = + std::accumulate(lmp_list.sendnum, lmp_list.sendnum + nswap, 0); + } + if (do_message_passing == 1 && nghost == 0) { + throw deepmd::deepmd_exception( + "(do_message_passing == 1 && nghost == 0) is not supported yet."); + } + } + std::vector firstneigh = createNlistTensor(nlist_data.jlist); + firstneigh_tensor = predictor->GetInputHandle("nlist"); + firstneigh_tensor->Reshape({1, nloc, (int)firstneigh.size() / (int)nloc}); + firstneigh_tensor->CopyFromCpu(firstneigh.data()); + bool do_atom_virial_tensor = atomic; + std::unique_ptr fparam_tensor; + if (!fparam.empty()) { + throw deepmd::deepmd_exception("fparam is not supported as input yet."); + // fparam_tensor = predictor->GetInputHandle("fparam"); + // fparam_tensor->Reshape({1, static_cast(fparam.size())}); + // fparam_tensor->CopyFromCpu((fparam.data())); + } + std::unique_ptr aparam_tensor; + if (!aparam_.empty()) { + throw deepmd::deepmd_exception("aparam is not supported as input yet."); + // aparam_tensor = predictor->GetInputHandle("aparam"); + // aparam_tensor->Reshape({1, lmp_list.inum, + // static_cast(aparam_.size()) / lmp_list.inum}); + // aparam_tensor->CopyFromCpu((aparam_.data())); + } + + if (!predictor->Run()) { + throw deepmd::deepmd_exception("Paddle inference run failed"); + } + auto output_names = predictor->GetOutputNames(); + + auto energy_ = predictor->GetOutputHandle(output_names[1]); + auto force_ = predictor->GetOutputHandle(output_names[2]); + auto virial_ = predictor->GetOutputHandle(output_names[3]); + std::vector output_energy_shape = energy_->shape(); + int output_energy_size = + std::accumulate(output_energy_shape.begin(), output_energy_shape.end(), 1, + std::multiplies()); + std::vector output_force_shape = force_->shape(); + int output_force_size = + std::accumulate(output_force_shape.begin(), output_force_shape.end(), 1, + std::multiplies()); + std::vector output_virial_shape = virial_->shape(); + int output_virial_size = + std::accumulate(output_virial_shape.begin(), output_virial_shape.end(), 1, + std::multiplies()); + // output energy + ener.resize(output_energy_size); + energy_->CopyToCpu(ener.data()); + + // output force + dforce.resize(output_force_size); + force_->CopyToCpu(dforce.data()); + + // output virial + virial.resize(output_virial_size); + virial_->CopyToCpu(virial.data()); + + // bkw map + force.resize(static_cast(nframes) * fwd_map.size() * 3); + select_map(force, dforce, bkw_map, 3, nframes, fwd_map.size(), + nall_real); + if (atomic) { + throw "atomic virial 
is not supported as output yet."; + // auto atom_virial_ = predictor->GetOutputHandle("extended_virial"); + // auto atom_energy_ = predictor->GetOutputHandle("atom_energy"); + // datom_energy.resize(nall_real, + // 0.0); // resize to nall to be consistenet with TF. + // atom_energy_->CopyToCpu(datom_energy.data()); + // atom_virial_->CopyToCpu(datom_virial.data()); + // atom_energy.resize(static_cast(nframes) * fwd_map.size()); + // atom_virial.resize(static_cast(nframes) * fwd_map.size() * 9); + // select_map(atom_energy, datom_energy, bkw_map, 1, nframes, + // fwd_map.size(), nall_real); + // select_map(atom_virial, datom_virial, bkw_map, 9, nframes, + // fwd_map.size(), nall_real); + } +} +template void DeepPotPD::compute>( + std::vector& dener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& lmp_list, + const int& ago, + const std::vector& fparam, + const std::vector& aparam_, + const bool atomic); + +template void DeepPotPD::compute>( + std::vector& dener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& lmp_list, + const int& ago, + const std::vector& fparam, + const std::vector& aparam_, + const bool atomic); + +// ENERGYVTYPE: std::vector or ENERGYTYPE +template +void DeepPotPD::compute(ENERGYVTYPE& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + // select real atoms + std::vector coord_wrapped = coord; + int natoms = atype.size(); + int nframes = 1; + auto coord_wrapped_Tensor = predictor->GetInputHandle("coord"); + coord_wrapped_Tensor->Reshape({1, natoms, 3}); + coord_wrapped_Tensor->CopyFromCpu(coord_wrapped.data()); + + std::vector atype_64(atype.begin(), atype.end()); + auto atype_Tensor = predictor->GetInputHandle("atype"); + atype_Tensor->Reshape({1, natoms}); + atype_Tensor->CopyFromCpu(atype_64.data()); + + std::unique_ptr box_Tensor; + if (!box.empty()) { + box_Tensor = predictor->GetInputHandle("box"); + box_Tensor->Reshape({1, 9}); + box_Tensor->CopyFromCpu((box.data())); + } + std::unique_ptr fparam_tensor; + if (!fparam.empty()) { + throw deepmd::deepmd_exception("fparam is not supported as input yet."); + // fparam_tensor = predictor->GetInputHandle("box"); + // fparam_tensor->Reshape({1, static_cast(fparam.size())}); + // fparam_tensor->CopyFromCpu((fparam.data())); + } + std::unique_ptr aparam_tensor; + if (!aparam.empty()) { + throw deepmd::deepmd_exception("fparam is not supported as input yet."); + // aparam_tensor = predictor->GetInputHandle("box"); + // aparam_tensor->Reshape({1, natoms, static_cast(aparam.size()) / + // natoms}); aparam_tensor->CopyFromCpu((aparam.data())); + } + + bool do_atom_virial_tensor = atomic; + if (!predictor->Run()) { + throw deepmd::deepmd_exception("Paddle inference run failed"); + } + + auto output_names = predictor->GetOutputNames(); + auto energy_ = predictor->GetOutputHandle(output_names[1]); + auto force_ = predictor->GetOutputHandle(output_names[2]); + auto virial_ = predictor->GetOutputHandle(output_names[3]); + + energy_->CopyToCpu(ener.data()); + 
force_->CopyToCpu(force.data()); + virial_->CopyToCpu(virial.data()); + + if (atomic) { + throw deepmd::deepmd_exception( + "atomic virial is not supported as output yet."); + // auto atom_energy_ = predictor->GetOutputHandle(output_names[4]); + // auto atom_virial_ = predictor->GetOutputHandle(output_names[5]); + // atom_energy_->CopyToCpu(atom_energy.data()); + // atom_virial_->CopyToCpu(atom_virial.data()); + } +} + +template void DeepPotPD::compute>( + std::vector& ener, + std::vector& dforce, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& dcoord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + +template void DeepPotPD::compute>( + std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& dcoord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic); + +/* type_map is regarded as a special string buffer +that need to be postprocessed */ +void DeepPotPD::get_type_map(std::string& type_map) { + auto type_map_tensor = predictor->GetOutputHandle("buffer_type_map"); + auto type_map_shape = type_map_tensor->shape(); + int type_map_size = std::accumulate( + type_map_shape.begin(), type_map_shape.end(), 1, std::multiplies()); + + std::vector type_map_arr(type_map_size, 0); + type_map_tensor->CopyToCpu(type_map_arr.data()); + for (auto char_c : type_map_arr) { + type_map += std::string(1, char_c); + } +} + +/* general function except for string buffer */ +template +void DeepPotPD::get_buffer(const std::string& buffer_name, + std::vector& buffer_array) { + auto buffer_tensor = predictor->GetOutputHandle(buffer_name); + auto buffer_shape = buffer_tensor->shape(); + int buffer_size = std::accumulate(buffer_shape.begin(), buffer_shape.end(), 1, + std::multiplies()); + buffer_array.resize(buffer_size); + buffer_tensor->CopyToCpu(buffer_array.data()); +} + +template +void DeepPotPD::get_buffer(const std::string& buffer_name, + BUFFERTYPE& buffer_scalar) { + std::vector buffer_array(1); + DeepPotPD::get_buffer(buffer_name, buffer_array); + buffer_scalar = buffer_array[0]; +} + +// forward to template method +void DeepPotPD::computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box, + fparam, aparam, atomic); +} +void DeepPotPD::computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box, + fparam, aparam, atomic); +} +void DeepPotPD::computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& inlist, + const int& ago, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + compute(ener, 
force, virial, atom_energy, atom_virial, coord, atype, box, + nghost, inlist, ago, fparam, aparam, atomic); +} +void DeepPotPD::computew(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const int nghost, + const InputNlist& inlist, + const int& ago, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box, + nghost, inlist, ago, fparam, aparam, atomic); +} +void DeepPotPD::computew_mixed_type(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const int& nframes, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + throw deepmd::deepmd_exception( + "computew_mixed_type is not implemented in paddle backend yet"); +} +void DeepPotPD::computew_mixed_type(std::vector& ener, + std::vector& force, + std::vector& virial, + std::vector& atom_energy, + std::vector& atom_virial, + const int& nframes, + const std::vector& coord, + const std::vector& atype, + const std::vector& box, + const std::vector& fparam, + const std::vector& aparam, + const bool atomic) { + throw deepmd::deepmd_exception( + "computew_mixed_type is not implemented in paddle backend yet"); +} +#endif diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index bd3f18c579..2bdb75aa15 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -409,6 +409,9 @@ void deepmd::load_op_library() { #ifdef BUILD_PYTORCH _load_single_op_library("deepmd_op_pt"); #endif + // #ifdef BUILD_PADDLE + // _load_single_op_library("deepmd_op_pd"); + // #endif // load customized plugins const char* env_customized_plugins = std::getenv("DP_PLUGIN_PATH"); if (env_customized_plugins) { @@ -1391,6 +1394,9 @@ void deepmd::print_summary(const std::string& pre) { #endif #ifdef BUILD_PYTORCH std::cout << pre << "build with pt lib: " + global_pt_lib << "\n"; +#endif +#ifdef BUILD_PADDLE + std::cout << pre << "build with pd lib: " + global_pd_lib << "\n"; #endif std::cout << pre << "set tf intra_op_parallelism_threads: " << num_intra_nthreads diff --git a/source/config/CMakeLists.txt b/source/config/CMakeLists.txt index b1ce17566f..dd005a327b 100644 --- a/source/config/CMakeLists.txt +++ b/source/config/CMakeLists.txt @@ -14,6 +14,12 @@ else() set(ENABLE_PYTORCH 0) endif() +if(ENABLE_PADDLE) + set(ENABLE_PADDLE 1) +else() + set(ENABLE_PADDLE 0) +endif() + configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini" @ONLY) diff --git a/source/config/run_config.ini b/source/config/run_config.ini index fb96ad224e..65b98a1e5a 100644 --- a/source/config/run_config.ini +++ b/source/config/run_config.ini @@ -6,13 +6,16 @@ GIT_DATE = @GIT_DATE@ GIT_BRANCH = @GIT_BRANCH@ ENABLE_TENSORFLOW = @ENABLE_TENSORFLOW@ ENABLE_PYTORCH = @ENABLE_PYTORCH@ +ENABLE_PADDLE = @ENABLE_PADDLE@ TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@ TF_LIBS = @TensorFlow_LIBRARY_PATH@ TF_VERSION = @TENSORFLOW_VERSION@ TF_CXX11_ABI_FLAG = @OP_CXX_ABI@ PT_INCLUDE_DIR = @TORCH_INCLUDE_DIRS@ PT_LIBS = @PyTorch_LIBRARY_PATH@ -PT_VERSIOn = @Torch_VERSION@ +PT_VERSION = @Torch_VERSION@ PT_CXX11_ABI_FLAG = @OP_CXX_ABI_PT@ +PD_VERSION = @PADDLE_VERSION@ +PD_INFERENCE_DIR = @PADDLE_INFERENCE_DIR@ MODEL_VERSION=@MODEL_VERSION@ 
DP_VARIANT=@DP_VARIANT@ diff --git a/source/tests/pd/NiO/data/data_0/set.000/box.npy b/source/tests/pd/NiO/data/data_0/set.000/box.npy new file mode 100644 index 0000000000..1f72eb7185 Binary files /dev/null and b/source/tests/pd/NiO/data/data_0/set.000/box.npy differ diff --git a/source/tests/pd/NiO/data/data_0/set.000/coord.npy b/source/tests/pd/NiO/data/data_0/set.000/coord.npy new file mode 100644 index 0000000000..4b60ae0e0b Binary files /dev/null and b/source/tests/pd/NiO/data/data_0/set.000/coord.npy differ diff --git a/source/tests/pd/NiO/data/data_0/set.000/energy.npy b/source/tests/pd/NiO/data/data_0/set.000/energy.npy new file mode 100644 index 0000000000..8754b6dad2 Binary files /dev/null and b/source/tests/pd/NiO/data/data_0/set.000/energy.npy differ diff --git a/source/tests/pd/NiO/data/data_0/set.000/force.npy b/source/tests/pd/NiO/data/data_0/set.000/force.npy new file mode 100644 index 0000000000..e95173d561 Binary files /dev/null and b/source/tests/pd/NiO/data/data_0/set.000/force.npy differ diff --git a/source/tests/pd/NiO/data/data_0/set.000/force_mag.npy b/source/tests/pd/NiO/data/data_0/set.000/force_mag.npy new file mode 100644 index 0000000000..65bc1ef837 Binary files /dev/null and b/source/tests/pd/NiO/data/data_0/set.000/force_mag.npy differ diff --git a/source/tests/pd/NiO/data/data_0/set.000/spin.npy b/source/tests/pd/NiO/data/data_0/set.000/spin.npy new file mode 100644 index 0000000000..c426f1c7f6 Binary files /dev/null and b/source/tests/pd/NiO/data/data_0/set.000/spin.npy differ diff --git a/source/tests/pd/NiO/data/data_0/type.raw b/source/tests/pd/NiO/data/data_0/type.raw new file mode 100644 index 0000000000..d9664c7a22 --- /dev/null +++ b/source/tests/pd/NiO/data/data_0/type.raw @@ -0,0 +1,32 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/source/tests/pd/NiO/data/data_0/type_map.raw b/source/tests/pd/NiO/data/data_0/type_map.raw new file mode 100644 index 0000000000..7eca995c31 --- /dev/null +++ b/source/tests/pd/NiO/data/data_0/type_map.raw @@ -0,0 +1,2 @@ +Ni +O diff --git a/source/tests/pd/NiO/data/single/set.000/box.npy b/source/tests/pd/NiO/data/single/set.000/box.npy new file mode 100644 index 0000000000..d3ac265aa8 Binary files /dev/null and b/source/tests/pd/NiO/data/single/set.000/box.npy differ diff --git a/source/tests/pd/NiO/data/single/set.000/coord.npy b/source/tests/pd/NiO/data/single/set.000/coord.npy new file mode 100644 index 0000000000..4060f0fc53 Binary files /dev/null and b/source/tests/pd/NiO/data/single/set.000/coord.npy differ diff --git a/source/tests/pd/NiO/data/single/set.000/energy.npy b/source/tests/pd/NiO/data/single/set.000/energy.npy new file mode 100644 index 0000000000..fd7d1420ee Binary files /dev/null and b/source/tests/pd/NiO/data/single/set.000/energy.npy differ diff --git a/source/tests/pd/NiO/data/single/set.000/force.npy b/source/tests/pd/NiO/data/single/set.000/force.npy new file mode 100644 index 0000000000..c5c238d200 Binary files /dev/null and b/source/tests/pd/NiO/data/single/set.000/force.npy differ diff --git a/source/tests/pd/NiO/data/single/set.000/force_mag.npy b/source/tests/pd/NiO/data/single/set.000/force_mag.npy new file mode 100644 index 0000000000..3f0323ad8e Binary files /dev/null and b/source/tests/pd/NiO/data/single/set.000/force_mag.npy differ diff --git a/source/tests/pd/NiO/data/single/set.000/spin.npy b/source/tests/pd/NiO/data/single/set.000/spin.npy new file mode 100644 index 0000000000..88985f5d2c Binary files /dev/null and 
b/source/tests/pd/NiO/data/single/set.000/spin.npy differ diff --git a/source/tests/pd/NiO/data/single/type.raw b/source/tests/pd/NiO/data/single/type.raw new file mode 100644 index 0000000000..d9664c7a22 --- /dev/null +++ b/source/tests/pd/NiO/data/single/type.raw @@ -0,0 +1,32 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/source/tests/pd/NiO/data/single/type_map.raw b/source/tests/pd/NiO/data/single/type_map.raw new file mode 100644 index 0000000000..7eca995c31 --- /dev/null +++ b/source/tests/pd/NiO/data/single/type_map.raw @@ -0,0 +1,2 @@ +Ni +O diff --git a/source/tests/pd/__init__.py b/source/tests/pd/__init__.py new file mode 100644 index 0000000000..8484fef3ef --- /dev/null +++ b/source/tests/pd/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import paddle + +paddle.framework.core.set_num_threads(1) +# paddle.set_num_interop_threads(1) +# testing purposes; device should always be set explicitly +# paddle.set_device("gpu:9999999") diff --git a/source/tests/pd/common.py b/source/tests/pd/common.py new file mode 100644 index 0000000000..59a9672330 --- /dev/null +++ b/source/tests/pd/common.py @@ -0,0 +1,263 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, + Union, +) + +import numpy as np +import paddle + +from deepmd.main import ( + main, +) +from deepmd.pd.utils.env import ( + DEVICE, + GLOBAL_PD_FLOAT_PRECISION, +) + + +def run_dp(cmd: str) -> int: + """Run DP directly from the entry point instead of the subprocess. + + It is quite slow to start DeePMD-kit with subprocess. + + Parameters + ---------- + cmd : str + The command to run. + + Returns + ------- + int + Always returns 0. + """ + cmds = cmd.split() + if cmds[0] == "dp": + cmds = cmds[1:] + else: + raise RuntimeError("The command is not dp") + + main(cmds) + return 0 + + +def eval_model( + model, + coords: Union[np.ndarray, paddle.Tensor], + cells: Optional[Union[np.ndarray, paddle.Tensor]], + atom_types: Union[np.ndarray, paddle.Tensor, list[int]], + spins: Optional[Union[np.ndarray, paddle.Tensor]] = None, + atomic: bool = False, + infer_batch_size: int = 2, + denoise: bool = False, +): + model = model.to(DEVICE) + energy_out = [] + atomic_energy_out = [] + force_out = [] + force_mag_out = [] + virial_out = [] + atomic_virial_out = [] + updated_coord_out = [] + logits_out = [] + err_msg = ( + f"All inputs should be the same format, " + f"but found {type(coords)}, {type(cells)}, {type(atom_types)} instead! 
" + ) + return_tensor = True + if isinstance(coords, paddle.Tensor): + if cells is not None: + assert isinstance(cells, paddle.Tensor), err_msg + if spins is not None: + assert isinstance(spins, paddle.Tensor), err_msg + assert isinstance(atom_types, paddle.Tensor) or isinstance(atom_types, list) + atom_types = paddle.to_tensor(atom_types, dtype=paddle.int32, place=DEVICE) + elif isinstance(coords, np.ndarray): + if cells is not None: + assert isinstance(cells, np.ndarray), err_msg + if spins is not None: + assert isinstance(spins, np.ndarray), err_msg + assert isinstance(atom_types, np.ndarray) or isinstance(atom_types, list) + atom_types = np.array(atom_types, dtype=np.int32) + return_tensor = False + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + if isinstance(atom_types, paddle.Tensor): + atom_types = paddle.tile(atom_types.unsqueeze(0), [nframes, 1]).reshape( + [nframes, -1] + ) + else: + atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) + else: + natoms = len(atom_types[0]) + + coord_input = paddle.to_tensor( + coords.reshape([-1, natoms, 3]), dtype=GLOBAL_PD_FLOAT_PRECISION, place=DEVICE + ) + spin_input = None + if spins is not None: + spin_input = paddle.to_tensor( + spins.reshape([-1, natoms, 3]), + dtype=GLOBAL_PD_FLOAT_PRECISION, + place=DEVICE, + ) + has_spin = getattr(model, "has_spin", False) + if callable(has_spin): + has_spin = has_spin() + type_input = paddle.to_tensor(atom_types, dtype=paddle.int64, place=DEVICE) + box_input = None + if cells is None: + pbc = False + else: + pbc = True + box_input = paddle.to_tensor( + cells.reshape([-1, 3, 3]), dtype=GLOBAL_PD_FLOAT_PRECISION, place=DEVICE + ) + num_iter = int((nframes + infer_batch_size - 1) / infer_batch_size) + + for ii in range(num_iter): + batch_coord = coord_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + batch_atype = type_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + batch_box = None + batch_spin = None + if spin_input is not None: + batch_spin = spin_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + if pbc: + batch_box = box_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + input_dict = { + "coord": batch_coord, + "atype": batch_atype, + "box": batch_box, + "do_atomic_virial": atomic, + } + if has_spin: + input_dict["spin"] = batch_spin + batch_output = model(**input_dict) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + if not return_tensor: + if "energy" in batch_output: + energy_out.append(batch_output["energy"].numpy()) + if "atom_energy" in batch_output: + atomic_energy_out.append(batch_output["atom_energy"].numpy()) + if "force" in batch_output: + force_out.append(batch_output["force"].numpy()) + if "force_mag" in batch_output: + force_mag_out.append(batch_output["force_mag"].numpy()) + if "virial" in batch_output: + virial_out.append(batch_output["virial"].numpy()) + if "atom_virial" in batch_output: + atomic_virial_out.append(batch_output["atom_virial"].numpy()) + if "updated_coord" in batch_output: + updated_coord_out.append(batch_output["updated_coord"].numpy()) + if "logits" in batch_output: + logits_out.append(batch_output["logits"].numpy()) + else: + if "energy" in batch_output: + energy_out.append(batch_output["energy"]) + if "atom_energy" in batch_output: + atomic_energy_out.append(batch_output["atom_energy"]) + if "force" in batch_output: + force_out.append(batch_output["force"]) + if "force_mag" in batch_output: + force_mag_out.append(batch_output["force_mag"]) + if 
"virial" in batch_output: + virial_out.append(batch_output["virial"]) + if "atom_virial" in batch_output: + atomic_virial_out.append(batch_output["atom_virial"]) + if "updated_coord" in batch_output: + updated_coord_out.append(batch_output["updated_coord"]) + if "logits" in batch_output: + logits_out.append(batch_output["logits"]) + if not return_tensor: + energy_out = ( + np.concatenate(energy_out) if energy_out else np.zeros([nframes, 1]) # pylint: disable=no-explicit-dtype + ) + atomic_energy_out = ( + np.concatenate(atomic_energy_out) + if atomic_energy_out + else np.zeros([nframes, natoms, 1]) # pylint: disable=no-explicit-dtype + ) + force_out = ( + np.concatenate(force_out) if force_out else np.zeros([nframes, natoms, 3]) # pylint: disable=no-explicit-dtype + ) + force_mag_out = ( + np.concatenate(force_mag_out) + if force_mag_out + else np.zeros([nframes, natoms, 3]) # pylint: disable=no-explicit-dtype + ) + virial_out = ( + np.concatenate(virial_out) if virial_out else np.zeros([nframes, 3, 3]) # pylint: disable=no-explicit-dtype + ) + atomic_virial_out = ( + np.concatenate(atomic_virial_out) + if atomic_virial_out + else np.zeros([nframes, natoms, 3, 3]) # pylint: disable=no-explicit-dtype + ) + updated_coord_out = ( + np.concatenate(updated_coord_out) if updated_coord_out else None + ) + logits_out = np.concatenate(logits_out) if logits_out else None + else: + energy_out = ( + paddle.concat(energy_out) + if energy_out + else paddle.zeros([nframes, 1], dtype=GLOBAL_PD_FLOAT_PRECISION).to( + device=DEVICE + ) + ) + atomic_energy_out = ( + paddle.concat(atomic_energy_out) + if atomic_energy_out + else paddle.zeros([nframes, natoms, 1], dtype=GLOBAL_PD_FLOAT_PRECISION).to( + device=DEVICE + ) + ) + force_out = ( + paddle.concat(force_out) + if force_out + else paddle.zeros([nframes, natoms, 3], dtype=GLOBAL_PD_FLOAT_PRECISION).to( + device=DEVICE + ) + ) + force_mag_out = ( + paddle.concat(force_mag_out) + if force_mag_out + else paddle.zeros([nframes, natoms, 3], dtype=GLOBAL_PD_FLOAT_PRECISION).to( + device=DEVICE + ) + ) + virial_out = ( + paddle.concat(virial_out) + if virial_out + else paddle.zeros([nframes, 3, 3], dtype=GLOBAL_PD_FLOAT_PRECISION).to( + device=DEVICE + ) + ) + atomic_virial_out = ( + paddle.concat(atomic_virial_out) + if atomic_virial_out + else paddle.zeros( + [nframes, natoms, 3, 3], dtype=GLOBAL_PD_FLOAT_PRECISION + ).to(device=DEVICE) + ) + updated_coord_out = ( + paddle.concat(updated_coord_out) if updated_coord_out else None + ) + logits_out = paddle.concat(logits_out) if logits_out else None + if denoise: + return updated_coord_out, logits_out + else: + results_dict = { + "energy": energy_out, + "force": force_out, + "virial": virial_out, + } + if has_spin: + results_dict["force_mag"] = force_mag_out + if atomic: + results_dict["atom_energy"] = atomic_energy_out + results_dict["atom_virial"] = atomic_virial_out + return results_dict diff --git a/source/tests/pd/conftest.py b/source/tests/pd/conftest.py new file mode 100644 index 0000000000..530cb18907 --- /dev/null +++ b/source/tests/pd/conftest.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import paddle +import pytest + + +@pytest.fixture(scope="package", autouse=True) +def clear_cuda_memory(request): + yield + paddle.device.cuda.empty_cache() diff --git a/source/tests/pd/dos/data/atomic_system/set.000/atom_dos.npy b/source/tests/pd/dos/data/atomic_system/set.000/atom_dos.npy new file mode 100644 index 0000000000..22809c1068 Binary files /dev/null and 
b/source/tests/pd/dos/data/atomic_system/set.000/atom_dos.npy differ diff --git a/source/tests/pd/dos/data/atomic_system/set.000/box.npy b/source/tests/pd/dos/data/atomic_system/set.000/box.npy new file mode 100644 index 0000000000..6265bf150e Binary files /dev/null and b/source/tests/pd/dos/data/atomic_system/set.000/box.npy differ diff --git a/source/tests/pd/dos/data/atomic_system/set.000/coord.npy b/source/tests/pd/dos/data/atomic_system/set.000/coord.npy new file mode 100644 index 0000000000..f33ce430bf Binary files /dev/null and b/source/tests/pd/dos/data/atomic_system/set.000/coord.npy differ diff --git a/source/tests/pd/dos/data/atomic_system/type.raw b/source/tests/pd/dos/data/atomic_system/type.raw new file mode 100644 index 0000000000..de3c26ec4e --- /dev/null +++ b/source/tests/pd/dos/data/atomic_system/type.raw @@ -0,0 +1,32 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/source/tests/pd/dos/data/atomic_system/type_map.raw b/source/tests/pd/dos/data/atomic_system/type_map.raw new file mode 100644 index 0000000000..a9edc74f38 --- /dev/null +++ b/source/tests/pd/dos/data/atomic_system/type_map.raw @@ -0,0 +1 @@ +H diff --git a/source/tests/pd/dos/data/global_system/set.000/box.npy b/source/tests/pd/dos/data/global_system/set.000/box.npy new file mode 100644 index 0000000000..6265bf150e Binary files /dev/null and b/source/tests/pd/dos/data/global_system/set.000/box.npy differ diff --git a/source/tests/pd/dos/data/global_system/set.000/coord.npy b/source/tests/pd/dos/data/global_system/set.000/coord.npy new file mode 100644 index 0000000000..f33ce430bf Binary files /dev/null and b/source/tests/pd/dos/data/global_system/set.000/coord.npy differ diff --git a/source/tests/pd/dos/data/global_system/set.000/dos.npy b/source/tests/pd/dos/data/global_system/set.000/dos.npy new file mode 100644 index 0000000000..904b23e709 Binary files /dev/null and b/source/tests/pd/dos/data/global_system/set.000/dos.npy differ diff --git a/source/tests/pd/dos/data/global_system/type.raw b/source/tests/pd/dos/data/global_system/type.raw new file mode 100644 index 0000000000..de3c26ec4e --- /dev/null +++ b/source/tests/pd/dos/data/global_system/type.raw @@ -0,0 +1,32 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/source/tests/pd/dos/data/global_system/type_map.raw b/source/tests/pd/dos/data/global_system/type_map.raw new file mode 100644 index 0000000000..a9edc74f38 --- /dev/null +++ b/source/tests/pd/dos/data/global_system/type_map.raw @@ -0,0 +1 @@ +H diff --git a/source/tests/pd/dos/input.json b/source/tests/pd/dos/input.json new file mode 100644 index 0000000000..3d06d3fcc7 --- /dev/null +++ b/source/tests/pd/dos/input.json @@ -0,0 +1,81 @@ +{ + "model": { + "type_map": [ + "H" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 90 + ], + "rcut_smth": 1.8, + "rcut": 6.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 8, + "precision": "float64", + "seed": 1 + }, + "fitting_net": { + "type": "dos", + "numb_dos": 250, + "neuron": [ + 120, + 120, + 120 + ], + "resnet_dt": true, + "numb_fparam": 0, + "precision": "float64", + "seed": 1 + } + }, + "loss": { + "type": "dos", + "start_pref_dos": 1.0, + "limit_pref_dos": 1.0, + "start_pref_cdf": 0.0, + "limit_pref_cdf": 0.0, + "start_pref_ados": 1.0, + "limit_pref_ados": 1.0, + "start_pref_acdf": 0.0, + "limit_pref_acdf": 0.0 + }, + "learning_rate": { + "type": "exp", + "start_lr": 0.001, + 
"decay_steps": 5000, + "stop_lr": 1e-08 + }, + "training": { + "stop_batch": 100000, + "seed": 1, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "save_ckpt": "model", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "training_data": { + "systems": [ + "pd/dos/data/atomic_system/", + "pd/dos/data/global_system/" + ], + "batch_size": 1 + }, + "validation_data": { + "systems": [ + "pd/dos/data/atomic_system/", + "pd/dos/data/global_system/" + ], + "batch_size": 1 + } + }, + "_comment1": "that's all" +} diff --git a/source/tests/pd/model/__init__.py b/source/tests/pd/model/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/source/tests/pd/model/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/source/tests/pd/model/models/dpa1.json b/source/tests/pd/model/models/dpa1.json new file mode 100644 index 0000000000..a969c290ae --- /dev/null +++ b/source/tests/pd/model/models/dpa1.json @@ -0,0 +1,36 @@ +{ + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_atten", + "sel": 30, + "rcut_smth": 2.0, + "rcut": 6.0, + "neuron": [ + 2, + 4, + 8 + ], + "axis_neuron": 4, + "attn": 5, + "attn_layer": 2, + "attn_dotr": true, + "attn_mask": false, + "activation_function": "tanh", + "scaling_factor": 1.0, + "normalize": true, + "temperature": 1.0, + "seed": 1 + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1 + } +} diff --git a/source/tests/pd/model/models/dpa1.pd b/source/tests/pd/model/models/dpa1.pd new file mode 100644 index 0000000000..147312635c Binary files /dev/null and b/source/tests/pd/model/models/dpa1.pd differ diff --git a/source/tests/pd/model/models/dpa2.json b/source/tests/pd/model/models/dpa2.json new file mode 100644 index 0000000000..f83e319de3 --- /dev/null +++ b/source/tests/pd/model/models/dpa2.json @@ -0,0 +1,57 @@ +{ + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "dpa2", + "repinit": { + "rcut": 6.0, + "rcut_smth": 2.0, + "nsel": 30, + "neuron": [ + 2, + 4, + 8 + ], + "axis_neuron": 4, + "activation_function": "tanh" + + }, + "repformer": { + "rcut": 4.0, + "rcut_smth": 0.5, + "nsel": 10, + "nlayers": 12, + "g1_dim": 8, + "g2_dim": 5, + "attn2_hidden": 3, + "attn2_nhead": 1, + "attn1_hidden": 5, + "attn1_nhead": 1, + "axis_neuron": 4, + "update_h2": false, + "update_g1_has_conv": true, + "update_g1_has_grrg": true, + "update_g1_has_drrd": true, + "update_g1_has_attn": true, + "update_g2_has_g1g1": true, + "update_g2_has_attn": true, + "attn2_has_gate": true, + "use_sqrt_nnei": false, + "g1_out_conv": false, + "g1_out_mlp": false + }, + "seed": 1, + "add_tebd_to_repinit_out": false + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1 + } +} diff --git a/source/tests/pd/model/models/dpa2.pd b/source/tests/pd/model/models/dpa2.pd new file mode 100644 index 0000000000..650f0c144e Binary files /dev/null and b/source/tests/pd/model/models/dpa2.pd differ diff --git a/source/tests/pd/model/models/dpa2_tebd.pd b/source/tests/pd/model/models/dpa2_tebd.pd new file mode 100644 index 0000000000..2d3149d9a4 Binary files /dev/null and b/source/tests/pd/model/models/dpa2_tebd.pd differ diff --git a/source/tests/pd/model/test_atomic_model_atomic_stat.py b/source/tests/pd/model/test_atomic_model_atomic_stat.py new file mode 100644 index 0000000000..93aa7b8905 --- /dev/null +++ b/source/tests/pd/model/test_atomic_model_atomic_stat.py @@ -0,0 
+1,431 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import tempfile +import unittest +from pathlib import ( + Path, +) +from typing import ( + Optional, +) + +import h5py +import numpy as np +import paddle + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.model.atomic_model import ( + BaseAtomicModel, + DPAtomicModel, +) +from deepmd.pd.model.descriptor.dpa1 import ( + DescrptDPA1, +) +from deepmd.pd.model.task.base_fitting import ( + BaseFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.path import ( + DPPath, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class FooFitting(paddle.nn.Layer, BaseFitting): + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + "foo", + [1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ), + OutputVariableDef( + "bar", + [1, 2], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def serialize(self) -> dict: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + raise NotImplementedError + + def get_type_map(self) -> list[str]: + raise NotImplementedError + + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + nf, nloc, _ = descriptor.shape + ret = {} + ret["foo"] = ( + paddle.to_tensor( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ) + .reshape([nf, nloc, *self.output_def()["foo"].shape]) + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + ret["bar"] = ( + paddle.to_tensor( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ) + .reshape([nf, nloc, *self.output_def()["bar"].shape]) + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + return ret + + +class TestAtomicModelStat(unittest.TestCase, TestCaseSingleFrameWithNlist): + def tearDown(self): + self.tempdir.cleanup() + + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + self.merged_output_stat = [ + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5, 6 + "atom_foo": to_paddle_tensor( + np.array([[5.0, 5.0, 5.0], [5.0, 6.0, 7.0]]).reshape(2, 3, 1) + ), + # bias of bar: [1, 5], [3, 2] + "bar": to_paddle_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_atom_foo": np.float32(1.0), + "find_bar": np.float32(1.0), + }, + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5, 6 from atomic label. 
+ "foo": to_paddle_tensor(np.array([5.0, 7.0]).reshape(2, 1)), + # bias of bar: [1, 5], [3, 2] + "bar": to_paddle_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_foo": np.float32(1.0), + "find_bar": np.float32(1.0), + }, + ] + self.tempdir = tempfile.TemporaryDirectory() + h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) + with h5py.File(h5file, "w") as f: + pass + self.stat_file_path = DPPath(h5file, "a") + + def test_output_stat(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting().to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + expected_ret0 = {} + expected_ret0["foo"] = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ).reshape([nf, nloc, *md0.fitting_output_def()["foo"].shape]) + expected_ret0["bar"] = np.array( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ).reshape([nf, nloc, *md0.fitting_output_def()["bar"].shape]) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + expected_std = np.ones( + (2, 2, 2), dtype=np.float64 + ) # 2 keys, 2 atypes, 2 max dims. + expected_std[0, :, :1] = np.array([0.0, 0.816496]).reshape( + 2, 1 + ) # updating std for foo based on [5.0, 5.0, 5.0], [5.0, 6.0, 7.0]] + np.testing.assert_almost_equal( + to_numpy_array(md0.out_std), expected_std, decimal=4 + ) + ret1 = cvt_ret(ret1) + # nt x odim + foo_bias = np.array([5.0, 6.0]).reshape(2, 1) + bar_bias = np.array([1.0, 5.0, 3.0, 2.0]).reshape(2, 1, 2) + expected_ret1 = {} + expected_ret1["foo"] = ret0["foo"] + foo_bias[at] + expected_ret1["bar"] = ret0["bar"] + bar_bias[at] + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk]) + + # 3. test bias load from file + def raise_error(): + raise RuntimeError + + md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path) + ret2 = md0.forward_common_atomic(*args) + ret2 = cvt_ret(ret2) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], ret2[kk]) + np.testing.assert_almost_equal( + to_numpy_array(md0.out_std), expected_std, decimal=4 + ) + + # 4. 
test change bias + BaseAtomicModel.change_out_bias( + md0, self.merged_output_stat, bias_adjust_mode="change-by-statistic" + ) + args = [ + to_paddle_tensor(ii) + for ii in [ + self.coord_ext, + to_numpy_array(self.merged_output_stat[0]["atype_ext"]), + self.nlist, + ] + ] + ret3 = md0.forward_common_atomic(*args) + ret3 = cvt_ret(ret3) + expected_std[0, :, :1] = np.array([1.24722, 0.47140]).reshape( + 2, 1 + ) # updating std for foo based on [4.0, 3.0, 2.0], [1.0, 1.0, 1.0]] + expected_ret3 = {} + # new bias [2.666, 1.333] + expected_ret3["foo"] = np.array( + [[3.6667, 4.6667, 4.3333], [6.6667, 6.3333, 7.3333]] + ).reshape(2, 3, 1) + for kk in ["foo"]: + np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) + np.testing.assert_almost_equal( + to_numpy_array(md0.out_std), expected_std, decimal=4 + ) + + +class TestAtomicModelStatMergeGlobalAtomic( + unittest.TestCase, TestCaseSingleFrameWithNlist +): + def tearDown(self): + self.tempdir.cleanup() + + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + self.merged_output_stat = [ + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 0], [0, 0, 0]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5.5, nan + "atom_foo": to_paddle_tensor( + np.array([[5.0, 5.0, 5.0], [5.0, 6.0, 7.0]]).reshape(2, 3, 1) + ), + # bias of bar: [1, 5], [3, 2] + "bar": to_paddle_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_atom_foo": np.float32(1.0), + "find_bar": np.float32(1.0), + }, + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5.5, 3 from atomic label. + "foo": to_paddle_tensor(np.array([5.0, 7.0]).reshape(2, 1)), + # bias of bar: [1, 5], [3, 2] + "bar": to_paddle_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_foo": np.float32(1.0), + "find_bar": np.float32(1.0), + }, + ] + self.tempdir = tempfile.TemporaryDirectory() + h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) + with h5py.File(h5file, "w") as f: + pass + self.stat_file_path = DPPath(h5file, "a") + + def test_output_stat(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting().to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. 
test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + expected_ret0 = {} + expected_ret0["foo"] = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ).reshape([nf, nloc, *md0.fitting_output_def()["foo"].shape]) + expected_ret0["bar"] = np.array( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ).reshape([nf, nloc, *md0.fitting_output_def()["bar"].shape]) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + # nt x odim + foo_bias = np.array([5.5, 3.0]).reshape(2, 1) + bar_bias = np.array([1.0, 5.0, 3.0, 2.0]).reshape(2, 1, 2) + expected_ret1 = {} + expected_ret1["foo"] = ret0["foo"] + foo_bias[at] + expected_ret1["bar"] = ret0["bar"] + bar_bias[at] + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk]) + + # 3. test bias load from file + def raise_error(): + raise RuntimeError + + md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path) + ret2 = md0.forward_common_atomic(*args) + ret2 = cvt_ret(ret2) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], ret2[kk]) + + # 4. test change bias + BaseAtomicModel.change_out_bias( + md0, self.merged_output_stat, bias_adjust_mode="change-by-statistic" + ) + args = [ + to_paddle_tensor(ii) + for ii in [ + self.coord_ext, + to_numpy_array(self.merged_output_stat[0]["atype_ext"]), + self.nlist, + ] + ] + ret3 = md0.forward_common_atomic(*args) + ret3 = cvt_ret(ret3) + expected_ret3 = {} + # new bias [2, -5] + expected_ret3["foo"] = np.array([[3, 4, -2], [6, 0, 1]]).reshape(2, 3, 1) + for kk in ["foo"]: + np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) diff --git a/source/tests/pd/model/test_atomic_model_global_stat.py b/source/tests/pd/model/test_atomic_model_global_stat.py new file mode 100644 index 0000000000..abd7928a0f --- /dev/null +++ b/source/tests/pd/model/test_atomic_model_global_stat.py @@ -0,0 +1,510 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import tempfile +import unittest +from pathlib import ( + Path, +) +from typing import ( + Optional, +) + +import h5py +import numpy as np +import paddle + +from deepmd.dpmodel.atomic_model import DPAtomicModel as DPDPAtomicModel +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.model.atomic_model import ( + BaseAtomicModel, + DPAtomicModel, +) +from deepmd.pd.model.descriptor import ( + DescrptDPA1, + DescrptSeA, +) +from deepmd.pd.model.task.base_fitting import ( + BaseFitting, +) +from deepmd.pd.model.task.ener import ( + InvarFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.path import ( + DPPath, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class FooFitting(paddle.nn.Layer, BaseFitting): + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + "foo", + [1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ), + OutputVariableDef( + "pix", + [1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ), + OutputVariableDef( + "bar", + [1, 2], + reducible=True, + r_differentiable=True, + 
c_differentiable=True, + ), + ] + ) + + def serialize(self) -> dict: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + raise NotImplementedError + + def get_type_map(self) -> list[str]: + raise NotImplementedError + + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + nf, nloc, _ = descriptor.shape + ret = {} + ret["foo"] = ( + paddle.to_tensor( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ) + .reshape([nf, nloc] + self.output_def()["foo"].shape) # noqa: RUF005 + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + ret["pix"] = ( + paddle.to_tensor( + [ + [3.0, 2.0, 1.0], + [6.0, 5.0, 4.0], + ] + ) + .reshape([nf, nloc] + self.output_def()["pix"].shape) # noqa: RUF005 + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + ret["bar"] = ( + paddle.to_tensor( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ) + .reshape([nf, nloc] + self.output_def()["bar"].shape) # noqa: RUF005 + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + return ret + + +class TestAtomicModelStat(unittest.TestCase, TestCaseSingleFrameWithNlist): + def tearDown(self): + self.tempdir.cleanup() + + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + nf, nloc, nnei = self.nlist.shape + self.merged_output_stat = [ + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 1, 3 + "foo": to_paddle_tensor(np.array([5.0, 7.0]).reshape(2, 1)), + # no bias of pix + # bias of bar: [1, 5], [3, 2] + "bar": to_paddle_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_foo": np.float32(1.0), + "find_bar": np.float32(1.0), + } + ] + self.tempdir = tempfile.TemporaryDirectory() + h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) + with h5py.File(h5file, "w") as f: + pass + self.stat_file_path = DPPath(h5file, "a") + + def test_output_stat(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting().to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. 
test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + + expected_ret0 = {} + expected_ret0["foo"] = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["foo"].shape) # noqa: RUF005 + expected_ret0["pix"] = np.array( + [ + [3.0, 2.0, 1.0], + [6.0, 5.0, 4.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["pix"].shape) # noqa: RUF005 + expected_ret0["bar"] = np.array( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["bar"].shape) # noqa: RUF005 + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + expected_std = np.ones((3, 2, 2)) # 3 keys, 2 atypes, 2 max dims. + # nt x odim + foo_bias = np.array([1.0, 3.0]).reshape(2, 1) + bar_bias = np.array([1.0, 5.0, 3.0, 2.0]).reshape(2, 1, 2) + expected_ret1 = {} + expected_ret1["foo"] = ret0["foo"] + foo_bias[at] + expected_ret1["pix"] = ret0["pix"] + expected_ret1["bar"] = ret0["bar"] + bar_bias[at] + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk]) + np.testing.assert_almost_equal(to_numpy_array(md0.out_std), expected_std) + + # 3. test bias load from file + def raise_error(): + raise RuntimeError + + md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path) + ret2 = md0.forward_common_atomic(*args) + ret2 = cvt_ret(ret2) + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret1[kk], ret2[kk]) + np.testing.assert_almost_equal(to_numpy_array(md0.out_std), expected_std) + + # 4. test change bias + BaseAtomicModel.change_out_bias( + md0, self.merged_output_stat, bias_adjust_mode="change-by-statistic" + ) + args = [ + to_paddle_tensor(ii) + for ii in [ + self.coord_ext, + to_numpy_array(self.merged_output_stat[0]["atype_ext"]), + self.nlist, + ] + ] + ret3 = md0.forward_common_atomic(*args) + ret3 = cvt_ret(ret3) + ## model output on foo: [[2, 3, 6], [5, 8, 9]] given bias [1, 3] + ## foo sumed: [11, 22] compared with [5, 7], fit target is [-6, -15] + ## fit bias is [1, -8] + ## old bias + fit bias [2, -5] + ## new model output is [[3, 4, -2], [6, 0, 1]], which sumed to [5, 7] + expected_ret3 = {} + expected_ret3["foo"] = np.array([[3, 4, -2], [6, 0, 1]]).reshape(2, 3, 1) + expected_ret3["pix"] = ret0["pix"] + for kk in ["foo", "pix"]: + np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk]) + # bar is too complicated to be manually computed. + np.testing.assert_almost_equal(to_numpy_array(md0.out_std), expected_std) + + def test_preset_bias(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting().to(env.DEVICE) + type_map = ["foo", "bar"] + preset_out_bias = { + # "foo": np.array(3.0, 2.0]).reshape(2, 1), + "foo": [None, 2], + "bar": np.array([7.0, 5.0, 13.0, 11.0]).reshape(2, 1, 2), + } + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + preset_out_bias=preset_out_bias, + ).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. 
test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + expected_ret0 = {} + expected_ret0["foo"] = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["foo"].shape) # noqa: RUF005 + expected_ret0["pix"] = np.array( + [ + [3.0, 2.0, 1.0], + [6.0, 5.0, 4.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["pix"].shape) # noqa: RUF005 + expected_ret0["bar"] = np.array( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["bar"].shape) # noqa: RUF005 + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + # foo sums: [5, 7], + # given bias of type 1 being 2, the bias left for type 0 is [5-2*1, 7-2*2] = [3,3] + # the solution of type 0 is 1.8 + foo_bias = np.array([1.8, preset_out_bias["foo"][1]]).reshape(2, 1) + bar_bias = preset_out_bias["bar"] + expected_ret1 = {} + expected_ret1["foo"] = ret0["foo"] + foo_bias[at] + expected_ret1["pix"] = ret0["pix"] + expected_ret1["bar"] = ret0["bar"] + bar_bias[at] + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk]) + + # 3. test bias load from file + def raise_error(): + raise RuntimeError + + md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path) + ret2 = md0.forward_common_atomic(*args) + ret2 = cvt_ret(ret2) + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret1[kk], ret2[kk]) + + # 4. test change bias + BaseAtomicModel.change_out_bias( + md0, self.merged_output_stat, bias_adjust_mode="change-by-statistic" + ) + args = [ + to_paddle_tensor(ii) + for ii in [ + self.coord_ext, + to_numpy_array(self.merged_output_stat[0]["atype_ext"]), + self.nlist, + ] + ] + ret3 = md0.forward_common_atomic(*args) + ret3 = cvt_ret(ret3) + ## model output on foo: [[2.8, 3.8, 5], [5.8, 7., 8.]] given bias [1.8, 2] + ## foo sumed: [11.6, 20.8] compared with [5, 7], fit target is [-6.6, -13.8] + ## fit bias is [-7, 2] (2 is assigned. -7 is fit to [-8.6, -17.8]) + ## old bias[1.8,2] + fit bias[-7, 2] = [-5.2, 4] + ## new model output is [[-4.2, -3.2, 7], [-1.2, 9, 10]] + expected_ret3 = {} + expected_ret3["foo"] = np.array([[-4.2, -3.2, 7.0], [-1.2, 9.0, 10.0]]).reshape( + 2, 3, 1 + ) + expected_ret3["pix"] = ret0["pix"] + for kk in ["foo", "pix"]: + np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk]) + # bar is too complicated to be manually computed. + + def test_preset_bias_all_none(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting().to(env.DEVICE) + type_map = ["foo", "bar"] + preset_out_bias = { + "foo": [None, None], + } + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + preset_out_bias=preset_out_bias, + ).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. 
test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + expected_ret0 = {} + expected_ret0["foo"] = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["foo"].shape) # noqa: RUF005 + expected_ret0["pix"] = np.array( + [ + [3.0, 2.0, 1.0], + [6.0, 5.0, 4.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["pix"].shape) # noqa: RUF005 + expected_ret0["bar"] = np.array( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["bar"].shape) # noqa: RUF005 + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + # nt x odim + foo_bias = np.array([1.0, 3.0]).reshape(2, 1) + bar_bias = np.array([1.0, 5.0, 3.0, 2.0]).reshape(2, 1, 2) + expected_ret1 = {} + expected_ret1["foo"] = ret0["foo"] + foo_bias[at] + expected_ret1["pix"] = ret0["pix"] + expected_ret1["bar"] = ret0["bar"] + bar_bias[at] + for kk in ["foo", "pix", "bar"]: + np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk]) + + def test_serialize(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = InvarFitting( + "foo", + self.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["A", "B"] + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + md1 = DPAtomicModel.deserialize(md0.serialize()) + ret1 = md1.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + + for kk in ["foo"]: + np.testing.assert_almost_equal(ret0[kk], ret1[kk]) + + md2 = DPDPAtomicModel.deserialize(md0.serialize()) + args = [self.coord_ext, self.atype_ext, self.nlist] + ret2 = md2.forward_common_atomic(*args) + for kk in ["foo"]: + np.testing.assert_almost_equal(ret0[kk], ret2[kk]) diff --git a/source/tests/pd/model/test_autodiff.py b/source/tests/pd/model/test_autodiff.py new file mode 100644 index 0000000000..7554bd241f --- /dev/null +++ b/source/tests/pd/model/test_autodiff.py @@ -0,0 +1,253 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) + +from ...seed import ( + GLOBAL_SEED, +) + +dtype = paddle.float64 + +from ..common import ( + eval_model, +) +from .test_permutation import ( + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, + model_spin, + model_zbl, +) + + +# from deepmd-kit repo +def finite_difference(f, x, delta=1e-6): + in_shape = x.shape + y0 = f(x) + out_shape = y0.shape + res = np.empty(out_shape + in_shape) + for idx in np.ndindex(*in_shape): + diff = np.zeros(in_shape) + diff[idx] += delta + y1p = f(x + diff) + y1n = f(x - diff) + res[(Ellipsis, *idx)] = (y1p - y1n) / (2 * delta) + return res + 
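The finite_difference helper above forms central-difference derivatives of an arbitrary array-valued function. ForceTest below uses it to verify that the predicted force equals the negative numerical gradient of the energy with respect to the coordinates, and VirialTest applies the same idea to a deformed cell (built with stretch_box), comparing the reported virial against the numerical estimate -(dE/dh)^T h. A minimal self-contained sketch of the force check on a toy quadratic energy, assuming only NumPy and no DeepMD model:

    import numpy as np

    def finite_difference(f, x, delta=1e-6):
        # Central differences: perturb one input component at a time.
        in_shape = x.shape
        out_shape = np.shape(f(x))
        res = np.empty(out_shape + in_shape)
        for idx in np.ndindex(*in_shape):
            diff = np.zeros(in_shape)
            diff[idx] += delta
            res[(Ellipsis, *idx)] = (f(x + diff) - f(x - diff)) / (2 * delta)
        return res

    # Toy "energy" E(x) = sum(x**2); the analytic force is -dE/dx = -2 x.
    coord = np.array([[0.5, -1.0, 2.0]])
    numeric_force = -finite_difference(lambda c: np.sum(c * c), coord, delta=1e-5).squeeze()
    np.testing.assert_allclose(numeric_force, -2.0 * coord.squeeze(), atol=1e-6)

With double-precision energies, a step around 1e-5 to 1e-4 keeps truncation and round-off errors balanced, which is consistent with ForceTest using delta=1e-5 and VirialTest delta=1e-4 while both compare results to 5 decimal places.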
+ +def stretch_box(old_coord, old_box, new_box): + ocoord = old_coord.reshape(-1, 3) + obox = old_box.reshape(3, 3) + nbox = new_box.reshape(3, 3) + ncoord = ocoord @ np.linalg.inv(obox) @ nbox + return ncoord.reshape(old_coord.shape) + + +class ForceTest: + def test( + self, + ): + places = 5 + delta = 1e-5 + natoms = 5 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype).to(device="cpu") + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(device="cpu") + coord = paddle.rand([natoms, 3], dtype=dtype).to(device="cpu") + coord = paddle.matmul(coord, cell) + spin = paddle.rand([natoms, 3], dtype=dtype).to(device="cpu") + atype = paddle.to_tensor([0, 0, 0, 1, 1]) + # assumes input to be numpy tensor + coord = coord.numpy() + spin = spin.numpy() + test_spin = getattr(self, "test_spin", False) + if not test_spin: + test_keys = ["energy", "force", "virial"] + else: + test_keys = ["energy", "force", "force_mag", "virial"] + + def np_infer_coord( + coord, + ): + result = eval_model( + self.model, + paddle.to_tensor(coord).to(device=env.DEVICE).unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=paddle.to_tensor(spin).to(device=env.DEVICE).unsqueeze(0), + ) + # detach + ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys} + return ret + + def np_infer_spin( + spin, + ): + result = eval_model( + self.model, + paddle.to_tensor(coord).to(device=env.DEVICE).unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=paddle.to_tensor(spin).to(device=env.DEVICE).unsqueeze(0), + ) + # detach + ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys} + return ret + + def ff_coord(_coord): + return np_infer_coord(_coord)["energy"] + + def ff_spin(_spin): + return np_infer_spin(_spin)["energy"] + + if not test_spin: + fdf = -finite_difference(ff_coord, coord, delta=delta).squeeze() + rff = np_infer_coord(coord)["force"] + np.testing.assert_almost_equal(fdf, rff, decimal=places) + else: + # real force + fdf = -finite_difference(ff_coord, coord, delta=delta).squeeze() + rff = np_infer_coord(coord)["force"] + np.testing.assert_almost_equal(fdf, rff, decimal=places) + # magnetic force + fdf = -finite_difference(ff_spin, spin, delta=delta).squeeze() + rff = np_infer_spin(spin)["force_mag"] + np.testing.assert_almost_equal(fdf, rff, decimal=places) + + +class VirialTest: + def test( + self, + ): + places = 5 + delta = 1e-4 + natoms = 5 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype).to(device="cpu") + cell = (cell) + 5.0 * paddle.eye(3).to(device="cpu") + coord = paddle.rand([natoms, 3], dtype=dtype).to(device="cpu") + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1]) + # assumes input to be numpy tensor + coord = coord.numpy() + cell = cell.numpy() + test_keys = ["energy", "force", "virial"] + + def np_infer( + new_cell, + ): + result = eval_model( + self.model, + paddle.to_tensor(stretch_box(coord, cell, new_cell)) + .to(device="cpu") + .unsqueeze(0), + paddle.to_tensor(new_cell).to(device="cpu").unsqueeze(0), + atype, + ) + # detach + ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys} + # detach + return ret + + def ff(bb): + return np_infer(bb)["energy"] + + fdv = ( + -(finite_difference(ff, cell, delta=delta).transpose([0, 2, 1]) @ cell) + .squeeze() + .reshape([9]) + ) + rfv = np_infer(cell)["virial"] + np.testing.assert_almost_equal(fdv, rfv, decimal=places) + + +class TestEnergyModelSeAForce(unittest.TestCase, ForceTest): + def setUp(self): + model_params = 
copy.deepcopy(model_se_e2_a) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSeAVirial(unittest.TestCase, VirialTest): + def setUp(self): + model_params = copy.deepcopy(model_se_e2_a) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA1Force(unittest.TestCase, ForceTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA1Virial(unittest.TestCase, VirialTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA2Force(unittest.TestCase, ForceTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPAUniVirial(unittest.TestCase, VirialTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelHybridForce(unittest.TestCase, ForceTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelHybridVirial(unittest.TestCase, VirialTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelZBLForce(unittest.TestCase, ForceTest): + def setUp(self): + model_params = copy.deepcopy(model_zbl) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelZBLVirial(unittest.TestCase, VirialTest): + def setUp(self): + model_params = copy.deepcopy(model_zbl) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinSeAForce(unittest.TestCase, ForceTest): + def setUp(self): + model_params = copy.deepcopy(model_spin) + self.type_split = False + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) diff --git a/source/tests/pd/model/test_deeppot.py b/source/tests/pd/model/test_deeppot.py new file mode 100644 index 0000000000..56620174d3 --- /dev/null +++ b/source/tests/pd/model/test_deeppot.py @@ -0,0 +1,138 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import unittest +from argparse import ( + Namespace, +) +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np +import paddle + +from deepmd.infer.deep_pot import DeepPot as DeepPotUni +from deepmd.pd.entrypoints.main import ( + freeze, + get_trainer, +) +from deepmd.pd.infer.deep_eval import ( + DeepPot, +) + + +class TestDeepPot(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.config["training"]["training_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/single") + ] + self.config["training"]["validation_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/single") + ] + self.input_json = "test_dp_test.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + trainer = get_trainer(deepcopy(self.config)) + 
trainer.run() + + device = paddle.get_device() + paddle.set_device("cpu") + input_dict, label_dict, _ = trainer.get_data(is_train=False) + paddle.set_device(device) + trainer.wrapper(**input_dict, label=label_dict, cur_lr=1.0) + self.model = "model.pd" + + def tearDown(self): + for f in os.listdir("."): + if f in ["lcurve.out", self.input_json]: + os.remove(f) + + @unittest.skip("Paddle do not eval on frozen model yet.") + def test_dp_test(self): + dp = DeepPot(str(self.model)) + cell = np.array( + [ + 5.122106549439247480e00, + 4.016537340154059388e-01, + 6.951654033828678081e-01, + 4.016537340154059388e-01, + 6.112136112297989143e00, + 8.178091365465004481e-01, + 6.951654033828678081e-01, + 8.178091365465004481e-01, + 6.159552512682983760e00, + ] + ).reshape(1, 3, 3) + coord = np.array( + [ + 2.978060152121375648e00, + 3.588469695887098077e00, + 2.792459820604495491e00, + 3.895592322591093115e00, + 2.712091020667753760e00, + 1.366836847133650501e00, + 9.955616170888935690e-01, + 4.121324820711413039e00, + 1.817239061889086571e00, + 3.553661462345699906e00, + 5.313046969500791583e00, + 6.635182659098815883e00, + 6.088601018589653080e00, + 6.575011420004332585e00, + 6.825240650611076099e00, + ] + ).reshape(1, -1, 3) + atype = np.array([0, 0, 0, 1, 1]).reshape(1, -1) + + ret = dp.eval(coord, cell, atype, atomic=True) + e, f, v, ae, av = ret[0], ret[1], ret[2], ret[3], ret[4] + self.assertEqual(e.shape, (1, 1)) + self.assertEqual(f.shape, (1, 5, 3)) + self.assertEqual(v.shape, (1, 9)) + self.assertEqual(ae.shape, (1, 5, 1)) + self.assertEqual(av.shape, (1, 5, 9)) + + self.assertEqual(dp.get_type_map(), ["O", "H"]) + self.assertEqual(dp.get_ntypes(), 2) + self.assertEqual(dp.get_dim_fparam(), 0) + self.assertEqual(dp.get_dim_aparam(), 0) + self.assertEqual(dp.deep_eval.model_type, DeepPot) + + def test_uni(self): + dp = DeepPotUni("model.pd") + self.assertIsInstance(dp, DeepPot) + # its methods has been tested in test_dp_test + + +class TestDeepPotFrozen(TestDeepPot): + def setUp(self): + super().setUp() + frozen_model = "frozen_model.json" + ns = Namespace( + model=self.model, + output=frozen_model, + head=None, + ) + freeze(ns) + self.model = frozen_model + + # Note: this can not actually disable cuda device to be used + # only can be used to test whether devices are mismatched + @unittest.skipIf(not (paddle.device.cuda.device_count() > 0), "CUDA not available") + @unittest.mock.patch("deepmd.pd.utils.env.DEVICE", "cpu") + @unittest.mock.patch("deepmd.pd.infer.deep_eval.DEVICE", "cpu") + def test_dp_test_cpu(self): + self.test_dp_test() + + +# TestFparamAparamPT: moved to infer/test_models.py diff --git a/source/tests/pd/model/test_descriptor.py b/source/tests/pd/model/test_descriptor.py new file mode 100644 index 0000000000..386c68595b --- /dev/null +++ b/source/tests/pd/model/test_descriptor.py @@ -0,0 +1,195 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import os +import unittest + +import numpy as np +import paddle +import tensorflow.compat.v1 as tf + +tf.disable_eager_execution() + +import json +from pathlib import ( + Path, +) + +from deepmd.pd.model.descriptor import ( + prod_env_mat, +) +from deepmd.pd.utils import ( + decomp, + dp_random, + env, +) +from deepmd.pd.utils.dataset import ( + DeepmdDataSetForLoader, +) +from deepmd.pd.utils.env import ( + DEVICE, + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_PD_FLOAT_PRECISION, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.tf.common import ( + expand_sys_str, +) +from deepmd.tf.env import ( 
+ op_module, +) + +from ..test_finetune import ( + energy_data_requirement, +) +from .test_embedding_net import ( + get_single_batch, +) + +CUR_DIR = os.path.dirname(__file__) + + +def base_se_a(rcut, rcut_smth, sel, batch, mean, stddev): + g = tf.Graph() + with g.as_default(): + coord = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None]) + box = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None]) + atype = tf.placeholder(tf.int32, [None, None]) + natoms_vec = tf.placeholder(tf.int32, [None]) + default_mesh = tf.placeholder(tf.int32, [None]) + stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( + coord, + atype, + natoms_vec, + box, + default_mesh, + tf.constant(mean), + tf.constant(stddev), + rcut_a=-1.0, + rcut_r=rcut, + rcut_r_smth=rcut_smth, + sel_a=sel, + sel_r=[0 for i in sel], + ) + + net_deriv_reshape = tf.ones_like(stat_descrpt) + force = op_module.prod_force_se_a( + net_deriv_reshape, + descrpt_deriv, + nlist, + natoms_vec, + n_a_sel=sum(sel), + n_r_sel=0, + ) + + with tf.Session(graph=g) as sess: + y = sess.run( + [stat_descrpt, force, nlist], + feed_dict={ + coord: batch["coord"], + box: batch["box"], + natoms_vec: batch["natoms"], + atype: batch["atype"], + default_mesh: np.array([0, 0, 0, 2, 2, 2]), + }, + ) + tf.reset_default_graph() + return y + + +class TestSeA(unittest.TestCase): + def setUp(self): + dp_random.seed(20) + with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin: + content = fin.read() + config = json.loads(content) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + config["training"]["training_data"]["systems"] = data_file + config["training"]["validation_data"]["systems"] = data_file + model_config = config["model"] + self.rcut = model_config["descriptor"]["rcut"] + self.rcut_smth = model_config["descriptor"]["rcut_smth"] + self.sel = model_config["descriptor"]["sel"] + self.bsz = config["training"]["training_data"]["batch_size"] + self.systems = config["training"]["validation_data"]["systems"] + if isinstance(self.systems, str): + self.systems = expand_sys_str(self.systems) + ds = DeepmdDataSetForLoader( + self.systems[0], + model_config["type_map"], + ) + ds.add_data_requirement(energy_data_requirement) + self.np_batch, self.pt_batch = get_single_batch(ds) + self.sec = np.cumsum(self.sel) + self.ntypes = len(self.sel) + self.nnei = sum(self.sel) + + def test_consistency(self): + avg_zero = paddle.zeros( + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PD_FLOAT_PRECISION, + ).to(device=env.DEVICE) + std_ones = paddle.ones( + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PD_FLOAT_PRECISION, + ).to(device=env.DEVICE) + base_d, base_force, base_nlist = base_se_a( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=self.sel, + batch=self.np_batch, + mean=avg_zero.detach().cpu(), + stddev=std_ones.detach().cpu(), + ) + + pt_coord = self.pt_batch["coord"].to(env.DEVICE) + atype = self.pt_batch["atype"].to(env.DEVICE) + pt_coord.stop_gradient = False + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + pt_coord, + self.pt_batch["atype"].to(env.DEVICE), + self.rcut, + self.sel, + mixed_types=False, + box=self.pt_batch["box"].to(env.DEVICE), + ) + my_d, _, _ = prod_env_mat( + extended_coord, + nlist, + atype, + avg_zero.reshape([-1, self.nnei, 4]).to(DEVICE), + std_ones.reshape([-1, self.nnei, 4]).to(DEVICE), + self.rcut, + self.rcut_smth, + ) + my_d.sum().backward() + bsz = pt_coord.shape[0] + my_force = pt_coord.grad.reshape([bsz, -1, 3]).cpu().detach().numpy() 
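# The block below reshapes the TF reference arrays and the Paddle results to a per-frame layout,
# then maps the Paddle neighbor list from extended-atom indices back to local atom indices
# (restoring the -1 padding for empty slots) so both backends can be compared element-wise.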
+ base_force = base_force.reshape(bsz, -1, 3) + base_d = base_d.reshape(bsz, -1, self.nnei, 4) + my_d = my_d.reshape([bsz, -1, self.nnei, 4]).cpu().detach().numpy() + base_nlist = base_nlist.reshape(bsz, -1, self.nnei) + + mapping = mapping.cpu() + my_nlist = nlist.reshape([bsz, -1]).cpu() + mask = my_nlist == -1 + my_nlist = my_nlist * (~mask).astype(my_nlist.dtype) + my_nlist = decomp.take_along_axis(mapping, axis=-1, indices=my_nlist) + my_nlist = my_nlist * (~mask).astype(my_nlist.dtype) - mask.astype( + my_nlist.dtype + ) + my_nlist = my_nlist.cpu().reshape([bsz, -1, self.nnei]).numpy() + self.assertTrue(np.allclose(base_nlist, my_nlist)) + self.assertTrue(np.allclose(np.mean(base_d, axis=2), np.mean(my_d, axis=2))) + self.assertTrue(np.allclose(np.std(base_d, axis=2), np.std(my_d, axis=2))) + # descriptors may be different when there are multiple neighbors in the same distance + self.assertTrue(np.allclose(base_force, -my_force)) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_descriptor_dpa1.py b/source/tests/pd/model/test_descriptor_dpa1.py new file mode 100644 index 0000000000..195495c81c --- /dev/null +++ b/source/tests/pd/model/test_descriptor_dpa1.py @@ -0,0 +1,382 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import unittest +from pathlib import ( + Path, +) + +import numpy as np +import paddle + +from deepmd.pd.model.descriptor import ( + DescrptBlockSeAtten, + DescrptDPA1, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) + +CUR_DIR = os.path.dirname(__file__) + + +class TestDPA1(unittest.TestCase): + def setUp(self): + cell = [ + 5.122106549439247480e00, + 4.016537340154059388e-01, + 6.951654033828678081e-01, + 4.016537340154059388e-01, + 6.112136112297989143e00, + 8.178091365465004481e-01, + 6.951654033828678081e-01, + 8.178091365465004481e-01, + 6.159552512682983760e00, + ] + self.cell = ( + paddle.to_tensor( + cell, + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ) + .to(device=env.DEVICE) + .reshape([1, 3, 3]) + ) + coord = [ + 2.978060152121375648e00, + 3.588469695887098077e00, + 2.792459820604495491e00, + 3.895592322591093115e00, + 2.712091020667753760e00, + 1.366836847133650501e00, + 9.955616170888935690e-01, + 4.121324820711413039e00, + 1.817239061889086571e00, + 3.553661462345699906e00, + 5.313046969500791583e00, + 6.635182659098815883e00, + 6.088601018589653080e00, + 6.575011420004332585e00, + 6.825240650611076099e00, + ] + self.coord = ( + paddle.to_tensor(coord, dtype=env.GLOBAL_PD_FLOAT_PRECISION) + .reshape([1, -1, 3]) + .to(device=env.DEVICE) + ) + self.atype = ( + paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32) + .reshape([1, -1]) + .to(device=env.DEVICE) + ) + self.ref_d = paddle.to_tensor( + [ + 8.382518544113587780e-03, + -3.390120566088597812e-03, + 6.145981571114964362e-03, + -4.880300873973819273e-03, + -3.390120566088597812e-03, + 1.372540996564941464e-03, + -2.484163690574096341e-03, + 1.972313058658722688e-03, + 6.145981571114964362e-03, + -2.484163690574096341e-03, + 4.507748738021747671e-03, + -3.579717194906019764e-03, + -4.880300873973819273e-03, + 1.972313058658722688e-03, + -3.579717194906019764e-03, + 2.842794615687799838e-03, + 6.733043802494966066e-04, + -2.721540313345096771e-04, + 4.936158526085561134e-04, + -3.919743287822345223e-04, + -1.311123004527576900e-02, + 5.301179352601203924e-03, + -9.614612349318877454e-03, + 
7.634884975521277241e-03, + 8.877088452901006621e-03, + -3.590945566653638409e-03, + 6.508042782015627942e-03, + -5.167671664327699171e-03, + -2.697241463040870365e-03, + 1.091350446825975137e-03, + -1.976895708961905022e-03, + 1.569671412121975348e-03, + 8.645131636261189911e-03, + -3.557395265621639355e-03, + 6.298048561552698106e-03, + -4.999272007935521948e-03, + -3.557395265621639355e-03, + 1.467866637220284964e-03, + -2.587004431651147504e-03, + 2.052752235601402672e-03, + 6.298048561552698106e-03, + -2.587004431651147504e-03, + 4.594085551315935101e-03, + -3.647656549789176847e-03, + -4.999272007935521948e-03, + 2.052752235601402672e-03, + -3.647656549789176847e-03, + 2.896359275520481256e-03, + 6.689620176492027878e-04, + -2.753606422414641049e-04, + 4.864958810186969444e-04, + -3.860599754167503119e-04, + -1.349238259226558101e-02, + 5.547478630961994242e-03, + -9.835472300819447095e-03, + 7.808197926069362048e-03, + 9.220744348752592245e-03, + -3.795799103392961601e-03, + 6.716516319358462918e-03, + -5.331265718473574867e-03, + -2.783836698392940304e-03, + 1.147461939123531121e-03, + -2.025013030986024063e-03, + 1.606944814423778541e-03, + 9.280385723343491378e-03, + -3.515852178447095942e-03, + 7.085282215778941628e-03, + -5.675852414643783178e-03, + -3.515852178447095942e-03, + 1.337760635271160884e-03, + -2.679428786337713451e-03, + 2.145400621815936413e-03, + 7.085282215778941628e-03, + -2.679428786337713451e-03, + 5.414439648102228192e-03, + -4.338426468139268931e-03, + -5.675852414643783178e-03, + 2.145400621815936413e-03, + -4.338426468139268931e-03, + 3.476467482674507146e-03, + 7.166961981167455130e-04, + -2.697932188839837972e-04, + 5.474643906631899504e-04, + -4.386556623669893621e-04, + -1.480434821331240956e-02, + 5.604647062899507579e-03, + -1.130745349141585449e-02, + 9.059113563516829268e-03, + 9.758791063112262978e-03, + -3.701477720487638626e-03, + 7.448215522796466058e-03, + -5.966057584545172120e-03, + -2.845102393948158344e-03, + 1.078743584169829543e-03, + -2.170093031447992756e-03, + 1.738010461687942770e-03, + 9.867599071916231118e-03, + -3.811041717688905522e-03, + 7.121877634386481262e-03, + -5.703120290113914553e-03, + -3.811041717688905522e-03, + 1.474046183772771213e-03, + -2.747386907428428938e-03, + 2.199711055637492037e-03, + 7.121877634386481262e-03, + -2.747386907428428938e-03, + 5.145050639440944609e-03, + -4.120642824501622239e-03, + -5.703120290113914553e-03, + 2.199711055637492037e-03, + -4.120642824501622239e-03, + 3.300262321758350853e-03, + 1.370499995344566383e-03, + -5.313041843655797901e-04, + 9.860110343046961986e-04, + -7.892505817954784597e-04, + -1.507686316307561489e-02, + 5.818961290579217904e-03, + -1.088774506142304276e-02, + 8.719460408506790952e-03, + 9.764630842803939323e-03, + -3.770134041110058572e-03, + 7.049438389985595785e-03, + -5.645302934019884485e-03, + -3.533582373572779437e-03, + 1.367148320603491559e-03, + -2.546602904764623705e-03, + 2.038882844528267305e-03, + 7.448297038731285964e-03, + -2.924276815200288742e-03, + 5.355960540523636154e-03, + -4.280386435083473329e-03, + -2.924276815200288742e-03, + 1.150311064893848757e-03, + -2.100635980860638373e-03, + 1.678427895009850001e-03, + 5.355960540523636154e-03, + -2.100635980860638373e-03, + 3.853607053247790071e-03, + -3.080076301871465493e-03, + -4.280386435083473329e-03, + 1.678427895009850001e-03, + -3.080076301871465493e-03, + 2.461876613756722523e-03, + 9.730712866459405395e-04, + -3.821759579990726546e-04, + 6.994242056622360787e-04, + 
-5.589662297882965055e-04, + -1.138916742131982317e-02, + 4.469391132927387489e-03, + -8.192016282448397885e-03, + 6.547234460517113892e-03, + 7.460070829043288082e-03, + -2.929867802018087421e-03, + 5.363646855497249989e-03, + -4.286347242903034739e-03, + -2.643569023340565718e-03, + 1.038826463247002245e-03, + -1.899910089750410976e-03, + 1.518237240362583541e-03, + ], + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ).to(device=env.DEVICE) + with open(Path(CUR_DIR) / "models" / "dpa1.json") as fp: + self.model_json = json.load(fp) + self.file_model_param = Path(CUR_DIR) / "models" / "dpa1.pd" + self.file_type_embed = Path(CUR_DIR) / "models" / "dpa2_tebd.pd" + + def test_descriptor_block(self): + # paddle.seed(0) + model_dpa1 = self.model_json + dparams = model_dpa1["descriptor"] + ntypes = len(model_dpa1["type_map"]) + assert "se_atten" == dparams.pop("type") + dparams["ntypes"] = ntypes + des = DescrptBlockSeAtten( + **dparams, + ).to(env.DEVICE) + des.set_state_dict(paddle.load(str(self.file_model_param))) + coord = self.coord + atype = self.atype + box = self.cell + # handel type_embedding + type_embedding = TypeEmbedNet(ntypes, 8, use_tebd_bias=True).to(env.DEVICE) + type_embedding.set_state_dict(paddle.load(str(self.file_type_embed))) + + ## to save model parameters + # paddle.save(des.state_dict(), 'model_weights.pd') + # paddle.save(type_embedding.state_dict(), 'model_weights.pd') + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + des.get_rcut(), + des.get_sel(), + mixed_types=des.mixed_types(), + box=box, + ) + descriptor, env_mat, diff, rot_mat, sw = des( + nlist, + extended_coord, + extended_atype, + type_embedding(extended_atype), + mapping=None, + ) + # np.savetxt('tmp.out', descriptor.detach().numpy().reshape(1,-1), delimiter=",") + self.assertEqual(descriptor.shape[-1], des.get_dim_out()) + self.assertAlmostEqual(6.0, des.get_rcut()) + self.assertEqual(30, des.get_nsel()) + self.assertEqual(2, des.get_ntypes()) + np.testing.assert_allclose( + descriptor.reshape([-1]).numpy(), self.ref_d.numpy(), atol=1e-10, rtol=1e-10 + ) + + def test_descriptor(self): + with open(Path(CUR_DIR) / "models" / "dpa1.json") as fp: + self.model_json = json.load(fp) + model_dpa2 = self.model_json + ntypes = len(model_dpa2["type_map"]) + dparams = model_dpa2["descriptor"] + dparams["ntypes"] = ntypes + assert dparams.pop("type") == "se_atten" + dparams["concat_output_tebd"] = False + dparams["use_tebd_bias"] = True + des = DescrptDPA1( + **dparams, + ).to(env.DEVICE) + target_dict = des.state_dict() + source_dict = paddle.load(str(self.file_model_param)) + type_embd_dict = paddle.load(str(self.file_type_embed)) + target_dict = translate_se_atten_and_type_embd_dicts_to_dpa1( + target_dict, + source_dict, + type_embd_dict, + ) + des.set_state_dict(target_dict) + + coord = self.coord + atype = self.atype + box = self.cell + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + des.get_rcut(), + des.get_sel(), + mixed_types=des.mixed_types(), + box=box, + ) + descriptor, env_mat, diff, rot_mat, sw = des( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + self.assertEqual(descriptor.shape[-1], des.get_dim_out()) + self.assertAlmostEqual(6.0, des.get_rcut()) + self.assertEqual(30, des.get_nsel()) + self.assertEqual(2, des.get_ntypes()) + np.testing.assert_allclose( + descriptor.reshape([-1]).numpy(), self.ref_d.numpy(), atol=1e-10, rtol=1e-10 + ) + + 
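# The remainder of the test rebuilds the descriptor with concat_output_tebd=True and only checks
# that the output width still matches get_dim_out(); the reference values above were compared
# with concat_output_tebd=False.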
dparams["concat_output_tebd"] = True + des = DescrptDPA1( + **dparams, + ).to(env.DEVICE) + descriptor, env_mat, diff, rot_mat, sw = des( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + self.assertEqual(descriptor.shape[-1], des.get_dim_out()) + + +def translate_se_atten_and_type_embd_dicts_to_dpa1( + target_dict, + source_dict, + type_embd_dict, +): + all_keys = list(target_dict.keys()) + record = [False for ii in all_keys] + for kk, vv in source_dict.items(): + tk = "se_atten." + kk + record[all_keys.index(tk)] = True + target_dict[tk] = vv + assert len(type_embd_dict.keys()) == 2 + it = iter(type_embd_dict.keys()) + for _ in range(2): + kk = next(it) + tk = "type_embedding." + kk + record[all_keys.index(tk)] = True + target_dict[tk] = type_embd_dict[kk] + assert all(record) + return target_dict diff --git a/source/tests/pd/model/test_descriptor_dpa2.py b/source/tests/pd/model/test_descriptor_dpa2.py new file mode 100644 index 0000000000..0a3c2844a3 --- /dev/null +++ b/source/tests/pd/model/test_descriptor_dpa2.py @@ -0,0 +1,204 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import unittest +from pathlib import ( + Path, +) + +import numpy as np +import paddle + +from deepmd.pd.model.descriptor import ( + DescrptDPA2, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) + +CUR_DIR = os.path.dirname(__file__) + + +class TestDPA2(unittest.TestCase): + def setUp(self): + cell = [ + 5.122106549439247480e00, + 4.016537340154059388e-01, + 6.951654033828678081e-01, + 4.016537340154059388e-01, + 6.112136112297989143e00, + 8.178091365465004481e-01, + 6.951654033828678081e-01, + 8.178091365465004481e-01, + 6.159552512682983760e00, + ] + self.cell = ( + paddle.to_tensor(cell, dtype=env.GLOBAL_PD_FLOAT_PRECISION) + .reshape([1, 3, 3]) + .to(device=env.DEVICE) + ) + coord = [ + 2.978060152121375648e00, + 3.588469695887098077e00, + 2.792459820604495491e00, + 3.895592322591093115e00, + 2.712091020667753760e00, + 1.366836847133650501e00, + 9.955616170888935690e-01, + 4.121324820711413039e00, + 1.817239061889086571e00, + 3.553661462345699906e00, + 5.313046969500791583e00, + 6.635182659098815883e00, + 6.088601018589653080e00, + 6.575011420004332585e00, + 6.825240650611076099e00, + ] + self.coord = ( + paddle.to_tensor(coord, dtype=env.GLOBAL_PD_FLOAT_PRECISION) + .reshape([1, -1, 3]) + .to(device=env.DEVICE) + ) + self.atype = ( + paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32) + .reshape([1, -1]) + .to(device=env.DEVICE) + ) + self.ref_d = paddle.to_tensor( + [ + 8.435412613327306630e-01, + -4.717109614540972440e-01, + -1.812643456954206256e00, + -2.315248767961955167e-01, + -7.112973006771171613e-01, + -4.162041919507591392e-01, + -1.505159810095323181e00, + -1.191652416985768403e-01, + 8.439214937875325617e-01, + -4.712976890460106594e-01, + -1.812605149396642856e00, + -2.307222236291133766e-01, + -7.115427800870099961e-01, + -4.164729253167227530e-01, + -1.505483119125936797e00, + -1.191288524278367872e-01, + 8.286420823261241297e-01, + -4.535033763979030574e-01, + -1.787877160970498425e00, + -1.961763875645104460e-01, + -7.475459187804838201e-01, + -5.231446874663764346e-01, + -1.488399984491664219e00, + -3.974117581747104583e-02, + 8.283793431613817315e-01, + -4.551551577556525729e-01, + -1.789253136645859943e00, + -1.977673627726055372e-01, + -7.448826048241211639e-01, + -5.161350182531234676e-01, + -1.487589463573479209e00, + -4.377376017839779143e-02, + 
8.295404560710329944e-01, + -4.492219258475603216e-01, + -1.784484611185287450e00, + -1.901182059718481143e-01, + -7.537407667483000395e-01, + -5.384371277650709109e-01, + -1.490368056268364549e00, + -3.073744832541754762e-02, + ], + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + ).to(device=env.DEVICE) + self.file_model_param = Path(CUR_DIR) / "models" / "dpa2.pd" + self.file_type_embed = Path(CUR_DIR) / "models" / "dpa2_tebd.pd" + + def test_descriptor(self): + with open(Path(CUR_DIR) / "models" / "dpa2.json") as fp: + self.model_json = json.load(fp) + model_dpa2 = self.model_json + ntypes = len(model_dpa2["type_map"]) + dparams = model_dpa2["descriptor"] + dparams["ntypes"] = ntypes + assert dparams.pop("type") == "dpa2" + dparams["concat_output_tebd"] = False + dparams["use_tebd_bias"] = True + des = DescrptDPA2( + **dparams, + ).to(env.DEVICE) + target_dict = des.state_dict() + source_dict = paddle.load(str(self.file_model_param)) + # type_embd of repformer is removed + source_dict.pop("type_embedding.embedding.embedding_net.layers.0.bias") + type_embd_dict = paddle.load(str(self.file_type_embed)) + target_dict = translate_type_embd_dicts_to_dpa2( + target_dict, + source_dict, + type_embd_dict, + ) + des.set_state_dict(target_dict) + + coord = self.coord + atype = self.atype + box = self.cell + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + des.get_rcut(), + des.get_sel(), + mixed_types=des.mixed_types(), + box=box, + ) + descriptor, env_mat, diff, rot_mat, sw = des( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + self.assertEqual(descriptor.shape[-1], des.get_dim_out()) + self.assertAlmostEqual(6.0, des.get_rcut()) + self.assertEqual(30, des.get_nsel()) + self.assertEqual(2, des.get_ntypes()) + np.testing.assert_allclose( + descriptor.reshape([-1]).numpy(), self.ref_d.numpy(), atol=1e-10, rtol=1e-10 + ) + + dparams["concat_output_tebd"] = True + des = DescrptDPA2( + **dparams, + ).to(env.DEVICE) + descriptor, env_mat, diff, rot_mat, sw = des( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + self.assertEqual(descriptor.shape[-1], des.get_dim_out()) + + +def translate_type_embd_dicts_to_dpa2( + target_dict, + source_dict, + type_embd_dict, +): + all_keys = list(target_dict.keys()) + record = [False for ii in all_keys] + for kk, vv in source_dict.items(): + record[all_keys.index(kk)] = True + target_dict[kk] = vv + assert len(type_embd_dict.keys()) == 2 + it = iter(type_embd_dict.keys()) + for _ in range(2): + kk = next(it) + tk = "type_embedding." 
+ kk + record[all_keys.index(tk)] = True + target_dict[tk] = type_embd_dict[kk] + assert all(record) + return target_dict diff --git a/source/tests/pd/model/test_descriptor_hybrid.py b/source/tests/pd/model/test_descriptor_hybrid.py new file mode 100644 index 0000000000..3830683de6 --- /dev/null +++ b/source/tests/pd/model/test_descriptor_hybrid.py @@ -0,0 +1,125 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.descriptor.dpa1 import ( + DescrptDPA1, +) +from deepmd.pd.model.descriptor.hybrid import ( + DescrptHybrid, +) +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.model.descriptor.se_r import ( + DescrptSeR, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_paddle_tensor, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestDescrptHybrid(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_jit( + self, + ): + ddsub0 = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ) + ddsub1 = DescrptSeR( + self.rcut, + self.rcut_smth, + self.sel, + ) + dd0 = DescrptHybrid(list=[ddsub0, ddsub1]) + dd1 = DescrptHybrid.deserialize(dd0.serialize()) + dd0 = paddle.jit.to_static(dd0) + dd1 = paddle.jit.to_static(dd1) + + def test_get_parameters( + self, + ): + nf, nloc, nnei = self.nlist.shape + ddsub0 = DescrptSeA( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=self.sel, + ) + ddsub1 = DescrptDPA1( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=np.sum(self.sel).item() - 1, + ntypes=len(self.sel), + ) + ddsub2 = DescrptSeR( + rcut=self.rcut / 2, + rcut_smth=self.rcut_smth - 0.1, + sel=[3, 1], + ) + em0 = DescrptHybrid(list=[ddsub0, ddsub1, ddsub2]) + self.assertAlmostEqual(em0.get_env_protection(), 0.0) + self.assertAlmostEqual(em0.get_rcut_smth(), self.rcut_smth - 0.1) + ddsub3 = DescrptSeR( + rcut=self.rcut / 2, + rcut_smth=self.rcut_smth - 0.1, + sel=[3, 1], + env_protection=0.1, + ) + em0 = DescrptHybrid(list=[ddsub0, ddsub1, ddsub3]) + with self.assertRaises(ValueError): + self.assertAlmostEqual(em0.get_env_protection(), 0.0) + + def test_hybrid_mixed_and_no_mixed(self): + coord_ext = to_paddle_tensor(self.coord_ext) + atype_ext = to_paddle_tensor(self.atype_ext) + nlist1 = to_paddle_tensor(self.nlist) + nlist2 = to_paddle_tensor(-np.sort(-self.nlist, axis=-1)) + ddsub0 = DescrptSeA( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=self.sel, + ) + ddsub1 = DescrptDPA1( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=np.sum(self.sel).item() - 1, + ntypes=len(self.sel), + ) + ddsub2 = DescrptSeR( + rcut=self.rcut / 2, + rcut_smth=self.rcut_smth, + sel=[3, 1], + ) + dd = DescrptHybrid(list=[ddsub0, ddsub1, ddsub2]) + ret = dd( + coord_ext, + atype_ext, + nlist2, + ) + ret0 = ddsub0( + coord_ext, + atype_ext, + nlist1, + ) + ret1 = ddsub1(coord_ext, atype_ext, nlist2[:, :, :-1]) + ret2 = ddsub2(coord_ext, atype_ext, nlist1[:, :, [0, 1, 2, self.sel[0]]]) + np.testing.assert_allclose( + ret[0].numpy(), + paddle.concat([ret0[0], ret1[0], ret2[0]], axis=2).numpy(), + ) diff --git a/source/tests/pd/model/test_descriptor_se_r.py b/source/tests/pd/model/test_descriptor_se_r.py new file mode 100644 index 0000000000..02a6199de9 --- /dev/null +++ b/source/tests/pd/model/test_descriptor_se_r.py @@ -0,0 +1,188 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import 
numpy as np +import paddle + +from deepmd.dpmodel.descriptor import DescrptSeR as DPDescrptSeR +from deepmd.pd.model.descriptor.se_r import ( + DescrptSeR, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) +from .test_mlp import ( + get_tols, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +# to be merged with the tf test case +class TestDescrptSeR(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + _, _, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 1)) + dstd = rng.normal(size=(self.nt, nnei, 1)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec, em in itertools.product( + [False, True], + ["float64", "float32"], + [[], [[0, 1]], [[1, 1]]], + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + # sea new impl + dd0 = DescrptSeR( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + exclude_mask=em, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + dd0.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.dstd = paddle.to_tensor(dstd, dtype=dtype).to(device=env.DEVICE) + + rd0, _, _, _, _ = dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + # serialization + dd1 = DescrptSeR.deserialize(dd0.serialize()) + rd1, _, _, _, sw1 = dd1( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy()[0][self.perm[: self.nloc]], + rd0.detach().cpu().numpy()[1], + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + # dp impl + dd2 = DPDescrptSeR.deserialize(dd0.serialize()) + rd2, _, _, _, sw2 = dd2.call( + self.coord_ext, + self.atype_ext, + self.nlist, + ) + for aa, bb in zip([rd1, sw1], [rd2, sw2]): + np.testing.assert_allclose( + aa.detach().cpu().numpy(), + bb, + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + + def test_load_stat(self): + rng = np.random.default_rng(GLOBAL_SEED) + _, _, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 1)) + dstd = rng.normal(size=(self.nt, nnei, 1)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec in itertools.product( + [False, True], + ["float64", "float32"], + ): + dtype = PRECISION_DICT[prec] + + # sea new impl + dd0 = DescrptSeR( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + seed=GLOBAL_SEED, + ) + dd0.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.dstd = paddle.to_tensor(dstd, dtype=dtype).to(device=env.DEVICE) + dd1 = DescrptSeR.deserialize(dd0.serialize()) + dd1.compute_input_stats( + [ + { + "r0": None, + "coord": paddle.to_tensor(self.coord_ext) + .reshape([-1, self.nall, 3]) + .to(env.DEVICE), + "atype": paddle.to_tensor(self.atype_ext).to(env.DEVICE), + "box": None, + "natoms": self.nall, + } + ] + ) + + with 
self.assertRaises(ValueError) as cm: + ev = EnvMatStatSe(dd1) + ev.last_dim = 3 + ev.load_or_compute_stats([]) + self.assertEqual( + "last_dim should be 1 for raial-only or 4 for full descriptor.", + str(cm.exception), + ) + + def test_jit( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + _, _, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 1)) + dstd = rng.normal(size=(self.nt, nnei, 1)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec in itertools.product( + [False, True], + ["float64", "float32"], + ): + dtype = PRECISION_DICT[prec] + + # sea new impl + dd0 = DescrptSeR( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + seed=GLOBAL_SEED, + ) + dd0.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.dstd = paddle.to_tensor(dstd, dtype=dtype).to(device=env.DEVICE) + dd1 = DescrptSeR.deserialize(dd0.serialize()) + paddle.jit.to_static(dd0) + paddle.jit.to_static(dd1) diff --git a/source/tests/pd/model/test_dipole_fitting.py b/source/tests/pd/model/test_dipole_fitting.py new file mode 100644 index 0000000000..37e257de0d --- /dev/null +++ b/source/tests/pd/model/test_dipole_fitting.py @@ -0,0 +1,398 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import os +import unittest + +import numpy as np +import paddle +from scipy.stats import ( + special_ortho_group, +) + +from deepmd.dpmodel.fitting import DipoleFitting as DPDipoleFitting +from deepmd.infer.deep_dipole import ( + DeepDipole, +) +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.model.model.dipole_model import ( + DipoleModel, +) +from deepmd.pd.model.task.dipole import ( + DipoleFittingNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +def finite_difference(f, x, a, delta=1e-6): + in_shape = x.shape + y0 = f(x, a) + out_shape = y0.shape + res = np.empty(out_shape + in_shape) + for idx in np.ndindex(*in_shape): + diff = np.zeros(in_shape) + diff[idx] += delta + y1p = f(x + diff, a) + y1n = f(x - diff, a) + res[(Ellipsis, *idx)] = (y1p - y1n) / (2 * delta) + return res + + +class TestDipoleFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + self.rng = np.random.default_rng(GLOBAL_SEED) + self.nf, self.nloc, _ = self.nlist.shape + self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + + def test_consistency( + self, + ): + rd0, gr, _, _, _ = self.dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + atype = paddle.to_tensor(self.atype_ext[:, : self.nloc], dtype="int64").to( + device=env.DEVICE + ) + + for nfp, nap in itertools.product( + [0, 3], + [0, 4], + ): + ft0 = DipoleFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=self.dd0.mixed_types(), + ).to(env.DEVICE) + ft1 = DPDipoleFitting.deserialize(ft0.serialize()) + ft2 = DipoleFittingNet.deserialize(ft1.serialize()) + + if nfp > 0: + ifp = paddle.to_tensor( + self.rng.normal(size=(self.nf, nfp)), dtype=dtype + 
).to(device=env.DEVICE) + else: + ifp = None + if nap > 0: + iap = paddle.to_tensor( + self.rng.normal(size=(self.nf, self.nloc, nap)), + dtype=dtype, + ).to(device=env.DEVICE) + else: + iap = None + + ret0 = ft0(rd0, atype, gr, fparam=ifp, aparam=iap) + ret1 = ft1( + rd0.detach().cpu().numpy(), + atype.detach().cpu().numpy(), + gr.detach().cpu().numpy(), + fparam=to_numpy_array(ifp), + aparam=to_numpy_array(iap), + ) + ret2 = ft2(rd0, atype, gr, fparam=ifp, aparam=iap) + np.testing.assert_allclose( + to_numpy_array(ret0["dipole"]), + ret1["dipole"], + ) + np.testing.assert_allclose( + to_numpy_array(ret0["dipole"]), + to_numpy_array(ret2["dipole"]), + ) + + def test_jit( + self, + ): + for mixed_types, nfp, nap in itertools.product( + [True, False], + [0, 3], + [0, 4], + ): + ft0 = DipoleFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=mixed_types, + ).to(env.DEVICE) + paddle.jit.to_static(ft0) + + +class TestEquivalence(unittest.TestCase): + def setUp(self) -> None: + self.natoms = 5 + self.rcut = 4 + self.rcut_smth = 0.5 + self.sel = [46, 92, 4] + self.nf = 1 + generator = paddle.seed(GLOBAL_SEED) + self.coord = 2 * paddle.rand([self.natoms, 3], dtype=dtype).to( + device=env.DEVICE + ) + self.shift = paddle.to_tensor([4, 4, 4], dtype=dtype).to(device=env.DEVICE) + self.atype = paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32).to( + device=env.DEVICE + ) + self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + self.cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + self.cell = (self.cell + self.cell.T) + 5.0 * paddle.eye(3).to( + device=env.DEVICE + ) + + def test_rot(self): + atype = self.atype.reshape([1, 5]) + rmat = paddle.to_tensor(special_ortho_group.rvs(3), dtype=dtype).to( + device=env.DEVICE + ) + coord_rot = paddle.matmul(self.coord, rmat) + # use larger cell to rotate only coord and shift to the center of cell + cell_rot = 10.0 * paddle.eye(3, dtype=dtype).to(device=env.DEVICE) + rng = np.random.default_rng(GLOBAL_SEED) + for nfp, nap in itertools.product( + [0, 3], + [0, 4], + ): + ft0 = DipoleFittingNet( + 3, # ntype + self.dd0.dim_out, # dim_descrpt + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=self.dd0.mixed_types(), + ).to(env.DEVICE) + if nfp > 0: + ifp = paddle.to_tensor(rng.normal(size=(self.nf, nfp)), dtype=dtype).to( + device=env.DEVICE + ) + else: + ifp = None + if nap > 0: + iap = paddle.to_tensor( + rng.normal(size=(self.nf, self.natoms, nap)), + dtype=dtype, + ).to(device=env.DEVICE) + else: + iap = None + + res = [] + for xyz in [self.coord, coord_rot]: + ( + extended_coord, + extended_atype, + _, + nlist, + ) = extend_input_and_build_neighbor_list( + xyz + self.shift, + atype, + self.rcut, + self.sel, + self.dd0.mixed_types(), + box=cell_rot, + ) + + rd0, gr0, _, _, _ = self.dd0( + extended_coord, + extended_atype, + nlist, + ) + + ret0 = ft0(rd0, atype, gr0, fparam=ifp, aparam=iap) + res.append(ret0["dipole"]) + + np.testing.assert_allclose( + to_numpy_array(res[1]), to_numpy_array(paddle.matmul(res[0], rmat)) + ) + + def test_permu(self): + coord = paddle.matmul(self.coord, self.cell) + ft0 = DipoleFittingNet( + 3, # ntype + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=0, + numb_aparam=0, + mixed_types=self.dd0.mixed_types(), + ).to(env.DEVICE) + res = [] + for idx_perm in [[0, 1, 2, 3, 4], [1, 0, 4, 3, 2]]: + atype = self.atype[idx_perm].reshape([1, 5]) + ( + 
extended_coord, + extended_atype, + _, + nlist, + ) = extend_input_and_build_neighbor_list( + coord[idx_perm], + atype, + self.rcut, + self.sel, + self.dd0.mixed_types(), + box=self.cell, + ) + + rd0, gr0, _, _, _ = self.dd0( + extended_coord, + extended_atype, + nlist, + ) + + ret0 = ft0(rd0, atype, gr0, fparam=0, aparam=0) + res.append(ret0["dipole"]) + + np.testing.assert_allclose( + to_numpy_array(res[0][:, idx_perm]), to_numpy_array(res[1]) + ) + + def test_trans(self): + atype = self.atype.reshape([1, 5]) + coord_s = paddle.matmul( + paddle.remainder( + paddle.matmul(self.coord + self.shift, paddle.linalg.inv(self.cell)), + paddle.full([], 1.0), + ), + self.cell, + ) + ft0 = DipoleFittingNet( + 3, # ntype + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=0, + numb_aparam=0, + mixed_types=self.dd0.mixed_types(), + ).to(env.DEVICE) + res = [] + for xyz in [self.coord, coord_s]: + ( + extended_coord, + extended_atype, + _, + nlist, + ) = extend_input_and_build_neighbor_list( + xyz, atype, self.rcut, self.sel, self.dd0.mixed_types(), box=self.cell + ) + + rd0, gr0, _, _, _ = self.dd0( + extended_coord, + extended_atype, + nlist, + ) + + ret0 = ft0(rd0, atype, gr0, fparam=0, aparam=0) + res.append(ret0["dipole"]) + + np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1])) + + +class TestDipoleModel(unittest.TestCase): + def setUp(self): + self.natoms = 5 + self.rcut = 4.0 + self.nt = 3 + self.rcut_smth = 0.5 + self.sel = [46, 92, 4] + self.nf = 1 + generator = paddle.seed(GLOBAL_SEED) + self.coord = 2 * paddle.rand([self.natoms, 3], dtype=dtype).to( + device=env.DEVICE + ) + cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + self.cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(device=env.DEVICE) + self.atype = paddle.to_tensor([0, 0, 0, 1, 1], place="cpu").to(env.DEVICE) + self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + self.ft0 = DipoleFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=0, + numb_aparam=0, + mixed_types=self.dd0.mixed_types(), + ).to(env.DEVICE) + self.type_mapping = ["O", "H", "B"] + self.model = DipoleModel(self.dd0, self.ft0, self.type_mapping) + self.file_path = "model_output.pd" + + def test_auto_diff(self): + places = 5 + delta = 1e-5 + atype = self.atype.reshape([self.nf, self.natoms]) + + def ff(coord, atype): + return ( + self.model(to_paddle_tensor(coord), to_paddle_tensor(atype))[ + "global_dipole" + ] + .detach() + .cpu() + .numpy() + ) + + fdf = -finite_difference( + ff, to_numpy_array(self.coord), to_numpy_array(atype), delta=delta + ) + rff = self.model(self.coord, atype)["force"].detach().cpu().numpy() + + np.testing.assert_almost_equal(fdf, rff.transpose([0, 2, 1, 3]), decimal=places) + + @unittest.skip("Call method with inference model is not supported in paddle") + def test_deepdipole_infer(self): + atype = to_numpy_array(self.atype.reshape([self.nf, self.natoms])) + coord = to_numpy_array(self.coord.reshape([1, 5, 3])) + cell = to_numpy_array(self.cell.reshape([1, 9])) + paddle.set_flags( + { + "FLAGS_save_cf_stack_op": 1, + "FLAGS_prim_enable_dynamic": 1, + "FLAGS_enable_pir_api": 1, + } + ) + from paddle.static import ( + InputSpec, + ) + + jit_md = paddle.jit.to_static( + self.model, + full_graph=True, + input_spec=[ + InputSpec([-1, -1, 3], dtype="float64", name="coord"), + InputSpec([-1, -1], dtype="int32", name="atype"), + InputSpec([-1, -1, -1], dtype="int32", name="cell"), + ], + ) + paddle.jit.save(jit_md, 
self.file_path) + load_md = DeepDipole(self.file_path) + load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=True) + load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=False) + load_md.eval_full(coords=coord, atom_types=atype, cells=cell, atomic=True) + load_md.eval_full(coords=coord, atom_types=atype, cells=cell, atomic=False) + + def tearDown(self) -> None: + if os.path.exists(self.file_path): + os.remove(self.file_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_dp_atomic_model.py b/source/tests/pd/model/test_dp_atomic_model.py new file mode 100644 index 0000000000..785bfa1076 --- /dev/null +++ b/source/tests/pd/model/test_dp_atomic_model.py @@ -0,0 +1,235 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.atomic_model import DPAtomicModel as DPDPAtomicModel +from deepmd.dpmodel.descriptor import DescrptSeA as DPDescrptSeA +from deepmd.dpmodel.fitting import InvarFitting as DPInvarFitting +from deepmd.pd.model.atomic_model import ( + DPAtomicModel, +) +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.model.task.ener import ( + InvarFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, + TestCaseSingleFrameWithNlistWithVirtual, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestDPAtomicModel(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_self_consistency(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = InvarFitting( + "energy", + self.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + + # test the case of exclusion + for atom_excl, pair_excl in itertools.product([[], [1]], [[], [[0, 1]]]): + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + md0.reinit_atom_exclude(atom_excl) + md0.reinit_pair_exclude(pair_excl) + md1 = DPAtomicModel.deserialize(md0.serialize()).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) + for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + ret0 = md0.forward_common_atomic(*args) + ret1 = md1.forward_common_atomic(*args) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + ) + + def test_dp_consistency(self): + nf, nloc, nnei = self.nlist.shape + ds = DPDescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ) + ft = DPInvarFitting( + "energy", + self.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ) + type_map = ["foo", "bar"] + md0 = DPDPAtomicModel(ds, ft, type_map=type_map) + md1 = DPAtomicModel.deserialize(md0.serialize()).to(env.DEVICE) + args0 = [self.coord_ext, self.atype_ext, self.nlist] + args1 = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + ret0 = md0.forward_common_atomic(*args0) + ret1 = md1.forward_common_atomic(*args1) + np.testing.assert_allclose( + ret0["energy"], + to_numpy_array(ret1["energy"]), + ) + + def test_jit(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = InvarFitting( + "energy", + self.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + 
type_map = ["foo", "bar"] + md0 = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE) + md0 = paddle.jit.to_static(md0) + self.assertEqual(md0.get_rcut(), self.rcut) + self.assertEqual(md0.get_type_map(), type_map) + + def test_excl_consistency(self): + type_map = ["foo", "bar"] + + # test the case of exclusion + for atom_excl, pair_excl in itertools.product([[], [1]], [[], [[0, 1]]]): + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = InvarFitting( + "energy", + self.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + md1 = DPAtomicModel.deserialize(md0.serialize()).to(env.DEVICE) + + md0.reinit_atom_exclude(atom_excl) + md0.reinit_pair_exclude(pair_excl) + # hacking! + md1.descriptor.reinit_exclude(pair_excl) + md1.fitting_net.reinit_exclude(atom_excl) + + # check energy consistency + args = [ + to_paddle_tensor(ii) + for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + ret0 = md0.forward_common_atomic(*args) + ret1 = md1.forward_common_atomic(*args) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + ) + + # check output def + out_names = [vv.name for vv in md0.atomic_output_def().get_data().values()] + self.assertEqual(out_names, ["energy", "mask"]) + if atom_excl != []: + for ii in md0.atomic_output_def().get_data().values(): + if ii.name == "mask": + self.assertEqual(ii.shape, [1]) + self.assertFalse(ii.reducible) + self.assertFalse(ii.r_differentiable) + self.assertFalse(ii.c_differentiable) + + # check mask + if atom_excl == []: + pass + elif atom_excl == [1]: + self.assertIn("mask", ret0.keys()) + expected = np.array([1, 1, 0], dtype="int64") + expected = np.concatenate( + [expected, expected[self.perm[: self.nloc]]] + ).reshape(2, 3) + np.testing.assert_array_equal(to_numpy_array(ret0["mask"]), expected) + else: + raise ValueError(f"not expected atom_excl {atom_excl}") + + +class TestDPAtomicModelVirtualConsistency(unittest.TestCase): + def setUp(self): + self.case0 = TestCaseSingleFrameWithNlist() + self.case1 = TestCaseSingleFrameWithNlistWithVirtual() + self.case0.setUp() + self.case1.setUp() + + def test_virtual_consistency(self): + nf, _, _ = self.case0.nlist.shape + ds = DescrptSeA( + self.case0.rcut, + self.case0.rcut_smth, + self.case0.sel, + ) + ft = InvarFitting( + "energy", + self.case0.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ) + type_map = ["foo", "bar"] + md1 = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE) + + args0 = [self.case0.coord_ext, self.case0.atype_ext, self.case0.nlist] + args0 = [to_paddle_tensor(ii) for ii in args0] + args1 = [self.case1.coord_ext, self.case1.atype_ext, self.case1.nlist] + args1 = [to_paddle_tensor(ii) for ii in args1] + + ret0 = md1.forward_common_atomic(*args0) + ret1 = md1.forward_common_atomic(*args1) + + for dd in range(self.case0.nf): + np.testing.assert_allclose( + to_numpy_array(ret0["energy"])[dd], + to_numpy_array(ret1["energy"])[dd, self.case1.get_real_mapping[dd], :], + ) + expected_mask = np.array( + [ + [1, 0, 1, 1], + [1, 1, 0, 1], + ] + ) + np.testing.assert_equal(to_numpy_array(ret1["mask"]), expected_mask) diff --git a/source/tests/pd/model/test_dp_model.py b/source/tests/pd/model/test_dp_model.py new file mode 100644 index 0000000000..a281851f14 --- /dev/null +++ b/source/tests/pd/model/test_dp_model.py @@ -0,0 +1,633 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + 
+import numpy as np +import paddle + +from deepmd.dpmodel.descriptor import DescrptSeA as DPDescrptSeA +from deepmd.dpmodel.fitting import EnergyFittingNet as DPEnergyFittingNet +from deepmd.dpmodel.model.ener_model import EnergyModel as DPEnergyModel +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.model.model import ( + EnergyModel, +) +from deepmd.pd.model.task.ener import ( + EnergyFittingNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + build_neighbor_list, + extend_coord_with_ghosts, + extend_input_and_build_neighbor_list, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, + TestCaseSingleFrameWithoutNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestDPModel(unittest.TestCase, TestCaseSingleFrameWithoutNlist): + def setUp(self): + TestCaseSingleFrameWithoutNlist.setUp(self) + + def test_self_consistency(self): + nf, nloc = self.atype.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = EnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + args = [to_paddle_tensor(ii) for ii in [self.coord, self.atype, self.cell]] + ret0 = md0.forward_common(*args) + ret1 = md1.forward_common(*args) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_redu"]), + to_numpy_array(ret1["energy_redu"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_derv_r"]), + to_numpy_array(ret1["energy_derv_r"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_derv_c_redu"]), + to_numpy_array(ret1["energy_derv_c_redu"]), + atol=self.atol, + ) + ret0 = md0.forward_common(*args, do_atomic_virial=True) + ret1 = md1.forward_common(*args, do_atomic_virial=True) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_derv_c"]), + to_numpy_array(ret1["energy_derv_c"]), + atol=self.atol, + ) + + coord_ext, atype_ext, mapping = extend_coord_with_ghosts( + to_paddle_tensor(self.coord), + to_paddle_tensor(self.atype), + to_paddle_tensor(self.cell), + self.rcut, + ) + nlist = build_neighbor_list( + coord_ext, + atype_ext, + self.nloc, + self.rcut, + self.sel, + distinguish_types=(not md0.mixed_types()), + ) + args = [coord_ext, atype_ext, nlist] + ret2 = md0.forward_common_lower(*args, do_atomic_virial=True) + # check the consistency between the reduced virial from + # forward_common and forward_common_lower + np.testing.assert_allclose( + to_numpy_array(ret0["energy_derv_c_redu"]), + to_numpy_array(ret2["energy_derv_c_redu"]), + atol=self.atol, + ) + + def test_dp_consistency(self): + nf, nloc = self.atype.shape + nfp, nap = 2, 3 + ds = DPDescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ) + ft = DPEnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + numb_fparam=nfp, + numb_aparam=nap, + ) + type_map = ["foo", "bar"] + md0 = DPEnergyModel(ds, ft, type_map=type_map) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + + rng = np.random.default_rng(GLOBAL_SEED) + fparam = rng.normal(size=[self.nf, nfp]) + aparam = 
rng.normal(size=[self.nf, nloc, nap]) + args0 = [self.coord, self.atype, self.cell] + args1 = [to_paddle_tensor(ii) for ii in [self.coord, self.atype, self.cell]] + kwargs0 = {"fparam": fparam, "aparam": aparam} + kwargs1 = {kk: to_paddle_tensor(vv) for kk, vv in kwargs0.items()} + ret0 = md0.call(*args0, **kwargs0) + ret1 = md1.forward_common(*args1, **kwargs1) + np.testing.assert_allclose( + ret0["energy"], + to_numpy_array(ret1["energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + ret0["energy_redu"], + to_numpy_array(ret1["energy_redu"]), + atol=self.atol, + ) + + def test_dp_consistency_nopbc(self): + nf, nloc = self.atype.shape + nfp, nap = 2, 3 + ds = DPDescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ) + ft = DPEnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + numb_fparam=nfp, + numb_aparam=nap, + ) + type_map = ["foo", "bar"] + md0 = DPEnergyModel(ds, ft, type_map=type_map) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + + rng = np.random.default_rng(GLOBAL_SEED) + fparam = rng.normal(size=[self.nf, nfp]) + aparam = rng.normal(size=[self.nf, self.nloc, nap]) + args0 = [self.coord, self.atype] + args1 = [to_paddle_tensor(ii) for ii in args0] + kwargs0 = {"fparam": fparam, "aparam": aparam} + kwargs1 = {kk: to_paddle_tensor(vv) for kk, vv in kwargs0.items()} + ret0 = md0.call(*args0, **kwargs0) + ret1 = md1.forward_common(*args1, **kwargs1) + np.testing.assert_allclose( + ret0["energy"], + to_numpy_array(ret1["energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + ret0["energy_redu"], + to_numpy_array(ret1["energy_redu"]), + atol=self.atol, + ) + + def test_prec_consistency(self): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc = self.atype.shape + ds = DPDescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ) + ft = DPEnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ) + nfp, nap = 2, 3 + type_map = ["foo", "bar"] + fparam = rng.normal(size=[self.nf, nfp]) + aparam = rng.normal(size=[self.nf, nloc, nap]) + + md0 = DPEnergyModel(ds, ft, type_map=type_map) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + + args64 = [to_paddle_tensor(ii) for ii in [self.coord, self.atype, self.cell]] + args64[0] = args64[0].to(paddle.float64) + args64[2] = args64[2].to(paddle.float64) + args32 = [to_paddle_tensor(ii) for ii in [self.coord, self.atype, self.cell]] + args32[0] = args32[0].to(paddle.float32) + args32[2] = args32[2].to(paddle.float32) + # fparam, aparam are converted to coordinate precision by model + fparam = to_paddle_tensor(fparam) + aparam = to_paddle_tensor(aparam) + + model_l_ret_64 = md1.forward_common(*args64, fparam=fparam, aparam=aparam) + model_l_ret_32 = md1.forward_common(*args32, fparam=fparam, aparam=aparam) + + for ii in model_l_ret_32.keys(): + if ii[-4:] == "redu": + self.assertEqual(model_l_ret_32[ii].dtype, paddle.float64) + else: + self.assertEqual(model_l_ret_32[ii].dtype, paddle.float32) + if ii != "mask": + self.assertEqual(model_l_ret_64[ii].dtype, paddle.float64) + else: + self.assertEqual(model_l_ret_64[ii].dtype, paddle.int32) + np.testing.assert_allclose( + to_numpy_array(model_l_ret_32[ii]), + to_numpy_array(model_l_ret_64[ii]), + atol=self.atol, + ) + + +class TestDPModelLower(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_self_consistency(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + 
).to(env.DEVICE) + ft = EnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + ret0 = md0.forward_common_lower(*args) + ret1 = md1.forward_common_lower(*args) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_redu"]), + to_numpy_array(ret1["energy_redu"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_derv_r"]), + to_numpy_array(ret1["energy_derv_r"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_derv_c_redu"]), + to_numpy_array(ret1["energy_derv_c_redu"]), + atol=self.atol, + ) + ret0 = md0.forward_common_lower(*args, do_atomic_virial=True) + ret1 = md1.forward_common_lower(*args, do_atomic_virial=True) + np.testing.assert_allclose( + to_numpy_array(ret0["energy_derv_c"]), + to_numpy_array(ret1["energy_derv_c"]), + atol=self.atol, + ) + + def test_dp_consistency(self): + nf, nloc, nnei = self.nlist.shape + ds = DPDescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ) + ft = DPEnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ) + type_map = ["foo", "bar"] + md0 = DPEnergyModel(ds, ft, type_map=type_map) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + args0 = [self.coord_ext, self.atype_ext, self.nlist] + args1 = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + ret0 = md0.call_lower(*args0) + ret1 = md1.forward_common_lower(*args1) + np.testing.assert_allclose( + ret0["energy"], + to_numpy_array(ret1["energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + ret0["energy_redu"], + to_numpy_array(ret1["energy_redu"]), + atol=self.atol, + ) + + def test_prec_consistency(self): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + ds = DPDescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ) + ft = DPEnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ) + nfp, nap = 2, 3 + type_map = ["foo", "bar"] + fparam = rng.normal(size=[self.nf, nfp]) + aparam = rng.normal(size=[self.nf, nloc, nap]) + + md0 = DPEnergyModel(ds, ft, type_map=type_map) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + + args64 = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + args64[0] = args64[0].to(paddle.float64) + args32 = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + args32[0] = args32[0].to(paddle.float32) + # fparam, aparam are converted to coordinate precision by model + fparam = to_paddle_tensor(fparam) + aparam = to_paddle_tensor(aparam) + + model_l_ret_64 = md1.forward_common_lower(*args64, fparam=fparam, aparam=aparam) + model_l_ret_32 = md1.forward_common_lower(*args32, fparam=fparam, aparam=aparam) + + for ii in model_l_ret_32.keys(): + if ii[-4:] == "redu": + self.assertEqual(model_l_ret_32[ii].dtype, paddle.float64) + else: + self.assertEqual(model_l_ret_32[ii].dtype, paddle.float32) + if ii != "mask": + self.assertEqual(model_l_ret_64[ii].dtype, paddle.float64) + else: + self.assertEqual(model_l_ret_64[ii].dtype, paddle.int32) + np.testing.assert_allclose( + 
to_numpy_array(model_l_ret_32[ii]), + to_numpy_array(model_l_ret_64[ii]), + atol=self.atol, + ) + + def test_jit(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = EnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE) + md0 = paddle.jit.to_static(md0) + md0.get_rcut() + md0.get_type_map() + + +class TestDPModelFormatNlist(unittest.TestCase): + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 3 + self.nall = 5 + self.nf, self.nt = 1, 2 + self.coord_ext = np.array( + [ + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, -2, 0], + [2.3, 0, 0], + ], + dtype=np.float64, + ).reshape([1, self.nall * 3]) + # sel = [5, 2] + self.sel = [5, 2] + self.expected_nlist = np.array( + [ + [1, 3, -1, -1, -1, 2, -1], + [0, -1, -1, -1, -1, 2, -1], + [0, 1, -1, -1, -1, -1, -1], + ], + dtype="int64", + ).reshape([1, self.nloc, sum(self.sel)]) + self.atype_ext = np.array([0, 0, 1, 0, 1], dtype="int64").reshape( + [1, self.nall] + ) + self.rcut_smth = 0.4 + self.rcut = 2.0 + + nf, nloc, nnei = self.expected_nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = EnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + self.md = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE) + + def test_nlist_eq(self): + # n_nnei == nnei + nlist = np.array( + [ + [1, 3, -1, -1, -1, 2, -1], + [0, -1, -1, -1, -1, 2, -1], + [0, 1, -1, -1, -1, -1, -1], + ], + dtype=np.int64, + ).reshape([1, self.nloc, -1]) + nlist1 = self.md.format_nlist( + to_paddle_tensor(self.coord_ext), + to_paddle_tensor(self.atype_ext), + to_paddle_tensor(nlist), + ) + np.testing.assert_equal(self.expected_nlist, to_numpy_array(nlist1)) + + def test_nlist_st(self): + # n_nnei < nnei + nlist = np.array( + [ + [1, 3, -1, 2], + [0, -1, -1, 2], + [0, 1, -1, -1], + ], + dtype=np.int64, + ).reshape([1, self.nloc, -1]) + nlist1 = self.md.format_nlist( + to_paddle_tensor(self.coord_ext), + to_paddle_tensor(self.atype_ext), + to_paddle_tensor(nlist), + ) + np.testing.assert_equal(self.expected_nlist, to_numpy_array(nlist1)) + + def test_nlist_lt(self): + # n_nnei > nnei + nlist = np.array( + [ + [1, 3, -1, -1, -1, 2, -1, -1, 4], + [0, -1, 4, -1, -1, 2, -1, 3, -1], + [0, 1, -1, -1, -1, 4, -1, -1, 3], + ], + dtype=np.int64, + ).reshape([1, self.nloc, -1]) + nlist1 = self.md.format_nlist( + to_paddle_tensor(self.coord_ext), + to_paddle_tensor(self.atype_ext), + to_paddle_tensor(nlist), + ) + np.testing.assert_equal(self.expected_nlist, to_numpy_array(nlist1)) + + +class TestEnergyModel(unittest.TestCase, TestCaseSingleFrameWithoutNlist): + def setUp(self): + TestCaseSingleFrameWithoutNlist.setUp(self) + + def test_self_consistency(self): + nf, nloc = self.atype.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = EnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + args = [to_paddle_tensor(ii) for ii in [self.coord, self.atype, self.cell]] + ret0 = md0.forward(*args) + ret1 = md1.forward(*args) + np.testing.assert_allclose( + to_numpy_array(ret0["atom_energy"]), + to_numpy_array(ret1["atom_energy"]), 
+ atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["force"]), + to_numpy_array(ret1["force"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["virial"]), + to_numpy_array(ret1["virial"]), + atol=self.atol, + ) + ret0 = md0.forward(*args, do_atomic_virial=True) + ret1 = md1.forward(*args, do_atomic_virial=True) + np.testing.assert_allclose( + to_numpy_array(ret0["atom_virial"]), + to_numpy_array(ret1["atom_virial"]), + atol=self.atol, + ) + coord_ext, atype_ext, mapping, nlist = extend_input_and_build_neighbor_list( + to_paddle_tensor(self.coord), + to_paddle_tensor(self.atype), + self.rcut, + self.sel, + mixed_types=md0.mixed_types(), + box=to_paddle_tensor(self.cell), + ) + args = [coord_ext, atype_ext, nlist] + ret2 = md0.forward_lower(*args, do_atomic_virial=True) + # check the consistency between the reduced virial from + # forward and forward_lower + np.testing.assert_allclose( + to_numpy_array(ret0["virial"]), + to_numpy_array(ret2["virial"]), + atol=self.atol, + ) + + +class TestEnergyModelLower(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_self_consistency(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = EnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE) + md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + ret0 = md0.forward_lower(*args) + ret1 = md1.forward_lower(*args) + np.testing.assert_allclose( + to_numpy_array(ret0["atom_energy"]), + to_numpy_array(ret1["atom_energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["extended_force"]), + to_numpy_array(ret1["extended_force"]), + atol=self.atol, + ) + np.testing.assert_allclose( + to_numpy_array(ret0["virial"]), + to_numpy_array(ret1["virial"]), + atol=self.atol, + ) + ret0 = md0.forward_lower(*args, do_atomic_virial=True) + ret1 = md1.forward_lower(*args, do_atomic_virial=True) + np.testing.assert_allclose( + to_numpy_array(ret0["extended_virial"]), + to_numpy_array(ret1["extended_virial"]), + atol=self.atol, + ) + + def test_jit(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + ).to(env.DEVICE) + ft = EnergyFittingNet( + self.nt, + ds.get_dim_out(), + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE) + md0 = paddle.jit.to_static(md0) + self.assertEqual(md0.get_rcut(), self.rcut) + self.assertEqual(md0.get_type_map(), type_map) diff --git a/source/tests/pd/model/test_dpa1.py b/source/tests/pd/model/test_dpa1.py new file mode 100644 index 0000000000..285dd3d4cd --- /dev/null +++ b/source/tests/pd/model/test_dpa1.py @@ -0,0 +1,164 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.descriptor.dpa1 import DescrptDPA1 as DPDescrptDPA1 +from deepmd.pd.model.descriptor.dpa1 import ( + 
DescrptDPA1, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) +from .test_mlp import ( + get_tols, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestDescrptSeAtten(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(100) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, sm, to, tm, prec, ect in itertools.product( + [False, True], # resnet_dt + [False, True], # smooth_type_embedding + [False, True], # type_one_side + ["concat", "strip"], # tebd_input_mode + [ + "float64", + ], # precision + [False, True], # use_econf_tebd + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + + # dpa1 new impl + dd0 = DescrptDPA1( + self.rcut, + self.rcut_smth, + self.sel_mix, + self.nt, + attn_layer=2, + precision=prec, + resnet_dt=idt, + smooth_type_embedding=sm, + type_one_side=to, + tebd_input_mode=tm, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + dd0.se_atten.mean = paddle.to_tensor(davg, dtype=dtype).to( + device=env.DEVICE + ) + dd0.se_atten.stddev = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + rd0, _, _, _, _ = dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + # serialization + dd1 = DescrptDPA1.deserialize(dd0.serialize()) + rd1, _, _, _, _ = dd1( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + # dp impl + dd2 = DPDescrptDPA1.deserialize(dd0.serialize()) + rd2, _, _, _, _ = dd2.call( + self.coord_ext, + self.atype_ext, + self.nlist, + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd2, + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + + def test_jit( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec, sm, to, tm, ect in itertools.product( + [ + False, + ], # resnet_dt + [ + "float64", + ], # precision + [False, True], # smooth_type_embedding + [ + False, + ], # type_one_side + ["concat", "strip"], # tebd_input_mode + [False, True], # use_econf_tebd + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + # dpa1 new impl + dd0 = DescrptDPA1( + self.rcut, + self.rcut_smth, + self.sel, + self.nt, + precision=prec, + resnet_dt=idt, + smooth_type_embedding=sm, + type_one_side=to, + tebd_input_mode=tm, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ) + dd0.se_atten.mean = paddle.to_tensor(davg, dtype=dtype).to( + device=env.DEVICE + ) + dd0.se_atten.dstd = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + # 
dd1 = DescrptDPA1.deserialize(dd0.serialize()) + model = paddle.jit.to_static(dd0) + # model = paddle.jit.to_static(dd1) diff --git a/source/tests/pd/model/test_dpa2.py b/source/tests/pd/model/test_dpa2.py new file mode 100644 index 0000000000..68a5fe8f37 --- /dev/null +++ b/source/tests/pd/model/test_dpa2.py @@ -0,0 +1,332 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.descriptor.dpa2 import DescrptDPA2 as DPDescrptDPA2 +from deepmd.dpmodel.descriptor.dpa2 import ( + RepformerArgs, + RepinitArgs, +) +from deepmd.pd.model.descriptor.dpa2 import ( + DescrptDPA2, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) +from .test_mlp import ( + get_tols, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestDescrptDPA2(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(100) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + davg_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd = 0.1 + np.abs(dstd) + dstd_2 = 0.1 + np.abs(dstd_2) + + for ( + riti, + riz, + rp1c, + rp1d, + rp1g, + rp1a, + rp2g, + rp2a, + rph, + rp2gate, + rus, + rpz, + sm, + prec, + ect, + ns, + ) in itertools.product( + ["concat", "strip"], # repinit_tebd_input_mode + [ + True, + ], # repinit_set_davg_zero + [True, False], # repformer_update_g1_has_conv + [True, False], # repformer_update_g1_has_drrd + [True, False], # repformer_update_g1_has_grrg + [ + False, + ], # repformer_update_g1_has_attn + [ + False, + ], # repformer_update_g2_has_g1g1 + [True, False], # repformer_update_g2_has_attn + [ + False, + ], # repformer_update_h2 + [ + True, + ], # repformer_attn2_has_gate + ["res_avg", "res_residual"], # repformer_update_style + [ + True, + ], # repformer_set_davg_zero + [ + True, + ], # smooth + ["float64"], # precision + [False, True], # use_econf_tebd + [ + False, + True, + ], # new sub-structures (use_sqrt_nnei, g1_out_conv, g1_out_mlp) + ): + if ns and not rp1d and not rp1g: + continue + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + if prec == "float64": + atol = 1e-8 # marginal GPU test cases... 
+ + repinit = RepinitArgs( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + nsel=self.sel_mix, + tebd_input_mode=riti, + set_davg_zero=riz, + ) + repformer = RepformerArgs( + rcut=self.rcut / 2, + rcut_smth=self.rcut_smth, + nsel=nnei // 2, + nlayers=3, + g1_dim=20, + g2_dim=10, + axis_neuron=4, + update_g1_has_conv=rp1c, + update_g1_has_drrd=rp1d, + update_g1_has_grrg=rp1g, + update_g1_has_attn=rp1a, + update_g2_has_g1g1=rp2g, + update_g2_has_attn=rp2a, + update_h2=rph, + attn1_hidden=20, + attn1_nhead=2, + attn2_hidden=10, + attn2_nhead=2, + attn2_has_gate=rp2gate, + update_style=rus, + set_davg_zero=rpz, + use_sqrt_nnei=ns, + g1_out_conv=ns, + g1_out_mlp=ns, + ) + + # dpa2 new impl + dd0 = DescrptDPA2( + self.nt, + repinit=repinit, + repformer=repformer, + # kwargs for descriptor + smooth=sm, + exclude_types=[], + add_tebd_to_repinit_out=False, + precision=prec, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + + dd0.repinit.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.repinit.stddev = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.mean = paddle.to_tensor(davg_2, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.stddev = paddle.to_tensor(dstd_2, dtype=dtype).to( + device=env.DEVICE + ) + rd0, _, _, _, _ = dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.mapping, dtype="int64").to(device=env.DEVICE), + ) + # serialization + dd1 = DescrptDPA2.deserialize(dd0.serialize()) + rd1, _, _, _, _ = dd1( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.mapping, dtype="int64").to(device=env.DEVICE), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + ) + # dp impl + dd2 = DPDescrptDPA2.deserialize(dd0.serialize()) + rd2, _, _, _, _ = dd2.call( + self.coord_ext, self.atype_ext, self.nlist, self.mapping + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd2, + rtol=rtol, + atol=atol, + ) + + def test_jit( + self, + ): + rng = np.random.default_rng(100) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + davg_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd = 0.1 + np.abs(dstd) + + for ( + riti, + riz, + rp1c, + rp1d, + rp1g, + rp1a, + rp2g, + rp2a, + rph, + rp2gate, + rus, + rpz, + sm, + prec, + ect, + ns, + ) in itertools.product( + ["concat", "strip"], # repinit_tebd_input_mode + [ + True, + ], # repinit_set_davg_zero + [ + True, + ], # repformer_update_g1_has_conv + [ + True, + ], # repformer_update_g1_has_drrd + [ + True, + ], # repformer_update_g1_has_grrg + [ + True, + ], # repformer_update_g1_has_attn + [ + True, + ], # repformer_update_g2_has_g1g1 + [ + True, + ], # repformer_update_g2_has_attn + [ + False, + ], # repformer_update_h2 + [ + True, + ], # repformer_attn2_has_gate + ["res_avg", "res_residual"], # repformer_update_style + [ + True, + ], # repformer_set_davg_zero + [ + True, + ], # smooth + ["float64"], # precision + [False, True], # use_econf_tebd + [True], # new sub-structures 
(use_sqrt_nnei, g1_out_conv, g1_out_mlp) + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + + repinit = RepinitArgs( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + nsel=self.sel_mix, + tebd_input_mode=riti, + set_davg_zero=riz, + ) + repformer = RepformerArgs( + rcut=self.rcut / 2, + rcut_smth=self.rcut_smth, + nsel=nnei // 2, + nlayers=3, + g1_dim=20, + g2_dim=10, + axis_neuron=4, + update_g1_has_conv=rp1c, + update_g1_has_drrd=rp1d, + update_g1_has_grrg=rp1g, + update_g1_has_attn=rp1a, + update_g2_has_g1g1=rp2g, + update_g2_has_attn=rp2a, + update_h2=rph, + attn1_hidden=20, + attn1_nhead=2, + attn2_hidden=10, + attn2_nhead=2, + attn2_has_gate=rp2gate, + update_style=rus, + set_davg_zero=rpz, + use_sqrt_nnei=ns, + g1_out_conv=ns, + g1_out_mlp=ns, + ) + + # dpa2 new impl + dd0 = DescrptDPA2( + self.nt, + repinit=repinit, + repformer=repformer, + # kwargs for descriptor + smooth=sm, + exclude_types=[], + add_tebd_to_repinit_out=False, + precision=prec, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + + dd0.repinit.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.repinit.stddev = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.mean = paddle.to_tensor(davg_2, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.stddev = paddle.to_tensor(dstd_2, dtype=dtype).to( + device=env.DEVICE + ) + model = paddle.jit.to_static(dd0) diff --git a/source/tests/pd/model/test_embedding_net.py b/source/tests/pd/model/test_embedding_net.py new file mode 100644 index 0000000000..12c42049e8 --- /dev/null +++ b/source/tests/pd/model/test_embedding_net.py @@ -0,0 +1,217 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import re +import unittest + +import numpy as np +import paddle +import tensorflow.compat.v1 as tf + +from deepmd.pd.utils import ( + env, +) + +tf.disable_eager_execution() + +from pathlib import ( + Path, +) + +from deepmd.pd.model.descriptor import ( + DescrptSeA, +) +from deepmd.pd.utils import ( + dp_random, +) +from deepmd.pd.utils.dataset import ( + DeepmdDataSetForLoader, +) +from deepmd.pd.utils.env import ( + DEVICE, + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.tf.common import ( + expand_sys_str, +) +from deepmd.tf.descriptor import DescrptSeA as DescrptSeA_tf + +from ..test_finetune import ( + energy_data_requirement, +) + +CUR_DIR = os.path.dirname(__file__) + + +def gen_key(worb, depth, elemid): + return (worb, depth, elemid) + + +def get_single_batch(dataset, index=None): + if index is None: + index = dp_random.choice(np.arange(len(dataset))) + np_batch = dataset[index] + pt_batch = {} + + for key in [ + "coord", + "box", + "force", + "force_mag", + "energy", + "virial", + "atype", + "natoms", + ]: + if key in np_batch.keys(): + np_batch[key] = np.expand_dims(np_batch[key], axis=0) + pt_batch[key] = paddle.to_tensor(np_batch[key]).to(device=env.DEVICE) + if key in ["coord", "force", "force_mag"]: + np_batch[key] = np_batch[key].reshape(1, -1) + np_batch["natoms"] = np_batch["natoms"][0] + return np_batch, pt_batch + + +def base_se_a(descriptor, coord, atype, natoms, box): + g = tf.Graph() + with g.as_default(): + name_pfx = "d_sea_" + t_coord = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_coord" + ) + t_atype = tf.placeholder(tf.int32, [None, None], name=name_pfx + "t_type") + t_natoms = tf.placeholder( + tf.int32, 
[descriptor.ntypes + 2], name=name_pfx + "t_natoms" + ) + t_box = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_box" + ) + t_default_mesh = tf.placeholder(tf.int32, [None], name=name_pfx + "t_mesh") + t_embedding = descriptor.build( + t_coord, t_atype, t_natoms, t_box, t_default_mesh, input_dict={} + ) + fake_energy = tf.reduce_sum(t_embedding) + t_force = descriptor.prod_force_virial(fake_energy, t_natoms)[0] + t_vars = {} + for var in tf.global_variables(): + ms = re.findall(r"([a-z]+)_(\d)_(\d)", var.name) + if len(ms) == 1: + m = ms[0] + key = gen_key(worb=m[0], depth=int(m[1]), elemid=int(m[2])) + t_vars[key] = var + init_op = tf.global_variables_initializer() + + with tf.Session(graph=g) as sess: + sess.run(init_op) + embedding, force, values = sess.run( + [t_embedding, t_force, t_vars], + feed_dict={ + t_coord: coord, + t_atype: atype, + t_natoms: natoms, + t_box: box, + t_default_mesh: np.array([0, 0, 0, 2, 2, 2]), + }, + ) + tf.reset_default_graph() + return embedding, force, values + + +class TestSeA(unittest.TestCase): + def setUp(self): + dp_random.seed(0) + with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin: + content = fin.read() + config = json.loads(content) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + config["training"]["training_data"]["systems"] = data_file + config["training"]["validation_data"]["systems"] = data_file + model_config = config["model"] + self.rcut = model_config["descriptor"]["rcut"] + self.rcut_smth = model_config["descriptor"]["rcut_smth"] + self.sel = model_config["descriptor"]["sel"] + self.bsz = config["training"]["training_data"]["batch_size"] + self.systems = config["training"]["validation_data"]["systems"] + if isinstance(self.systems, str): + self.systems = expand_sys_str(self.systems) + ds = DeepmdDataSetForLoader( + self.systems[0], + model_config["type_map"], + ) + ds.add_data_requirement(energy_data_requirement) + self.filter_neuron = model_config["descriptor"]["neuron"] + self.axis_neuron = model_config["descriptor"]["axis_neuron"] + self.np_batch, self.paddle_batch = get_single_batch(ds) + + def test_consistency(self): + dp_d = DescrptSeA_tf( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=self.sel, + neuron=self.filter_neuron, + axis_neuron=self.axis_neuron, + seed=1, + ) + dp_embedding, dp_force, dp_vars = base_se_a( + descriptor=dp_d, + coord=self.np_batch["coord"], + atype=self.np_batch["atype"], + natoms=self.np_batch["natoms"], + box=self.np_batch["box"], + ) + + # Reproduced + descriptor = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + neuron=self.filter_neuron, + axis_neuron=self.axis_neuron, + ).to(DEVICE) + for name, param in descriptor.named_parameters(): + ms = re.findall(r"(\d)\.layers\.(\d)\.([a-z]+)", name) + if len(ms) == 1: + m = ms[0] + key = gen_key(worb=m[2], depth=int(m[1]) + 1, elemid=int(m[0])) + var = dp_vars[key] + with paddle.no_grad(): + # Keep parameter value consistency between 2 implentations + paddle.assign(var, param) + + pt_coord = self.paddle_batch["coord"].to(env.DEVICE) + pt_coord.stop_gradient = False + + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + pt_coord, + self.paddle_batch["atype"].to(env.DEVICE), + self.rcut, + self.sel, + mixed_types=False, + box=self.paddle_batch["box"].to(env.DEVICE), + ) + descriptor_out, _, _, _, _ = descriptor( + extended_coord, + extended_atype, + nlist, + ) + my_embedding = descriptor_out.cpu().detach().numpy() + fake_energy = 
paddle.sum(descriptor_out) + fake_energy.backward() + my_force = -pt_coord.grad.cpu().numpy() + + # Check + np.testing.assert_allclose(dp_embedding, my_embedding) + dp_force = dp_force.reshape(*my_force.shape) + np.testing.assert_allclose(dp_force, my_force) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_ener_fitting.py b/source/tests/pd/model/test_ener_fitting.py new file mode 100644 index 0000000000..dd13f139dc --- /dev/null +++ b/source/tests/pd/model/test_ener_fitting.py @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.fitting import InvarFitting as DPInvarFitting +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.model.task.ener import ( + InvarFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + rd0, _, _, _, _ = dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + atype = paddle.to_tensor(self.atype_ext[:, :nloc], dtype="int64").to( + device=env.DEVICE + ) + + for od, mixed_types, nfp, nap, et, nn in itertools.product( + [1, 3], + [True, False], + [0, 3], + [0, 4], + [[], [0], [1]], + [[4, 4, 4], []], + ): + ft0 = InvarFitting( + "foo", + self.nt, + dd0.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=mixed_types, + exclude_types=et, + neuron=nn, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + ft1 = DPInvarFitting.deserialize(ft0.serialize()) + ft2 = InvarFitting.deserialize(ft0.serialize()) + + if nfp > 0: + ifp = paddle.to_tensor( + rng.normal(size=(self.nf, nfp)), dtype=dtype, place=env.DEVICE + ) + else: + ifp = None + if nap > 0: + iap = paddle.to_tensor( + rng.normal(size=(self.nf, self.nloc, nap)), + dtype=dtype, + place=env.DEVICE, + ) + else: + iap = None + + ret0 = ft0(rd0, atype, fparam=ifp, aparam=iap) + ret1 = ft1( + rd0.detach().cpu().numpy(), + atype.detach().cpu().numpy(), + fparam=to_numpy_array(ifp), + aparam=to_numpy_array(iap), + ) + ret2 = ft2(rd0, atype, fparam=ifp, aparam=iap) + np.testing.assert_allclose( + to_numpy_array(ret0["foo"]), + ret1["foo"], + ) + np.testing.assert_allclose( + to_numpy_array(ret0["foo"]), + to_numpy_array(ret2["foo"]), + ) + self.assertEqual(ft0.get_sel_type(), ft1.get_sel_type()) + + def test_jit( + self, + ): + for od, mixed_types, nfp, nap, et in itertools.product( + [1, 3], + [True, False], + [0, 3], + [0, 4], + [[], [0]], + ): + ft0 = InvarFitting( + "foo", + self.nt, + 9, + od, + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=mixed_types, + exclude_types=et, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + paddle.jit.to_static(ft0) + + def test_get_set(self): + ifn0 = InvarFitting( + "energy", + self.nt, + 3, + 1, + seed=GLOBAL_SEED, + ) + rng = np.random.default_rng(GLOBAL_SEED) + foo = rng.normal([3, 4]) + for ii in [ + "bias_atom_e", + 
"fparam_avg", + "fparam_inv_std", + "aparam_avg", + "aparam_inv_std", + ]: + ifn0[ii] = paddle.to_tensor(foo, dtype=dtype).to(device=env.DEVICE) + np.testing.assert_allclose( + foo, np.reshape(ifn0[ii].detach().cpu().numpy(), foo.shape) + ) diff --git a/source/tests/pd/model/test_ener_spin_model.py b/source/tests/pd/model/test_ener_spin_model.py new file mode 100644 index 0000000000..b4573b6a45 --- /dev/null +++ b/source/tests/pd/model/test_ener_spin_model.py @@ -0,0 +1,437 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.model import SpinModel as DPSpinModel +from deepmd.pd.model.model import ( + SpinEnergyModel, + get_model, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_permutation import ( + model_dpa1, + model_dpa2, + model_se_e2_a, + model_spin, +) + +dtype = paddle.float64 + + +def reduce_tensor(extended_tensor, mapping, nloc: int): + nframes, nall = extended_tensor.shape[:2] + ext_dims = extended_tensor.shape[2:] + reduced_tensor = paddle.zeros( + [nframes, nloc, *ext_dims], + dtype=extended_tensor.dtype, + ).to(device=extended_tensor.place) + mldims = list(mapping.shape) + mapping = mapping.reshape(mldims + [1] * len(ext_dims)).expand( + [-1] * len(mldims) + list(ext_dims) + ) + # nf x nloc x (*ext_dims) + reduced_tensor = decomp.scatter_reduce( + reduced_tensor, + 1, + index=mapping, + src=extended_tensor, + reduce="sum", + ) + return reduced_tensor + + +class SpinTest: + def setUp(self): + self.prec = 1e-10 + natoms = 5 + self.ntypes = 3 # ["O", "H", "B"] for test + self.cell = 4.0 * paddle.eye(3, dtype=dtype).to(device=env.DEVICE).unsqueeze(0) + generator = paddle.seed(GLOBAL_SEED) + self.coord = 3.0 * paddle.rand([natoms, 3], dtype=dtype).unsqueeze(0).to( + device=env.DEVICE + ) + self.spin = 0.5 * paddle.rand([natoms, 3], dtype=dtype).unsqueeze(0).to( + device=env.DEVICE + ) + self.atype = paddle.to_tensor( + [0, 0, 0, 1, 1], dtype=paddle.int64, place=env.DEVICE + ).unsqueeze(0) + + self.expected_mask = paddle.to_tensor( + [ + [True], + [True], + [True], + [False], + [False], + ], + dtype=paddle.bool, + place=env.DEVICE, + ).unsqueeze(0) + self.expected_atype_with_spin = paddle.to_tensor( + [0, 0, 0, 1, 1, 3, 3, 3, 4, 4], dtype=paddle.int64, place=env.DEVICE + ).unsqueeze(0) + self.expected_nloc_spin_index = ( + paddle.arange(natoms, natoms * 2, dtype=paddle.int64) + .to(device=env.DEVICE) + .unsqueeze(0) + .unsqueeze(-1) + ) + + def test_output_shape( + self, + ): + result = self.model( + self.coord, + self.atype, + self.spin, + self.cell, + ) + # check magnetic mask + assert np.allclose(result["mask_mag"].numpy(), self.expected_mask.numpy()) + # check output shape to assure split + nframes, nloc = self.coord.shape[:2] + assert np.allclose(result["energy"].shape, [nframes, 1]) + assert np.allclose(result["atom_energy"].shape, [nframes, nloc, 1]) + assert np.allclose(result["force"].shape, [nframes, nloc, 3]) + assert np.allclose(result["force_mag"].shape, [nframes, nloc, 3]) + + def test_input_output_process(self): + nframes, nloc = self.coord.shape[:2] + self.real_ntypes = self.model.spin.get_ntypes_real() + # 1. 
test forward input process + coord_updated, atype_updated = self.model.process_spin_input( + self.coord, self.atype, self.spin + ) + # compare atypes of real and virtual atoms + assert np.allclose(atype_updated.numpy(), self.expected_atype_with_spin.numpy()) + # compare coords of real and virtual atoms + assert np.allclose(coord_updated.shape, [nframes, nloc * 2, 3]) + assert np.allclose(coord_updated[:, :nloc].numpy(), self.coord.numpy()) + virtual_scale = paddle.to_tensor( + self.model.spin.get_virtual_scale_mask()[self.atype.cpu()], + dtype=dtype, + place=env.DEVICE, + ) + virtual_coord = self.coord + self.spin * virtual_scale.unsqueeze(-1) + assert np.allclose(coord_updated[:, nloc:].numpy(), virtual_coord.numpy()) + + # 2. test forward output process + model_ret = self.model.backbone_model.forward_common( + coord_updated, + atype_updated, + self.cell, + do_atomic_virial=True, + ) + if self.model.do_grad_r("energy"): + force_all = model_ret["energy_derv_r"].squeeze(-2) + force_real, force_mag, _ = self.model.process_spin_output( + self.atype, force_all + ) + np.testing.assert_allclose( + force_real.numpy(), (force_all[:, :nloc] + force_all[:, nloc:]).numpy() + ) + np.testing.assert_allclose( + force_mag.numpy(), + (force_all[:, nloc:] * virtual_scale.unsqueeze(-1)).numpy(), + ) + + # 3. test forward_lower input process + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + self.coord, + self.atype, + self.model.get_rcut(), + self.model.get_sel(), + mixed_types=self.model.mixed_types(), + box=self.cell, + ) + nall = extended_coord.shape[1] + nnei = nlist.shape[-1] + extended_spin = decomp.take_along_axis( + self.spin, indices=mapping.unsqueeze(-1).tile((1, 1, 3)), axis=1 + ) + ( + extended_coord_updated, + extended_atype_updated, + nlist_updated, + mapping_updated, + ) = self.model.process_spin_input_lower( + extended_coord, extended_atype, extended_spin, nlist, mapping=mapping + ) + # compare atypes of real and virtual atoms + # Note that the real and virtual atoms corresponding to the local ones are switch to the first nloc * 2 atoms + assert np.allclose(extended_atype_updated.shape, [nframes, nall * 2]) + assert np.allclose( + extended_atype_updated[:, :nloc].numpy(), extended_atype[:, :nloc].numpy() + ) + assert np.allclose( + extended_atype_updated[:, nloc : nloc + nloc].numpy(), + extended_atype[:, :nloc].numpy() + self.real_ntypes, + ) + assert np.allclose( + extended_atype_updated[:, nloc + nloc : nloc + nall].numpy(), + extended_atype[:, nloc:nall].numpy(), + ) + assert np.allclose( + extended_atype_updated[:, nloc + nall :].numpy(), + extended_atype[:, nloc:nall].numpy() + self.real_ntypes, + ) + virtual_scale = paddle.to_tensor( + self.model.spin.get_virtual_scale_mask()[extended_atype.cpu()], + dtype=dtype, + place=env.DEVICE, + ) + # compare coords of real and virtual atoms + virtual_coord = extended_coord + extended_spin * virtual_scale.unsqueeze(-1) + assert np.allclose(extended_coord_updated.shape, [nframes, nall * 2, 3]) + np.testing.assert_allclose( + extended_coord_updated[:, :nloc].numpy(), extended_coord[:, :nloc].numpy() + ) + np.testing.assert_allclose( + extended_coord_updated[:, nloc : nloc + nloc].numpy(), + virtual_coord[:, :nloc].numpy(), + ) + np.testing.assert_allclose( + extended_coord_updated[:, nloc + nloc : nloc + nall].numpy(), + extended_coord[:, nloc:nall].numpy(), + ) + np.testing.assert_allclose( + extended_coord_updated[:, nloc + nall :].numpy(), + virtual_coord[:, nloc:nall].numpy(), + ) + + # 
compare mapping + assert np.allclose(mapping_updated.shape, [nframes, nall * 2]) + assert np.allclose(mapping_updated[:, :nloc].numpy(), mapping[:, :nloc].numpy()) + assert np.allclose( + mapping_updated[:, nloc : nloc + nloc].numpy(), + mapping[:, :nloc].numpy() + nloc, + ) + assert np.allclose( + mapping_updated[:, nloc + nloc : nloc + nall].numpy(), + mapping[:, nloc:nall].numpy(), + ) + assert np.allclose( + mapping_updated[:, nloc + nall :].numpy(), + mapping[:, nloc:nall].numpy() + nloc, + ) + + # compare nlist + assert np.allclose(nlist_updated.shape, [nframes, nloc * 2, nnei * 2 + 1]) + # self spin + assert np.allclose( + nlist_updated[:, :nloc, :1].numpy(), self.expected_nloc_spin_index.numpy() + ) + # real and virtual neighbors + loc_atoms_mask = (nlist < nloc) & (nlist != -1) + ghost_atoms_mask = nlist >= nloc + real_neighbors = nlist.clone() + decomp.masked_add_(real_neighbors, ghost_atoms_mask, nloc) + # real_neighbors[ghost_atoms_mask] += nloc + assert np.allclose( + nlist_updated[:, :nloc, 1 : 1 + nnei].numpy(), real_neighbors.numpy() + ) + virtual_neighbors = nlist.clone() + # virtual_neighbors[loc_atoms_mask] += nloc + decomp.masked_add_(virtual_neighbors, loc_atoms_mask, nloc) + # virtual_neighbors[ghost_atoms_mask] += nall + decomp.masked_add_(virtual_neighbors, ghost_atoms_mask, nall) + assert np.allclose( + nlist_updated[:, :nloc, 1 + nnei :].numpy(), virtual_neighbors.numpy() + ) + + # 4. test forward_lower output process + model_ret = self.model.backbone_model.forward_common_lower( + extended_coord_updated, + extended_atype_updated, + nlist_updated, + mapping=mapping_updated, + do_atomic_virial=True, + ) + if self.model.do_grad_r("energy"): + force_all = model_ret["energy_derv_r"].squeeze(-2) + force_real, force_mag, _ = self.model.process_spin_output_lower( + extended_atype, force_all, nloc + ) + force_all_switched = paddle.zeros_like(force_all) + force_all_switched[:, :nloc] = force_all[:, :nloc] + force_all_switched[:, nloc:nall] = force_all[:, nloc + nloc : nloc + nall] + force_all_switched[:, nall : nall + nloc] = force_all[:, nloc : nloc + nloc] + force_all_switched[:, nall + nloc :] = force_all[:, nloc + nall :] + np.testing.assert_allclose( + force_real.numpy(), + (force_all_switched[:, :nall] + force_all_switched[:, nall:]).numpy(), + ) + np.testing.assert_allclose( + force_mag.numpy(), + (force_all_switched[:, nall:] * virtual_scale.unsqueeze(-1)).numpy(), + ) + + def test_jit(self): + model = paddle.jit.to_static(self.model) + self.assertEqual(model.get_rcut(), self.rcut) + self.assertEqual(model.get_nsel(), self.nsel) + self.assertEqual(model.get_type_map(), self.type_map) + + def test_self_consistency(self): + if hasattr(self, "serial_test") and not self.serial_test: + # not implement serialize and deserialize + return + model1 = SpinEnergyModel.deserialize(self.model.serialize()) + result = model1( + self.coord, + self.atype, + self.spin, + self.cell, + ) + expected_result = self.model( + self.coord, + self.atype, + self.spin, + self.cell, + ) + for key in result: + assert np.allclose( + result[key].numpy(), + expected_result[key].numpy(), + rtol=self.prec, + atol=self.prec, + ) + model1 = paddle.jit.to_static(model1) + + def test_dp_consistency(self): + if hasattr(self, "serial_test") and not self.serial_test: + # not implement serialize and deserialize + return + dp_model = DPSpinModel.deserialize(self.model.serialize()) + # test call + dp_ret = dp_model.call( + to_numpy_array(self.coord), + to_numpy_array(self.atype), + to_numpy_array(self.spin), + 
to_numpy_array(self.cell), + ) + result = self.model.forward_common( + self.coord, + self.atype, + self.spin, + self.cell, + ) + np.testing.assert_allclose( + to_numpy_array(result["energy"]), + dp_ret["energy"], + rtol=self.prec, + atol=self.prec, + ) + np.testing.assert_allclose( + to_numpy_array(result["energy_redu"]), + dp_ret["energy_redu"], + rtol=self.prec, + atol=self.prec, + ) + + # test call_lower + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + self.coord, + self.atype, + self.model.get_rcut(), + self.model.get_sel(), + mixed_types=self.model.mixed_types(), + box=self.cell, + ) + extended_spin = decomp.take_along_axis( + self.spin, indices=mapping.unsqueeze(-1).tile((1, 1, 3)), axis=1 + ) + dp_ret_lower = dp_model.call_lower( + to_numpy_array(extended_coord), + to_numpy_array(extended_atype), + to_numpy_array(extended_spin), + to_numpy_array(nlist), + to_numpy_array(mapping), + ) + result_lower = self.model.forward_common_lower( + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping, + ) + np.testing.assert_allclose( + to_numpy_array(result_lower["energy"]), + dp_ret_lower["energy"], + rtol=self.prec, + atol=self.prec, + ) + np.testing.assert_allclose( + to_numpy_array(result_lower["energy_redu"]), + dp_ret_lower["energy_redu"], + rtol=self.prec, + atol=self.prec, + ) + + +class TestEnergyModelSpinSeA(unittest.TestCase, SpinTest): + def setUp(self): + SpinTest.setUp(self) + model_params = copy.deepcopy(model_spin) + model_params["descriptor"] = copy.deepcopy(model_se_e2_a["descriptor"]) + self.rcut = model_params["descriptor"]["rcut"] + self.nsel = sum(model_params["descriptor"]["sel"]) + self.type_map = model_params["type_map"] + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinDPA1(unittest.TestCase, SpinTest): + def setUp(self): + SpinTest.setUp(self) + model_params = copy.deepcopy(model_spin) + model_params["descriptor"] = copy.deepcopy(model_dpa1["descriptor"]) + self.rcut = model_params["descriptor"]["rcut"] + self.nsel = model_params["descriptor"]["sel"] + self.type_map = model_params["type_map"] + # not implement serialize and deserialize + self.serial_test = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinDPA2(unittest.TestCase, SpinTest): + def setUp(self): + SpinTest.setUp(self) + model_params = copy.deepcopy(model_spin) + model_params["descriptor"] = copy.deepcopy(model_dpa2["descriptor"]) + self.rcut = model_params["descriptor"]["repinit"]["rcut"] + self.nsel = model_params["descriptor"]["repinit"]["nsel"] + self.type_map = model_params["type_map"] + # not implement serialize and deserialize + self.serial_test = False + self.model = get_model(model_params).to(env.DEVICE) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_env_mat.py b/source/tests/pd/model/test_env_mat.py new file mode 100644 index 0000000000..7cbc698264 --- /dev/null +++ b/source/tests/pd/model/test_env_mat.py @@ -0,0 +1,187 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.utils import ( + EnvMat, +) +from deepmd.pd.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestCaseSingleFrameWithNlist: + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 3 + self.nall = 4 + self.nf, self.nt = 2, 2 + 
self.coord_ext = np.array( + [ + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, -2, 0], + ], + dtype=np.float64, + ).reshape([1, self.nall, 3]) + self.atype_ext = np.array([0, 0, 1, 0], dtype="int64").reshape([1, self.nall]) + self.mapping = np.array([0, 1, 2, 0], dtype="int64").reshape([1, self.nall]) + # sel = [5, 2] + self.sel = [5, 2] + self.sel_mix = [7] + self.natoms = [3, 3, 2, 1] + self.nlist = np.array( + [ + [1, 3, -1, -1, -1, 2, -1], + [0, -1, -1, -1, -1, 2, -1], + [0, 1, -1, -1, -1, -1, -1], + ], + dtype="int64", + ).reshape([1, self.nloc, sum(self.sel)]) + self.rcut = 2.2 + self.rcut_smth = 0.4 + # permutations + self.perm = np.array([2, 0, 1, 3], dtype=np.int32) + inv_perm = np.array([1, 2, 0, 3], dtype=np.int32) + # permute the coord and atype + self.coord_ext = np.concatenate( + [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0 + ).reshape(self.nf, self.nall * 3) + self.atype_ext = np.concatenate( + [self.atype_ext, self.atype_ext[:, self.perm]], axis=0 + ) + self.mapping = np.concatenate( + [self.mapping, self.mapping[:, self.perm]], axis=0 + ) + + # permute the nlist + nlist1 = self.nlist[:, self.perm[: self.nloc], :] + mask = nlist1 == -1 + nlist1 = inv_perm[nlist1] + nlist1 = np.where(mask, -1, nlist1) + self.nlist = np.concatenate([self.nlist, nlist1], axis=0) + self.atol = 1e-12 + + +class TestCaseSingleFrameWithNlistWithVirtual: + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 4 + self.nall = 5 + self.nf, self.nt = 2, 2 + self.coord_ext = np.array( + [ + [0, 0, 0], + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, -2, 0], + ], + dtype=np.float64, + ).reshape([1, self.nall, 3]) + self.atype_ext = np.array([0, -1, 0, 1, 0], dtype="int64").reshape( + [1, self.nall] + ) + # sel = [5, 2] + self.sel = [5, 2] + self.sel_mix = [7] + self.natoms = [3, 3, 2, 1] + self.nlist = np.array( + [ + [2, 4, -1, -1, -1, 3, -1], + [-1, -1, -1, -1, -1, -1, -1], + [0, -1, -1, -1, -1, 3, -1], + [0, 2, -1, -1, -1, -1, -1], + ], + dtype="int64", + ).reshape([1, self.nloc, sum(self.sel)]) + self.rcut = 2.2 + self.rcut_smth = 0.4 + # permutations + self.perm = np.array([3, 0, 1, 2, 4], dtype=np.int32) + inv_perm = np.argsort(self.perm) + # permute the coord and atype + self.coord_ext = np.concatenate( + [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0 + ).reshape(self.nf, self.nall * 3) + self.atype_ext = np.concatenate( + [self.atype_ext, self.atype_ext[:, self.perm]], axis=0 + ) + # permute the nlist + nlist1 = self.nlist[:, self.perm[: self.nloc], :] + mask = nlist1 == -1 + nlist1 = inv_perm[nlist1] + nlist1 = np.where(mask, -1, nlist1) + self.nlist = np.concatenate([self.nlist, nlist1], axis=0) + self.get_real_mapping = np.array([[0, 2, 3], [0, 1, 3]], dtype=np.int32) + self.atol = 1e-12 + + +class TestCaseSingleFrameWithoutNlist: + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 3 + self.nf, self.nt = 1, 2 + self.coord = np.array( + [ + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + ], + dtype=np.float64, + ).reshape([1, self.nloc * 3]) + self.atype = np.array([0, 0, 1], dtype="int64").reshape([1, self.nloc]) + self.cell = 2.0 * np.eye(3).reshape([1, 9]) + # sel = [5, 2] + self.sel = [16, 8] + self.sel_mix = [24] + self.natoms = [3, 3, 2, 1] + self.rcut = 2.2 + self.rcut_smth = 0.4 + self.atol = 1e-12 + + +# to be merged with the tf test case +class TestEnvMat(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = 
self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + em0 = EnvMat(self.rcut, self.rcut_smth) + mm0, diff0, ww0 = em0.call( + self.coord_ext, self.atype_ext, self.nlist, davg, dstd + ) + mm1, diff1, ww1 = prod_env_mat( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext[:, :nloc], dtype="int64").to( + device=env.DEVICE + ), + paddle.to_tensor(davg).to(device=env.DEVICE), + paddle.to_tensor(dstd).to(device=env.DEVICE), + self.rcut, + self.rcut_smth, + ) + np.testing.assert_allclose(mm0, mm1.detach().cpu().numpy()) + np.testing.assert_allclose(diff0, diff1.detach().cpu().numpy()) + np.testing.assert_allclose(ww0, ww1.detach().cpu().numpy()) + np.testing.assert_allclose(mm0[0][self.perm[: self.nloc]], mm0[1]) diff --git a/source/tests/pd/model/test_exclusion_mask.py b/source/tests/pd/model/test_exclusion_mask.py new file mode 100644 index 0000000000..ff479ee7db --- /dev/null +++ b/source/tests/pd/model/test_exclusion_mask.py @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np + +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.exclude_mask import ( + AtomExcludeMask, + PairExcludeMask, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestAtomExcludeMask(unittest.TestCase): + def test_build_type_exclude_mask(self): + nf = 2 + nt = 3 + exclude_types = [0, 2] + atype = np.array( + [ + [0, 2, 1, 2, 0, 1, 0], + [1, 2, 0, 0, 2, 2, 1], + ], + dtype=np.int32, + ).reshape([nf, -1]) + expected_mask = np.array( + [ + [0, 0, 1, 0, 0, 1, 0], + [1, 0, 0, 0, 0, 0, 1], + ] + ).reshape([nf, -1]) + des = AtomExcludeMask(nt, exclude_types=exclude_types) + mask = des(to_paddle_tensor(atype)) + np.testing.assert_equal(to_numpy_array(mask), expected_mask) + + +# to be merged with the tf test case +class TestPairExcludeMask(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_build_type_exclude_mask(self): + exclude_types = [[0, 1]] + expected_mask = np.array( + [ + [1, 1, 1, 1, 1, 0, 1], + [1, 1, 1, 1, 1, 0, 1], + [0, 0, 1, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 0, 1], + [1, 1, 1, 1, 1, 0, 1], + ] + ).reshape(self.nf, self.nloc, sum(self.sel)) + des = PairExcludeMask(self.nt, exclude_types=exclude_types).to(env.DEVICE) + mask = des( + to_paddle_tensor(self.nlist), + to_paddle_tensor(self.atype_ext), + ) + np.testing.assert_equal(to_numpy_array(mask), expected_mask) diff --git a/source/tests/pd/model/test_fitting_net.py b/source/tests/pd/model/test_fitting_net.py new file mode 100644 index 0000000000..9a4d4d128f --- /dev/null +++ b/source/tests/pd/model/test_fitting_net.py @@ -0,0 +1,148 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import re +import unittest + +import numpy as np +import paddle +import tensorflow.compat.v1 as tf + +tf.disable_eager_execution() + +from deepmd.pd.model.task import ( + EnergyFittingNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.tf.fit.ener import ( + EnerFitting, +) + +from ...seed import ( + GLOBAL_SEED, +) + + +class FakeDescriptor: + def __init__(self, ntypes, embedding_width): + self._ntypes = 
ntypes + self._dim_out = embedding_width + + def get_ntypes(self): + return self._ntypes + + def get_dim_out(self): + return self._dim_out + + +def gen_key(type_id, layer_id, w_or_b): + return (type_id, layer_id, w_or_b) + + +def base_fitting_net(dp_fn, embedding, natoms, atype): + g = tf.Graph() + with g.as_default(): + t_embedding = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None]) + t_natoms = tf.placeholder(tf.int32, [None]) + t_atype = tf.placeholder(tf.int32, [None, None]) + t_energy = dp_fn.build(t_embedding, t_natoms, {"atype": t_atype}) + init_op = tf.global_variables_initializer() + t_vars = {} + for var in tf.global_variables(): + key = None + matched = re.match(r"layer_(\d)_type_(\d)/([a-z]+)", var.name) + if matched: + key = gen_key( + type_id=matched.group(2), + layer_id=matched.group(1), + w_or_b=matched.group(3), + ) + else: + matched = re.match(r"final_layer_type_(\d)/([a-z]+)", var.name) + if matched: + key = gen_key( + type_id=matched.group(1), layer_id=-1, w_or_b=matched.group(2) + ) + if key is not None: + t_vars[key] = var + + with tf.Session(graph=g) as sess: + sess.run(init_op) + energy, values = sess.run( + [t_energy, t_vars], + feed_dict={ + t_embedding: embedding, + t_natoms: natoms, + t_atype: atype, + }, + ) + tf.reset_default_graph() + return energy, values + + +class TestFittingNet(unittest.TestCase): + def setUp(self): + nloc = 7 + self.embedding_width = 30 + self.natoms = np.array([nloc, nloc, 2, 5], dtype=np.int32) + rng = np.random.default_rng(GLOBAL_SEED) + self.embedding = rng.uniform(size=[4, nloc * self.embedding_width]) + self.ntypes = self.natoms.size - 2 + self.n_neuron = [32, 32, 32] + self.atype = np.zeros([4, nloc], dtype=np.int32) + cnt = 0 + for i in range(self.ntypes): + self.atype[:, cnt : cnt + self.natoms[i + 2]] = i + cnt += self.natoms[i + 2] + + fake_d = FakeDescriptor(2, 30) + self.dp_fn = EnerFitting( + fake_d.get_ntypes(), fake_d.get_dim_out(), self.n_neuron + ) + self.dp_fn.bias_atom_e = rng.uniform(size=[self.ntypes]) + + def test_consistency(self): + dp_energy, values = base_fitting_net( + self.dp_fn, self.embedding, self.natoms, self.atype + ) + my_fn = EnergyFittingNet( + self.ntypes, + self.embedding_width, + neuron=self.n_neuron, + bias_atom_e=self.dp_fn.bias_atom_e, + mixed_types=False, + ).to(env.DEVICE) + for name, param in my_fn.named_parameters(): + matched = re.match( + r"filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name + ) + key = None + if matched: + if int(matched.group(2)) == len(self.n_neuron): + layer_id = -1 + else: + layer_id = matched.group(2) + key = gen_key( + type_id=matched.group(1), + layer_id=layer_id, + w_or_b=matched.group(3), + ) + assert key is not None + var = values[key] + with paddle.no_grad(): + # Keep parameter value consistency between 2 implentations + paddle.assign(var, param) + embedding = paddle.to_tensor(self.embedding) + embedding = embedding.reshape([4, -1, self.embedding_width]) + atype = paddle.to_tensor(self.atype) + ret = my_fn(embedding.to(env.DEVICE), atype.to(env.DEVICE)) + my_energy = ret["energy"] + my_energy = my_energy.detach().cpu() + np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1])) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_force_grad.py b/source/tests/pd/model/test_force_grad.py new file mode 100644 index 0000000000..d7b569ef38 --- /dev/null +++ b/source/tests/pd/model/test_force_grad.py @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import json +import 
unittest +from pathlib import ( + Path, +) +from typing import ( + Optional, +) + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.utils.data import ( + DeepmdData, +) + +from ...seed import ( + GLOBAL_SEED, +) + + +class CheckSymmetry(DeepmdData): + def __init__( + self, + sys_path: str, + type_map: Optional[list[str]] = None, + ): + super().__init__(sys_path=sys_path, type_map=type_map) + self.add("energy", 1, atomic=False, must=False, high_prec=True) + self.add("force", 3, atomic=True, must=False, high_prec=False) + self.add("virial", 9, atomic=False, must=False, high_prec=False) + + def get_disturb(self, index, atom_index, axis_index, delta): + for i in range( + 0, len(self.dirs) + 1 + ): # note: if different sets can be merged, prefix sum is unused to calculate + if index < self.prefix_sum[i]: + break + frames = self._load_set(self.dirs[i - 1]) + tmp = copy.deepcopy(frames["coord"].reshape(self.nframes, -1, 3)) + tmp[:, atom_index, axis_index] += delta + frames["coord"] = tmp + frame = self._get_subdata(frames, index - self.prefix_sum[i - 1]) + frame = self.reformat_data_torch(frame) + return frame + + +def get_data(batch): + inputs = {} + for key in ["coord", "atype", "box"]: + inputs[key] = batch[key].unsqueeze(0).to(env.DEVICE) + return inputs + + +class TestForceGrad(unittest.TestCase): + def setUp(self): + with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin: + self.config = json.load(fin) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.system_index = 0 + self.batch_index = 0 + self.get_dataset(self.system_index, self.batch_index) + self.get_model() + + def get_model(self): + self.model = get_model(self.config["model"]).to(env.DEVICE) + + def get_dataset(self, system_index=0, batch_index=0): + systems = self.config["training"]["training_data"]["systems"] + rcut = self.config["model"]["descriptor"]["rcut"] + sel = self.config["model"]["descriptor"]["sel"] + sec = paddle.cumsum(paddle.to_tensor(sel), axis=0) + type_map = self.config["model"]["type_map"] + self.dpdatasystem = CheckSymmetry( + sys_path=systems[system_index], type_map=type_map + ) + self.origin_batch = self.dpdatasystem.get_item_paddle(batch_index) + + @unittest.skip("it can be replaced by autodiff") + def test_force_grad(self, threshold=1e-2, delta0=1e-6, seed=20): + rng = np.random.default_rng(GLOBAL_SEED) + result0 = self.model(**get_data(self.origin_batch)) + np.random.default_rng(seed) + errors = np.zeros((self.dpdatasystem.natoms, 3)) + for atom_index in range(self.dpdatasystem.natoms): + for axis_index in range(3): + delta = rng.random() * delta0 + disturb_batch = self.dpdatasystem.get_disturb( + self.batch_index, atom_index, axis_index, delta + ) + disturb_result = self.model(**get_data(disturb_batch)) + disturb_force = -(disturb_result["energy"] - result0["energy"]) / delta + disturb_error = ( + result0["force"][0, atom_index, axis_index] - disturb_force + ) + errors[atom_index, axis_index] = disturb_error.detach().cpu().numpy() + self.assertTrue(np.abs(errors).max() < threshold, msg=str(np.abs(errors).max())) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_forward_lower.py b/source/tests/pd/model/test_forward_lower.py new file mode 100644 index 0000000000..213369ea12 --- /dev/null +++ 
b/source/tests/pd/model/test_forward_lower.py @@ -0,0 +1,202 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + decomp, + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation import ( # model_dpau, + model_dpa1, + model_dpa2, + model_se_e2_a, + model_spin, + model_zbl, +) + +dtype = paddle.float64 + + +def reduce_tensor(extended_tensor, mapping, nloc: int): + nframes, nall = extended_tensor.shape[:2] + ext_dims = extended_tensor.shape[2:] + reduced_tensor = paddle.zeros( + [nframes, nloc, *ext_dims], + dtype=extended_tensor.dtype, + ).to(device=extended_tensor.place) + mldims = list(mapping.shape) + mapping = mapping.reshape(mldims + [1] * len(ext_dims)).expand( + [-1] * len(mldims) + list(ext_dims) + ) + # nf x nloc x (*ext_dims) + reduced_tensor = decomp.scatter_reduce( + reduced_tensor, + 1, + index=mapping, + src=extended_tensor, + reduce="sum", + ) + return reduced_tensor + + +class ForwardLowerTest: + def test( + self, + ): + prec = self.prec + natoms = 5 + cell = 4.0 * paddle.eye(3, dtype=dtype).to(device=env.DEVICE) + generator = paddle.seed(GLOBAL_SEED) + coord = 3.0 * paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + spin = 0.5 * paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + atype = paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int64).to( + device=env.DEVICE + ) + test_spin = getattr(self, "test_spin", False) + if not test_spin: + test_keys = ["energy", "force", "virial"] + else: + test_keys = ["energy", "force", "force_mag"] + + result_forward = eval_model( + self.model, + coord.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord.unsqueeze(0), + atype.unsqueeze(0), + self.model.get_rcut() + 1.0 + if test_spin + else self.model.get_rcut(), # buffer region for spin nlist + self.model.get_sel(), + mixed_types=self.model.mixed_types(), + box=cell.unsqueeze(0), + ) + extended_spin = decomp.take_along_axis( + spin.unsqueeze(0), indices=mapping.unsqueeze(-1).tile((1, 1, 3)), axis=1 + ) + input_dict = { + "extended_coord": extended_coord, + "extended_atype": extended_atype, + "nlist": nlist, + "mapping": mapping, + "do_atomic_virial": False, + } + if test_spin: + input_dict["extended_spin"] = extended_spin + result_forward_lower = self.model.forward_lower(**input_dict) + for key in test_keys: + if key in ["energy"]: + np.testing.assert_allclose( + result_forward_lower[key].numpy(), + result_forward[key].numpy(), + rtol=prec, + atol=prec, + ) + elif key in ["force", "force_mag"]: + reduced_vv = reduce_tensor( + result_forward_lower[f"extended_{key}"], mapping, natoms + ) + np.testing.assert_allclose( + reduced_vv.numpy(), + result_forward[key].numpy(), + rtol=prec, + atol=prec, + ) + elif key == "virial": + if not hasattr(self, "test_virial") or self.test_virial: + np.testing.assert_allclose( + result_forward_lower[key].numpy(), + result_forward[key].numpy(), + rtol=prec, + atol=prec, + ) + else: + raise RuntimeError(f"Unexpected test key {key}") + + +class TestEnergyModelSeA(unittest.TestCase, ForwardLowerTest): + def setUp(self): + self.prec = 1e-10 + model_params = copy.deepcopy(model_se_e2_a) + self.model = 
get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA1(unittest.TestCase, ForwardLowerTest): + def setUp(self): + self.prec = 1e-10 + model_params = copy.deepcopy(model_dpa1) + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA2(unittest.TestCase, ForwardLowerTest): + def setUp(self): + self.prec = 1e-10 + model_params = copy.deepcopy(model_dpa2) + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelZBL(unittest.TestCase, ForwardLowerTest): + def setUp(self): + self.prec = 1e-10 + model_params = copy.deepcopy(model_zbl) + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinSeA(unittest.TestCase, ForwardLowerTest): + def setUp(self): + self.prec = 1e-10 + model_params = copy.deepcopy(model_spin) + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinDPA1(unittest.TestCase, ForwardLowerTest): + def setUp(self): + self.prec = 1e-10 + model_params = copy.deepcopy(model_spin) + model_params["descriptor"] = copy.deepcopy(model_dpa1)["descriptor"] + # double sel for virtual atoms to avoid large error + model_params["descriptor"]["sel"] *= 2 + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinDPA2(unittest.TestCase, ForwardLowerTest): + def setUp(self): + self.prec = 1e-10 + model_params = copy.deepcopy(model_spin) + model_params["descriptor"] = copy.deepcopy(model_dpa2)["descriptor"] + # double sel for virtual atoms to avoid large error + model_params["descriptor"]["repinit"]["nsel"] *= 2 + model_params["descriptor"]["repformer"]["nsel"] *= 2 + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_get_model.py b/source/tests/pd/model/test_get_model.py new file mode 100644 index 0000000000..7ace7c4e43 --- /dev/null +++ b/source/tests/pd/model/test_get_model.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +dtype = paddle.float64 + +model_se_e2_a = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "se_e2_a", + "sel": [46, 92, 4], + "rcut_smth": 0.50, + "rcut": 4.00, + "neuron": [25, 50, 100], + "resnet_dt": False, + "axis_neuron": 16, + "seed": 1, + }, + "fitting_net": { + "neuron": [24, 24, 24], + "resnet_dt": True, + "seed": 1, + }, + "data_stat_nbatch": 20, + "atom_exclude_types": [1], + "pair_exclude_types": [[1, 2]], + "preset_out_bias": { + "energy": [ + None, + [1.0], + [3.0], + ] + }, +} + + +class TestGetModel(unittest.TestCase): + def test_model_attr(self): + model_params = copy.deepcopy(model_se_e2_a) + self.model = get_model(model_params).to(env.DEVICE) + atomic_model = self.model.atomic_model + self.assertEqual(atomic_model.type_map, ["O", "H", "B"]) + self.assertEqual( + atomic_model.preset_out_bias, + { + "energy": [ + None, + np.array([1.0]), + np.array([3.0]), + ] + }, + ) + self.assertEqual(atomic_model.atom_exclude_types, [1]) + self.assertEqual(atomic_model.pair_exclude_types, [[1, 2]]) + + def test_model_attr_energy_float(self): + model_params = copy.deepcopy(model_se_e2_a) + model_params["preset_out_bias"] = {"energy": ["1.", 3, None]} + self.model = get_model(model_params).to(env.DEVICE) + atomic_model = self.model.atomic_model + self.assertEqual(atomic_model.type_map, 
["O", "H", "B"]) + self.assertEqual( + atomic_model.preset_out_bias, + { + "energy": [ + np.array([1.0]), + np.array([3.0]), + None, + ] + }, + ) + self.assertEqual(atomic_model.atom_exclude_types, [1]) + self.assertEqual(atomic_model.pair_exclude_types, [[1, 2]]) + + def test_model_attr_energy_unsupported_type(self): + model_params = copy.deepcopy(model_se_e2_a) + model_params["preset_out_bias"] = {"energy": [1.0 + 2.0j, 3, None]} + with self.assertRaises(ValueError): + self.model = get_model(model_params).to(env.DEVICE) + + def test_model_attr_energy_unsupported_value(self): + model_params = copy.deepcopy(model_se_e2_a) + model_params["preset_out_bias"] = {"energy": ["1.0 + 2.0j", 3, None]} + with self.assertRaises(ValueError): + self.model = get_model(model_params).to(env.DEVICE) + + def test_notset_model_attr(self): + model_params = copy.deepcopy(model_se_e2_a) + model_params.pop("atom_exclude_types") + model_params.pop("pair_exclude_types") + model_params.pop("preset_out_bias") + self.model = get_model(model_params).to(env.DEVICE) + atomic_model = self.model.atomic_model + self.assertEqual(atomic_model.type_map, ["O", "H", "B"]) + self.assertEqual(atomic_model.preset_out_bias, None) + self.assertEqual(atomic_model.atom_exclude_types, []) + self.assertEqual(atomic_model.pair_exclude_types, []) + + def test_preset_wrong_len(self): + model_params = copy.deepcopy(model_se_e2_a) + model_params["preset_out_bias"] = {"energy": [None]} + with self.assertRaises(ValueError): + self.model = get_model(model_params).to(env.DEVICE) diff --git a/source/tests/pd/model/test_jit.py b/source/tests/pd/model/test_jit.py new file mode 100644 index 0000000000..f4f755d2eb --- /dev/null +++ b/source/tests/pd/model/test_jit.py @@ -0,0 +1,170 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import paddle +from paddle.static import ( + InputSpec, +) + +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.infer import ( + inference, +) + +from .test_permutation import ( + model_dos, + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, +) + + +class JITTest: + def test_jit(self): + trainer = get_trainer(deepcopy(self.config)) + trainer.run() + paddle.set_flags( + { + "FLAGS_save_cf_stack_op": 1, + "FLAGS_prim_enable_dynamic": 1, + "FLAGS_enable_pir_api": 1, + } + ) + model = paddle.jit.to_static( + inference.Tester("./model.pd").model, full_graph=True + ) + paddle.jit.save( + model, + "./frozen_model", + input_spec=[ + InputSpec([-1, -1, 3], dtype="float64"), + InputSpec([-1, -1], dtype="int32"), + InputSpec([-1, -1, -1], dtype="int32"), + ], + ) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith("pt"): + os.remove(f) + if f in ["lcurve.out", "frozen_model.json"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + if f in ["checkpoint"]: + os.remove(f) + + +class TestEnergyModelSeA(unittest.TestCase, JITTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["training"]["numb_steps"] = 10 + self.config["training"]["save_freq"] = 10 + + def tearDown(self): 
+ JITTest.tearDown(self) + + +class TestDOSModelSeA(unittest.TestCase, JITTest): + def setUp(self): + input_json = str(Path(__file__).parent.parent / "dos/input.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent.parent / "dos/data/global_system")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dos) + self.config["training"]["numb_steps"] = 10 + self.config["training"]["save_freq"] = 10 + + def tearDown(self): + JITTest.tearDown(self) + + +class TestEnergyModelDPA1(unittest.TestCase, JITTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa1) + self.config["training"]["numb_steps"] = 10 + self.config["training"]["save_freq"] = 10 + + def tearDown(self): + JITTest.tearDown(self) + + +class TestEnergyModelDPA2(unittest.TestCase, JITTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa2) + self.config["training"]["numb_steps"] = 10 + self.config["training"]["save_freq"] = 10 + + def tearDown(self): + JITTest.tearDown(self) + + +@unittest.skip("generated_tensor_2553 can not when jit.save") +class TestEnergyModelHybrid(unittest.TestCase, JITTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_hybrid) + self.config["training"]["numb_steps"] = 10 + self.config["training"]["save_freq"] = 10 + + def tearDown(self): + JITTest.tearDown(self) + + +@unittest.skip("generated_tensor_2553 can not when jit.save") +class TestEnergyModelHybrid2(unittest.TestCase, JITTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_hybrid) + # self.config["model"]["descriptor"]["hybrid_mode"] = "sequential" + self.config["training"]["numb_steps"] = 10 + self.config["training"]["save_freq"] = 10 + + def tearDown(self): + JITTest.tearDown(self) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_linear_atomic_model.py b/source/tests/pd/model/test_linear_atomic_model.py new file mode 100644 index 0000000000..3e9c916a90 --- /dev/null +++ b/source/tests/pd/model/test_linear_atomic_model.py @@ -0,0 +1,218 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest +from 
unittest.mock import ( + patch, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.atomic_model import ( + DPZBLLinearEnergyAtomicModel as DPDPZBLLinearEnergyAtomicModel, +) +from deepmd.pd.model.atomic_model import ( + DPAtomicModel, + DPZBLLinearEnergyAtomicModel, + PairTabAtomicModel, +) +from deepmd.pd.model.descriptor import ( + DescrptDPA1, +) +from deepmd.pd.model.model import ( + DPZBLModel, +) +from deepmd.pd.model.task.ener import ( + InvarFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestWeightCalculation(unittest.TestCase): + @patch("numpy.loadtxt") + def test_pairwise(self, mock_loadtxt): + file_path = "dummy_path" + mock_loadtxt.return_value = np.array( + [ + [0.05, 1.0, 2.0, 3.0], + [0.1, 0.8, 1.6, 2.4], + [0.15, 0.5, 1.0, 1.5], + [0.2, 0.25, 0.4, 0.75], + [0.25, 0.0, 0.0, 0.0], + ] + ) + extended_atype = paddle.to_tensor([[0, 0]]).to(device=env.DEVICE) + nlist = paddle.to_tensor([[[1], [-1]]]).to(device=env.DEVICE) + + ds = DescrptDPA1( + rcut_smth=0.3, + rcut=0.4, + sel=[3], + ntypes=2, + ).to(env.DEVICE) + ft = InvarFitting( + "energy", + 2, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + + type_map = ["foo", "bar"] + zbl_model = PairTabAtomicModel( + tab_file=file_path, rcut=0.3, sel=2, type_map=type_map[::-1] + ) + dp_model = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE) + wgt_model = DPZBLLinearEnergyAtomicModel( + dp_model, + zbl_model, + sw_rmin=0.1, + sw_rmax=0.25, + type_map=type_map, + ).to(env.DEVICE) + wgt_res = [] + for dist in np.linspace(0.05, 0.3, 10): + extended_coord = paddle.to_tensor( + [ + [ + [0.0, 0.0, 0.0], + [0.0, dist, 0.0], + ], + ], + dtype=paddle.float64, + place=env.DEVICE, + ) + + wgt_model.forward_atomic(extended_coord, extended_atype, nlist) + + wgt_res.append(wgt_model.zbl_weight) + results = paddle.stack(wgt_res).reshape([10, 2]) + excepted_res = paddle.to_tensor( + [ + [1.0, 0.0], + [1.0, 0.0], + [0.9995, 0.0], + [0.9236, 0.0], + [0.6697, 0.0], + [0.3303, 0.0], + [0.0764, 0.0], + [0.0005, 0.0], + [0.0, 0.0], + [0.0, 0.0], + ], + dtype=paddle.float64, + place=env.DEVICE, + ) + np.testing.assert_allclose( + results.numpy(), excepted_res.numpy(), rtol=0.0001, atol=0.0001 + ) + + +class TestIntegration(unittest.TestCase, TestCaseSingleFrameWithNlist): + @patch("numpy.loadtxt") + def setUp(self, mock_loadtxt): + TestCaseSingleFrameWithNlist.setUp(self) + file_path = "dummy_path" + mock_loadtxt.return_value = np.array( + [ + [0.005, 1.0, 2.0, 3.0], + [0.01, 0.8, 1.6, 2.4], + [0.015, 0.5, 1.0, 1.5], + [0.02, 0.25, 0.4, 0.75], + ] + ) + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = InvarFitting( + "energy", + self.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ).to(env.DEVICE) + type_map = ["foo", "bar"] + dp_model = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE) + zbl_model = PairTabAtomicModel( + file_path, self.rcut, sum(self.sel), type_map=type_map + ) + self.md0 = DPZBLLinearEnergyAtomicModel( + dp_model, + zbl_model, + sw_rmin=0.1, + sw_rmax=0.25, + type_map=type_map, + ).to(env.DEVICE) + self.md1 = DPZBLLinearEnergyAtomicModel.deserialize(self.md0.serialize()).to( + env.DEVICE + ) + self.md2 = DPDPZBLLinearEnergyAtomicModel.deserialize(self.md0.serialize()) + self.md3 
= DPZBLModel( + dp_model, zbl_model, sw_rmin=0.1, sw_rmax=0.25, type_map=type_map + ) + + def test_self_consistency(self): + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + ret0 = self.md0.forward_atomic(*args) + ret1 = self.md1.forward_atomic(*args) + ret2 = self.md2.forward_atomic(self.coord_ext, self.atype_ext, self.nlist) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + ) + + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), ret2["energy"], atol=0.001, rtol=0.001 + ) + + def test_jit(self): + md1 = paddle.jit.to_static(self.md1) + # atomic model no more export methods + # self.assertEqual(md1.get_rcut(), self.rcut) + # self.assertEqual(md1.get_type_map(), ["foo", "bar"]) + md3 = paddle.jit.to_static(self.md3) + # atomic model no more export methods + # self.assertEqual(md3.get_rcut(), self.rcut) + # self.assertEqual(md3.get_type_map(), ["foo", "bar"]) + + +class TestRemmapMethod(unittest.TestCase): + def test_valid(self): + generator = paddle.seed(GLOBAL_SEED) + atype = paddle.randint(0, 3, (4, 20)).to(device=env.DEVICE) + commonl = ["H", "O", "S"] + originl = ["Si", "H", "O", "S"] + mapping = DPZBLLinearEnergyAtomicModel.remap_atype(originl, commonl) + new_atype = mapping[atype] + + def trans(atype, map): + idx = atype.flatten().tolist() + res = [] + for i in idx: + res.append(map[i]) + return res + + assert trans(atype, commonl) == trans(new_atype, originl) + + +if __name__ == "__main__": + unittest.main(warnings="ignore") diff --git a/source/tests/pd/model/test_linear_atomic_model_stat.py b/source/tests/pd/model/test_linear_atomic_model_stat.py new file mode 100644 index 0000000000..086a2e20de --- /dev/null +++ b/source/tests/pd/model/test_linear_atomic_model_stat.py @@ -0,0 +1,248 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import tempfile +import unittest +from pathlib import ( + Path, +) +from typing import ( + Optional, +) + +import h5py +import numpy as np +import paddle + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pd.model.atomic_model import ( + DPAtomicModel, + LinearEnergyAtomicModel, +) +from deepmd.pd.model.descriptor.dpa1 import ( + DescrptDPA1, +) +from deepmd.pd.model.task.base_fitting import ( + BaseFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.path import ( + DPPath, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class FooFittingA(paddle.nn.Layer, BaseFitting): + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + "energy", + [1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def serialize(self) -> dict: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + raise NotImplementedError + + def get_type_map(self) -> list[str]: + raise NotImplementedError + + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + nf, nloc, _ = descriptor.shape + ret = {} + ret["energy"] = ( + paddle.to_tensor( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ) + .reshape([nf, nloc, 
*self.output_def()["energy"].shape]) + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + + return ret + + +class FooFittingB(paddle.nn.Layer, BaseFitting): + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + "energy", + [1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def serialize(self) -> dict: + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + raise NotImplementedError + + def get_type_map(self) -> list[str]: + raise NotImplementedError + + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + nf, nloc, _ = descriptor.shape + ret = {} + ret["energy"] = ( + paddle.to_tensor( + [ + [7.0, 8.0, 9.0], + [10.0, 11.0, 12.0], + ] + ) + .reshape([nf, nloc, *self.output_def()["energy"].shape]) + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + + return ret + + +class TestAtomicModelStat(unittest.TestCase, TestCaseSingleFrameWithNlist): + def tearDown(self): + self.tempdir.cleanup() + + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + nf, nloc, nnei = self.nlist.shape + self.merged_output_stat = [ + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 1, 3 + "energy": to_paddle_tensor(np.array([5.0, 7.0]).reshape(2, 1)), + "find_energy": np.float32(1.0), + } + ] + self.tempdir = tempfile.TemporaryDirectory() + h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) + with h5py.File(h5file, "w") as f: + pass + self.stat_file_path = DPPath(h5file, "a") + + def test_linear_atomic_model_stat_with_bias(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft_a = FooFittingA().to(env.DEVICE) + ft_b = FooFittingB().to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = DPAtomicModel( + ds, + ft_a, + type_map=type_map, + ).to(env.DEVICE) + md1 = DPAtomicModel( + ds, + ft_b, + type_map=type_map, + ).to(env.DEVICE) + linear_model = LinearEnergyAtomicModel([md0, md1], type_map=type_map).to( + env.DEVICE + ) + + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + # 1. test run without bias + # nf x na x odim + ret0 = linear_model.forward_common_atomic(*args) + + ret0 = to_numpy_array(ret0["energy"]) + ret_no_bias = [] + for md in linear_model.models: + ret_no_bias.append( + to_numpy_array(md.forward_common_atomic(*args)["energy"]) + ) + expected_ret0 = np.array( + [ + [4.0, 5.0, 6.0], + [7.0, 8.0, 9.0], + ] + ).reshape(nf, nloc, *linear_model.fitting_output_def()["energy"].shape) + + np.testing.assert_almost_equal(ret0, expected_ret0) + + # 2. test bias is applied + linear_model.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + # bias applied to sub atomic models. 
+ ener_bias = np.array([1.0, 3.0]).reshape(2, 1) + linear_ret = [] + for idx, md in enumerate(linear_model.models): + ret = md.forward_common_atomic(*args) + ret = to_numpy_array(ret["energy"]) + linear_ret.append(ret_no_bias[idx] + ener_bias[at]) + np.testing.assert_almost_equal((ret_no_bias[idx] + ener_bias[at]), ret) + + # linear model not adding bias again + ret1 = linear_model.forward_common_atomic(*args) + ret1 = to_numpy_array(ret1["energy"]) + np.testing.assert_almost_equal(np.mean(np.stack(linear_ret), axis=0), ret1) diff --git a/source/tests/pd/model/test_make_hessian_model.py b/source/tests/pd/model/test_make_hessian_model.py new file mode 100644 index 0000000000..79a7c4f163 --- /dev/null +++ b/source/tests/pd/model/test_make_hessian_model.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.output_def import ( + OutputVariableCategory, +) +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.model.model import ( + EnergyModel, + make_hessian_model, +) +from deepmd.pd.model.task.ener import ( + InvarFitting, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +from ...seed import ( + GLOBAL_SEED, +) + +dtype = paddle.float64 + + +def finite_hessian(f, x, delta=1e-6): + in_shape = x.shape + assert len(in_shape) == 1 + y0 = f(x) + out_shape = y0.shape + res = np.empty(out_shape + in_shape + in_shape) + for iidx in np.ndindex(*in_shape): + for jidx in np.ndindex(*in_shape): + i0 = np.zeros(in_shape) + i1 = np.zeros(in_shape) + i2 = np.zeros(in_shape) + i3 = np.zeros(in_shape) + i0[iidx] += delta + i2[iidx] += delta + i1[iidx] -= delta + i3[iidx] -= delta + i0[jidx] += delta + i1[jidx] += delta + i2[jidx] -= delta + i3[jidx] -= delta + y0 = f(x + i0) + y1 = f(x + i1) + y2 = f(x + i2) + y3 = f(x + i3) + res[(Ellipsis, *iidx, *jidx)] = (y0 + y3 - y1 - y2) / (4 * delta**2.0) + return res + + +class HessianTest: + def test( + self, + ): + # setup test case + places = 6 + delta = 1e-3 + natoms = self.nloc + nf = self.nf + nv = self.nv + generator = paddle.seed(GLOBAL_SEED) + cell0 = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + cell0 = 1.0 * (cell0 + cell0.T) + 5.0 * paddle.eye(3).to(device=env.DEVICE) + cell1 = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + cell1 = 1.0 * (cell1 + cell1.T) + 5.0 * paddle.eye(3).to(device=env.DEVICE) + cell = paddle.stack([cell0, cell1]) + coord = paddle.rand([nf, natoms, 3], dtype=dtype).to(device=env.DEVICE) + coord = paddle.matmul(coord, cell) + cell = cell.reshape([nf, 9]) + coord = coord.reshape([nf, natoms * 3]) + atype = ( + paddle.stack( + [ + paddle.to_tensor([0, 0, 1]), + paddle.to_tensor([1, 0, 1]), + ] + ) + .reshape([nf, natoms]) + .to(env.DEVICE) + ) + nfp, nap = 2, 3 + fparam = paddle.rand([nf, nfp], dtype=dtype).to(device=env.DEVICE) + aparam = paddle.rand([nf, natoms * nap], dtype=dtype).to(device=env.DEVICE) + # forward hess and valu models + ret_dict0 = self.model_hess.forward_common( + coord, atype, box=cell, fparam=fparam, aparam=aparam + ) + ret_dict1 = self.model_valu.forward_common( + coord, atype, box=cell, fparam=fparam, aparam=aparam + ) + # compare hess and value models + np.testing.assert_allclose( + ret_dict0["energy"].numpy(), ret_dict1["energy"].numpy() + ) + ana_hess = ret_dict0["energy_derv_r_derv_r"] + + # compute finite difference + fnt_hess = [] + for ii in range(nf): + + def np_infer( + xx, + ): + ret = 
self.model_valu.forward_common( + to_paddle_tensor(xx).unsqueeze(0), + atype[ii].unsqueeze(0), + box=cell[ii].unsqueeze(0), + fparam=fparam[ii].unsqueeze(0), + aparam=aparam[ii].unsqueeze(0), + ) + # detach + ret = {kk: to_numpy_array(ret[kk]) for kk in ret} + return ret + + def ff(xx): + return np_infer(xx)["energy_redu"] + + xx = to_numpy_array(coord[ii]) + fnt_hess.append(finite_hessian(ff, xx, delta=delta).squeeze()) + + # compare finite difference with autodiff + fnt_hess = np.stack(fnt_hess).reshape([nf, nv, natoms * 3, natoms * 3]) + np.testing.assert_almost_equal( + fnt_hess, to_numpy_array(ana_hess), decimal=places + ) + + +@unittest.skip("Skip temporarily") +class TestDPModel(unittest.TestCase, HessianTest): + def setUp(self): + paddle.seed(2) + self.nf = 2 + self.nloc = 3 + self.rcut = 4.0 + self.rcut_smth = 3.0 + self.sel = [10, 10] + self.nt = 2 + self.nv = 2 + ds = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + neuron=[2, 4, 8], + axis_neuron=2, + ).to(env.DEVICE) + ft0 = InvarFitting( + "energy", + self.nt, + ds.get_dim_out(), + self.nv, + mixed_types=ds.mixed_types(), + do_hessian=True, + neuron=[4, 4, 4], + ).to(env.DEVICE) + type_map = ["foo", "bar"] + self.model_hess = make_hessian_model(EnergyModel)( + ds, ft0, type_map=type_map + ).to(env.DEVICE) + self.model_valu = EnergyModel.deserialize(self.model_hess.serialize()) + self.model_hess.requires_hessian("energy") + + def test_output_def(self): + self.assertTrue(self.model_hess.atomic_output_def()["energy"].r_hessian) + self.assertFalse(self.model_valu.atomic_output_def()["energy"].r_hessian) + self.assertTrue(self.model_hess.model_output_def()["energy"].r_hessian) + self.assertEqual( + self.model_hess.model_output_def()["energy_derv_r_derv_r"].category, + OutputVariableCategory.DERV_R_DERV_R, + ) diff --git a/source/tests/pd/model/test_mlp.py b/source/tests/pd/model/test_mlp.py new file mode 100644 index 0000000000..90653644d3 --- /dev/null +++ b/source/tests/pd/model/test_mlp.py @@ -0,0 +1,283 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.utils import EmbeddingNet as DPEmbeddingNet +from deepmd.dpmodel.utils import FittingNet as DPFittingNet +from deepmd.dpmodel.utils import ( + NativeLayer, + NativeNet, +) +from deepmd.pd.model.network.mlp import ( + MLP, + EmbeddingNet, + FittingNet, + MLPLayer, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) + + +def get_tols(prec): + if prec in ["single", "float32"]: + rtol, atol = 0.0, 1e-4 + elif prec in ["double", "float64"]: + rtol, atol = 0.0, 1e-12 + # elif prec in ["half", "float16"]: + # rtol, atol=1e-2, 0 + else: + raise ValueError(f"unknown prec {prec}") + return rtol, atol + + +class TestMLPLayer(unittest.TestCase): + def setUp(self): + self.test_cases = itertools.product( + [(5, 5), (5, 10), (5, 8), (8, 5)], # inp, out + [True, False], # bias + [True, False], # use time step + ["tanh", "none"], # activation + [True, False], # resnet + [None, [4], [3, 2]], # prefix shapes + ["float32", "double"], # precision + ) + + def test_match_native_layer( + self, + ): + for (ninp, nout), bias, ut, ac, resnet, ashp, prec in self.test_cases: + # input + inp_shap = [ninp] + if ashp is not None: + inp_shap = ashp + inp_shap + rtol, atol = get_tols(prec) + dtype = PRECISION_DICT[prec] + xx = ( + paddle.arange(np.prod(inp_shap), dtype=dtype) + .to(device=env.DEVICE) + .reshape(inp_shap) + ) + # def mlp layer + ml = MLPLayer(ninp, 
nout, bias, ut, ac, resnet, precision=prec).to( + env.DEVICE + ) + # check consistency + nl = NativeLayer.deserialize(ml.serialize()) + np.testing.assert_allclose( + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), + rtol=rtol, + atol=atol, + err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", + ) + # check self-consistency + ml1 = MLPLayer.deserialize(ml.serialize()).to(env.DEVICE) + np.testing.assert_allclose( + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", + ) + + def test_jit(self): + for (ninp, nout), bias, ut, ac, resnet, _, prec in self.test_cases: + ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec) + model = paddle.jit.to_static(ml) + ml1 = MLPLayer.deserialize(ml.serialize()) + model = paddle.jit.to_static(ml1) + + +class TestMLP(unittest.TestCase): + def setUp(self): + self.test_cases = itertools.product( + [[2, 2, 4, 8], [1, 3, 3]], # inp and hiddens + [True, False], # bias + [True, False], # use time step + ["tanh", "none"], # activation + [True, False], # resnet + [None, [4], [3, 2]], # prefix shapes + ["float32", "double"], # precision + ) + + def test_match_native_net( + self, + ): + for ndims, bias, ut, ac, resnet, ashp, prec in self.test_cases: + # input + inp_shap = [ndims[0]] + if ashp is not None: + inp_shap = ashp + inp_shap + rtol, atol = get_tols(prec) + dtype = PRECISION_DICT[prec] + xx = ( + paddle.arange(np.prod(inp_shap), dtype=dtype) + .to(device=env.DEVICE) + .reshape(inp_shap) + ) + # def MLP + layers = [] + for ii in range(1, len(ndims)): + layers.append( + MLPLayer( + ndims[ii - 1], ndims[ii], bias, ut, ac, resnet, precision=prec + ).serialize() + ) + ml = MLP(layers).to(env.DEVICE) + # check consistency + nl = NativeNet.deserialize(ml.serialize()) + np.testing.assert_allclose( + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), + rtol=rtol, + atol=atol, + err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", + ) + # check self-consistency + ml1 = MLP.deserialize(ml.serialize()).to(env.DEVICE) + np.testing.assert_allclose( + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", + ) + + def test_jit(self): + for ndims, bias, ut, ac, resnet, _, prec in self.test_cases: + layers = [] + for ii in range(1, len(ndims)): + ml = layers.append( + MLPLayer( + ndims[ii - 1], ndims[ii], bias, ut, ac, resnet, precision=prec + ).serialize() + ) + ml = MLP(ml) + model = paddle.jit.to_static(ml) + ml1 = MLP.deserialize(ml.serialize()) + model = paddle.jit.to_static(ml1) + + +class TestEmbeddingNet(unittest.TestCase): + def setUp(self): + self.test_cases = itertools.product( + [1, 3], # inp + [[24, 48, 96], [24, 36]], # and hiddens + ["tanh", "none"], # activation + [True, False], # resnet_dt + ["float32", "double"], # precision + ) + + def test_match_embedding_net( + self, + ): + for idim, nn, act, idt, prec in self.test_cases: + # input + rtol, atol = get_tols(prec) + dtype = PRECISION_DICT[prec] + xx = paddle.arange(idim, dtype=dtype).to(device=env.DEVICE) + # def MLP + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) + # check consistency + nl = DPEmbeddingNet.deserialize(ml.serialize()) + np.testing.assert_allclose( + 
ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), + rtol=rtol, + atol=atol, + err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", + ) + # check self-consistency + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) + np.testing.assert_allclose( + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", + ) + + def test_jit( + self, + ): + for idim, nn, act, idt, prec in self.test_cases: + # def MLP + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) + model = paddle.jit.to_static(ml) + model = paddle.jit.to_static(ml1) + + +class TestFittingNet(unittest.TestCase): + def setUp(self): + self.test_cases = itertools.product( + [1, 3], # inp + [1, 5], # out + [[24, 48, 96], [24, 36]], # and hiddens + ["tanh", "none"], # activation + [True, False], # resnet_dt + ["float32", "double"], # precision + [True, False], # bias_out + ) + + def test_match_fitting_net( + self, + ): + for idim, odim, nn, act, idt, prec, ob in self.test_cases: + # input + rtol, atol = get_tols(prec) + dtype = PRECISION_DICT[prec] + xx = paddle.arange(idim, dtype=dtype).to(device=env.DEVICE) + # def MLP + ml = FittingNet( + idim, + odim, + neuron=nn, + activation_function=act, + resnet_dt=idt, + precision=prec, + bias_out=ob, + ).to(env.DEVICE) + # check consistency + nl = DPFittingNet.deserialize(ml.serialize()) + np.testing.assert_allclose( + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), + rtol=rtol, + atol=atol, + err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", + ) + # check self-consistency + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) + np.testing.assert_allclose( + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", + ) + + def test_jit( + self, + ): + for idim, odim, nn, act, idt, prec, ob in self.test_cases: + # def MLP + ml = FittingNet( + idim, + odim, + neuron=nn, + activation_function=act, + resnet_dt=idt, + precision=prec, + bias_out=ob, + ).to(env.DEVICE) + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) + model = paddle.jit.to_static(ml) + model = paddle.jit.to_static(ml1) diff --git a/source/tests/pd/model/test_model.py b/source/tests/pd/model/test_model.py new file mode 100644 index 0000000000..1bdc1aa74d --- /dev/null +++ b/source/tests/pd/model/test_model.py @@ -0,0 +1,424 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import collections +import json +import unittest + +import numpy as np +import paddle +import tensorflow.compat.v1 as tf + +from deepmd.pd.utils import ( + env, +) + +tf.disable_eager_execution() + +from pathlib import ( + Path, +) + +from deepmd.pd.loss import ( + EnergyStdLoss, +) +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils.dataloader import ( + DpLoaderSet, +) +from deepmd.pd.utils.env import ( + DEVICE, +) +from deepmd.pd.utils.learning_rate import LearningRateExp as MyLRExp +from deepmd.tf.common import ( + expand_sys_str, +) +from deepmd.tf.descriptor import DescrptSeA as DescrptSeA_tf +from deepmd.tf.fit import ( + EnerFitting, +) +from deepmd.tf.loss import ( + EnerStdLoss, +) +from deepmd.tf.model import ( + EnerModel, +) +from deepmd.tf.utils.data_system import ( + DeepmdDataSystem, +) +from 
deepmd.tf.utils.learning_rate import ( + LearningRateExp, +) + +from ..test_finetune import ( + energy_data_requirement, +) + +VariableState = collections.namedtuple("VariableState", ["value", "gradient"]) + + +def paddle2tf(paddle_name, last_layer_id=None): + fields = paddle_name.split(".") + offset = int(fields[3] == "networks") + 1 + element_id = int(fields[2 + offset]) + if fields[1] == "descriptor": + layer_id = int(fields[4 + offset]) + 1 + weight_type = fields[5 + offset] + ret = "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id) + elif fields[1] == "fitting_net": + layer_id = int(fields[4 + offset]) + weight_type = fields[5 + offset] + if layer_id != last_layer_id: + ret = "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type) + else: + ret = "final_layer_type_%d/%s:0" % (element_id, weight_type) + else: + raise RuntimeError(f"Unexpected parameter name: {paddle_name}") + return ret + + +class DpTrainer: + def __init__(self): + with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin: + content = fin.read() + config = json.loads(content) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + config["training"]["training_data"]["systems"] = data_file + config["training"]["validation_data"]["systems"] = data_file + model_config = config["model"] + self.rcut = model_config["descriptor"]["rcut"] + self.rcut_smth = model_config["descriptor"]["rcut_smth"] + self.sel = model_config["descriptor"]["sel"] + self.systems = config["training"]["validation_data"]["systems"] + if isinstance(self.systems, str): + self.systems = expand_sys_str(self.systems) + self.batch_size = config["training"]["training_data"]["batch_size"] + self.type_map = model_config["type_map"] + self.filter_neuron = model_config["descriptor"]["neuron"] + self.axis_neuron = model_config["descriptor"]["axis_neuron"] + self.n_neuron = model_config["fitting_net"]["neuron"] + self.data_stat_nbatch = 3 + self.start_lr = 0.001 + self.stop_lr = 3.51e-8 + self.decay_steps = 500 + self.stop_steps = 1600 + self.start_pref_e = 1.0 + self.limit_pref_e = 2.0 + self.start_pref_f = 2.0 + self.limit_pref_f = 1.0 + self.ntypes = len(self.type_map) + + def get_intermediate_state(self, num_steps=1): + dp_model = self._get_dp_model() + dp_loss = self._get_dp_loss() + dp_lr = self._get_dp_lr() + dp_ds = self._get_dp_dataset() + dp_ds.add_data_requirements(dp_model.input_requirement) + dp_ds.add_data_requirements(dp_loss.label_requirement) + dp_model.data_stat(dp_ds) + + # Build graph + g = tf.Graph() + with g.as_default(): + place_holders = self._get_dp_placeholders(dp_ds) + model_pred = dp_model.build( + coord_=place_holders["coord"], + atype_=place_holders["type"], + natoms=place_holders["natoms_vec"], + box=place_holders["box"], + mesh=place_holders["default_mesh"], + input_dict=place_holders, + ) + global_step = tf.train.get_or_create_global_step() + learning_rate = dp_lr.build(global_step, self.stop_steps) + l2_l, _ = dp_loss.build( + learning_rate=learning_rate, + natoms=place_holders["natoms_vec"], + model_dict=model_pred, + label_dict=place_holders, + suffix="test", + ) + t_vars = tf.trainable_variables() + optimizer = tf.train.AdamOptimizer(learning_rate) + t_grad_and_vars = optimizer.compute_gradients(l2_l, t_vars) + train_op = optimizer.apply_gradients(t_grad_and_vars, global_step) + init_op = tf.global_variables_initializer() + t_heads = { + "loss": l2_l, + "energy": model_pred["energy"], + "force": model_pred["force"], + "virial": model_pred["virial"], + "atom_virial": 
model_pred["atom_virial"], + } + + # Get statistics of each component + stat_dict = { + "descriptor.mean": dp_model.descrpt.davg, + "descriptor.stddev": dp_model.descrpt.dstd, + "fitting_net.bias_atom_e": dp_model.fitting.bias_atom_e, + } + + # Get variables and their gradients + with tf.Session(graph=g) as sess: + sess.run(init_op) + for _ in range(num_steps): + batch = dp_ds.get_batch() + feeds = self._get_feed_dict(batch, place_holders) + sess.run(train_op, feed_dict=feeds) + + batch = dp_ds.get_batch() + feeds = self._get_feed_dict(batch, place_holders) + grads_and_vars, head_dict = sess.run( + [t_grad_and_vars, t_heads], feed_dict=feeds + ) + vs_dict = {} + for idx, one in enumerate(t_vars): + grad, var = grads_and_vars[idx] + vs_dict[one.name] = VariableState(var, grad) + + tf.reset_default_graph() + # Used for reproducing + return batch, head_dict, stat_dict, vs_dict + + def _get_dp_dataset(self): + data = DeepmdDataSystem( + systems=self.systems, + batch_size=self.batch_size, + test_size=1, + rcut=self.rcut, + type_map=self.type_map, + trn_all_set=True, + ) + return data + + def _get_dp_model(self): + dp_descrpt = DescrptSeA_tf( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=self.sel, + neuron=self.filter_neuron, + axis_neuron=self.axis_neuron, + ) + dp_fitting = EnerFitting( + dp_descrpt.get_ntypes(), dp_descrpt.get_dim_out(), neuron=self.n_neuron + ) + return EnerModel( + dp_descrpt, + dp_fitting, + type_map=self.type_map, + data_stat_nbatch=self.data_stat_nbatch, + ) + + def _get_dp_loss(self): + return EnerStdLoss( + starter_learning_rate=self.start_lr, + start_pref_e=self.start_pref_e, + limit_pref_e=self.limit_pref_e, + start_pref_f=self.start_pref_f, + limit_pref_f=self.limit_pref_f, + ) + + def _get_dp_lr(self): + return LearningRateExp( + start_lr=self.start_lr, stop_lr=self.stop_lr, decay_steps=self.decay_steps + ) + + def _get_dp_placeholders(self, dataset): + place_holders = {} + data_dict = dataset.get_data_dict() + for kk in data_dict.keys(): + if kk == "type": + continue + prec = tf.float64 + place_holders[kk] = tf.placeholder(prec, [None], name="t_" + kk) + place_holders["find_" + kk] = tf.placeholder( + tf.float32, name="t_find_" + kk + ) + place_holders["type"] = tf.placeholder(tf.int32, [None], name="t_type") + place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name="t_natoms" + ) + place_holders["default_mesh"] = tf.placeholder(tf.int32, [None], name="t_mesh") + place_holders["is_training"] = tf.placeholder(tf.bool) + return place_holders + + def _get_feed_dict(self, batch, place_holders): + feed_dict = {} + for kk in batch.keys(): + if kk == "find_type" or kk == "type": + continue + if "find_" in kk: + feed_dict[place_holders[kk]] = batch[kk] + else: + feed_dict[place_holders[kk]] = np.reshape(batch[kk], [-1]) + for ii in ["type"]: + feed_dict[place_holders[ii]] = np.reshape(batch[ii], [-1]) + for ii in ["natoms_vec", "default_mesh"]: + feed_dict[place_holders[ii]] = batch[ii] + feed_dict[place_holders["is_training"]] = True + return feed_dict + + +class TestEnergy(unittest.TestCase): + def setUp(self): + self.dp_trainer = DpTrainer() + self.wanted_step = 0 + for key in dir(self.dp_trainer): + if not key.startswith("_") or key == "get_intermediate_state": + value = getattr(self.dp_trainer, key) + setattr(self, key, value) + + def test_consistency(self): + batch, head_dict, stat_dict, vs_dict = self.dp_trainer.get_intermediate_state( + self.wanted_step + ) + # Build DeePMD graph + my_ds = DpLoaderSet(self.systems, self.batch_size, 
self.type_map) + my_ds.add_data_requirement(energy_data_requirement) + my_model = get_model( + model_params={ + "descriptor": { + "type": "se_e2_a", + "sel": self.sel, + "rcut_smth": self.rcut_smth, + "rcut": self.rcut, + "neuron": self.filter_neuron, + "axis_neuron": self.axis_neuron, + }, + "fitting_net": {"neuron": self.n_neuron, "mixed_types": False}, + "data_stat_nbatch": self.data_stat_nbatch, + "type_map": self.type_map, + }, + ) + my_model.to(DEVICE) + my_lr = MyLRExp(self.start_lr, self.stop_lr, self.decay_steps, self.stop_steps) + my_loss = EnergyStdLoss( + starter_learning_rate=self.start_lr, + start_pref_e=self.start_pref_e, + limit_pref_e=self.limit_pref_e, + start_pref_f=self.start_pref_f, + limit_pref_f=self.limit_pref_f, + ) + + # Keep statistics consistency between 2 implentations + my_em = my_model.get_descriptor() + mean = stat_dict["descriptor.mean"].reshape([self.ntypes, my_em.get_nsel(), 4]) + stddev = stat_dict["descriptor.stddev"].reshape( + [self.ntypes, my_em.get_nsel(), 4] + ) + my_em.set_stat_mean_and_stddev( + paddle.to_tensor(mean).to(device=DEVICE), + paddle.to_tensor(stddev).to(device=DEVICE), + ) + my_model.get_fitting_net().bias_atom_e = paddle.to_tensor( + stat_dict["fitting_net.bias_atom_e"], place=DEVICE + ) + + # Keep parameter value consistency between 2 implentations + for name, param in my_model.named_parameters(): + name = name.replace("sea.", "") + var_name = paddle2tf(name, last_layer_id=len(self.n_neuron)) + var = vs_dict[var_name].value + with paddle.no_grad(): + src = paddle.to_tensor(var) + dst = param + # print(name) + # print(src.mean(), src.std()) + # print(dst.mean(), dst.std()) + paddle.assign(src, dst) + # Start forward computing + tmp = np.copy(batch["natoms_vec"]) + batch = my_ds.systems[0]._data_system._get_subdata(batch, 0) + batch = my_ds.systems[0]._data_system.reformat_data_torch(batch) + for key in ["coord", "atype", "box", "energy", "force"]: + batch[key] = paddle.to_tensor(batch[key]).to(device=env.DEVICE) + batch[key] = batch[key].unsqueeze(0) + batch["coord"].stop_gradient = False + batch["natoms_vec"] = tmp + batch["natoms"] = paddle.to_tensor( + batch["natoms_vec"], place=batch["coord"].place + ).unsqueeze(0) + model_input = { + "coord": batch["coord"].to(env.DEVICE), + "atype": batch["atype"].to(env.DEVICE), + "box": batch["box"].to(env.DEVICE), + "do_atomic_virial": True, + } + model_input_1 = { + "coord": batch["coord"].to(env.DEVICE), + "atype": batch["atype"].to(env.DEVICE), + "box": batch["box"].to(env.DEVICE), + "do_atomic_virial": False, + } + label = { + "energy": batch["energy"].to(env.DEVICE), + "find_energy": 1.0, + "force": batch["force"].to(env.DEVICE), + "find_force": 1.0, + } + cur_lr = my_lr.value(self.wanted_step) + model_predict, loss, _ = my_loss( + model_input, my_model, label, int(batch["natoms"][0, 0]), cur_lr + ) + model_predict_1 = my_model(**model_input_1) + p_energy, p_force, p_virial, p_atomic_virial = ( + model_predict["energy"], + model_predict["force"], + model_predict["virial"], + model_predict["atom_virial"], + ) + np.testing.assert_allclose( + head_dict["energy"], p_energy.reshape([-1]).cpu().detach().numpy() + ) + np.testing.assert_allclose( + head_dict["force"], + p_force.reshape(head_dict["force"].shape).cpu().detach().numpy(), + ) + rtol = 1e-5 + atol = 1e-8 + np.testing.assert_allclose( + head_dict["loss"], loss.cpu().detach().numpy(), rtol=rtol, atol=atol + ) + np.testing.assert_allclose( + head_dict["virial"], + p_virial.reshape(head_dict["virial"].shape).cpu().detach().numpy(), + ) 
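+ # the virial from the second forward pass (do_atomic_virial=False) should match as well,
+ # while the per-atom virial key is expected to be absent from that output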
+ np.testing.assert_allclose( + head_dict["virial"], + model_predict_1["virial"] + .reshape([*head_dict["virial"].shape]) + .cpu() + .detach() + .numpy(), + ) + self.assertIsNone(model_predict_1.get("atom_virial", None)) + np.testing.assert_allclose( + head_dict["atom_virial"], + p_atomic_virial.reshape(head_dict["atom_virial"].shape) + .cpu() + .detach() + .numpy(), + ) + optimizer = paddle.optimizer.Adam( + learning_rate=cur_lr, parameters=my_model.parameters() + ) + optimizer.clear_grad() + + def step(step_id): + bdata = self.training_data.get_trainning_batch() + optimizer.clear_grad() + + # Compare gradient for consistency + loss.backward() + + for name, param in my_model.named_parameters(): + name = name.replace("sea.", "") + var_name = paddle2tf(name, last_layer_id=len(self.n_neuron)) + var_grad = vs_dict[var_name].gradient + param_grad = param.grad.cpu() + var_grad = paddle.to_tensor(var_grad).to(device="cpu") + assert np.allclose(var_grad, param_grad, rtol=rtol, atol=atol) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_nlist.py b/source/tests/pd/model/test_nlist.py new file mode 100644 index 0000000000..0947355ac0 --- /dev/null +++ b/source/tests/pd/model/test_nlist.py @@ -0,0 +1,304 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + build_directional_neighbor_list, + build_multiple_neighbor_list, + build_neighbor_list, + extend_coord_with_ghosts, + get_multiple_nlist_key, +) +from deepmd.pd.utils.region import ( + inter2phys, +) + +dtype = paddle.float64 + + +class TestNeighList(unittest.TestCase): + def setUp(self): + self.nf = 3 + self.nloc = 3 + self.ns = 5 * 5 * 3 + self.nall = self.ns * self.nloc + self.cell = paddle.to_tensor( + [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype, place=env.DEVICE + ) + self.icoord = paddle.to_tensor( + [[0, 0, 0], [0, 0, 0], [0.5, 0.5, 0.1]], dtype=dtype, place=env.DEVICE + ) + self.atype = paddle.to_tensor([-1, 0, 1], dtype=paddle.int64).to( + device=env.DEVICE + ) + [self.cell, self.icoord, self.atype] = [ + ii.unsqueeze(0) for ii in [self.cell, self.icoord, self.atype] + ] + self.coord = inter2phys(self.icoord, self.cell).reshape([-1, self.nloc * 3]) + self.cell = self.cell.reshape([-1, 9]) + [self.cell, self.coord, self.atype] = [ + paddle.tile(ii, [self.nf, 1]) for ii in [self.cell, self.coord, self.atype] + ] + self.rcut = 1.01 + self.prec = 1e-10 + self.nsel = [10, 10] + # genrated by preprocess.build_neighbor_list + # ref_nlist, _, _ = legacy_build_neighbor_list( + # 2, ecoord[0], eatype[0], + # self.rcut, + # paddle.to_tensor([10,20], dtype=paddle.int64), + # mapping[0], type_split=True, ) + self.ref_nlist = paddle.to_tensor( + [ + [-1] * sum(self.nsel), + [1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1], + [1, 1, 1, 1, -1, -1, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1], + ], + place=env.DEVICE, + ) + + def test_build_notype(self): + ecoord, eatype, mapping = extend_coord_with_ghosts( + self.coord, self.atype, self.cell, self.rcut + ) + # test normal sel + nlist = build_neighbor_list( + ecoord, + eatype, + self.nloc, + self.rcut, + sum(self.nsel), + distinguish_types=False, + ) + nlist_mask = nlist[0] == -1 + nlist_loc = mapping[0][nlist[0]] + nlist_loc[nlist_mask] = -1 + np.testing.assert_allclose( + paddle.sort(nlist_loc, axis=-1).numpy(), + paddle.sort(self.ref_nlist, axis=-1).numpy(), + ) + # test a very large sel + 
nlist = build_neighbor_list( + ecoord, + eatype, + self.nloc, + self.rcut, + sum(self.nsel) + 300, # +300, real nnei==224 + distinguish_types=False, + ) + nlist_mask = nlist[0] == -1 + nlist_loc = mapping[0][nlist[0]] + nlist_loc[nlist_mask] = -1 + np.testing.assert_allclose( + paddle.sort(nlist_loc, descending=True, axis=-1)[ + :, : sum(self.nsel) + ].numpy(), + paddle.sort(self.ref_nlist, descending=True, axis=-1).numpy(), + ) + + def test_build_type(self): + ecoord, eatype, mapping = extend_coord_with_ghosts( + self.coord, self.atype, self.cell, self.rcut + ) + nlist = build_neighbor_list( + ecoord, + eatype, + self.nloc, + self.rcut, + self.nsel, + distinguish_types=True, + ) + np.testing.assert_allclose(nlist[0].numpy(), nlist[1].numpy()) + nlist_mask = nlist[0] == -1 + nlist_loc = mapping[0][nlist[0]] + nlist_loc[nlist_mask] = -1 + for ii in range(2): + np.testing.assert_allclose( + paddle.sort( + paddle.split(nlist_loc, (self.nsel), axis=-1)[ii], axis=-1 + ).numpy(), + paddle.sort( + paddle.split(self.ref_nlist, (self.nsel), axis=-1)[ii], axis=-1 + ).numpy(), + ) + + def test_build_multiple_nlist(self): + rcuts = [1.01, 2.01] + nsels = [20, 80] + ecoord, eatype, mapping = extend_coord_with_ghosts( + self.coord, self.atype, self.cell, max(rcuts) + ) + nlist1 = build_neighbor_list( + ecoord, + eatype, + self.nloc, + rcuts[1], + nsels[1] - 1, + distinguish_types=False, + ) + pad = -1 * paddle.ones([self.nf, self.nloc, 1], dtype=nlist1.dtype).to( + device=nlist1.place + ) + nlist2 = paddle.concat([nlist1, pad], axis=-1) + nlist0 = build_neighbor_list( + ecoord, + eatype, + self.nloc, + rcuts[0], + nsels[0], + distinguish_types=False, + ) + nlists = build_multiple_neighbor_list(ecoord, nlist1, rcuts, nsels) + for dd in range(2): + self.assertEqual( + nlists[get_multiple_nlist_key(rcuts[dd], nsels[dd])].shape[-1], + nsels[dd], + ) + np.testing.assert_allclose( + nlists[get_multiple_nlist_key(rcuts[0], nsels[0])].numpy(), + nlist0.numpy(), + ) + np.testing.assert_allclose( + nlists[get_multiple_nlist_key(rcuts[1], nsels[1])].numpy(), + nlist2.numpy(), + ) + + def test_extend_coord(self): + ecoord, eatype, mapping = extend_coord_with_ghosts( + self.coord, self.atype, self.cell, self.rcut + ) + # expected ncopy x nloc + self.assertEqual(list(ecoord.shape), [self.nf, self.nall * 3]) + self.assertEqual(list(eatype.shape), [self.nf, self.nall]) + self.assertEqual(list(mapping.shape), [self.nf, self.nall]) + # check the nloc part is identical with original coord + np.testing.assert_allclose( + ecoord[:, : self.nloc * 3].numpy(), + self.coord.numpy(), + rtol=self.prec, + atol=self.prec, + ) + # check the shift vectors are aligned with grid + shift_vec = ( + ecoord.reshape([-1, self.ns, self.nloc, 3]) + - self.coord.reshape([-1, self.nloc, 3])[:, None, :, :] + ) + shift_vec = shift_vec.reshape([-1, self.nall, 3]) + # hack!!! 
assumes identical cell across frames + shift_vec = paddle.matmul( + shift_vec, paddle.linalg.inv(self.cell.reshape([self.nf, 3, 3])[0]) + ) + # nf x nall x 3 + shift_vec = paddle.round(shift_vec) + # check: identical shift vecs + np.testing.assert_allclose( + shift_vec[0].numpy(), shift_vec[1].numpy(), rtol=self.prec, atol=self.prec + ) + # check: shift idx aligned with grid + mm, cc = paddle.unique(shift_vec[0][:, 0], axis=-1, return_counts=True) + np.testing.assert_allclose( + mm.numpy(), + paddle.to_tensor([-2, -1, 0, 1, 2], dtype=dtype) + .to(device=env.DEVICE) + .numpy(), + rtol=self.prec, + atol=self.prec, + ) + np.testing.assert_allclose( + cc.numpy(), + paddle.to_tensor( + [self.ns * self.nloc // 5] * 5, dtype=paddle.int64, place=env.DEVICE + ).numpy(), + rtol=self.prec, + atol=self.prec, + ) + mm, cc = paddle.unique(shift_vec[1][:, 1], axis=-1, return_counts=True) + np.testing.assert_allclose( + mm.numpy(), + paddle.to_tensor([-2, -1, 0, 1, 2], dtype=dtype).to(device=env.DEVICE), + rtol=self.prec, + atol=self.prec, + ) + np.testing.assert_allclose( + cc.numpy(), + paddle.to_tensor( + [self.ns * self.nloc // 5] * 5, dtype=paddle.int64, place=env.DEVICE + ), + rtol=self.prec, + atol=self.prec, + ) + mm, cc = paddle.unique(shift_vec[1][:, 2], axis=-1, return_counts=True) + np.testing.assert_allclose( + mm.numpy(), + paddle.to_tensor([-1, 0, 1], dtype=dtype).to(device=env.DEVICE).numpy(), + rtol=self.prec, + atol=self.prec, + ) + np.testing.assert_allclose( + cc.numpy(), + paddle.to_tensor( + [self.ns * self.nloc // 3] * 3, dtype=paddle.int64, place=env.DEVICE + ).numpy(), + rtol=self.prec, + atol=self.prec, + ) + + def test_build_directional_nlist(self): + """Directional nlist is tested against the standard nlist implementation.""" + ecoord, eatype, mapping = extend_coord_with_ghosts( + self.coord, self.atype, self.cell, self.rcut + ) + for distinguish_types, mysel in zip([True, False], [sum(self.nsel), 300]): + # full neighbor list + nlist_full = build_neighbor_list( + ecoord, + eatype, + self.nloc, + self.rcut, + sum(self.nsel), + distinguish_types=distinguish_types, + ) + # central as part of the system + nlist = build_directional_neighbor_list( + ecoord[:, 3:6], + eatype[:, 1:2], + paddle.concat( + [ + ecoord[:, 0:3], + paddle.zeros( + [self.nf, 3], + dtype=dtype, + ).to(device=env.DEVICE), # placeholder + ecoord[:, 6:], + ], + axis=1, + ), + paddle.concat( + [ + eatype[:, 0:1], + -1 + * paddle.ones( + [self.nf, 1], + dtype="int64", + ).to(device=env.DEVICE), # placeholder + eatype[:, 2:], + ], + axis=1, + ), + self.rcut, + mysel, + distinguish_types=distinguish_types, + ) + np.testing.assert_allclose(nlist[0].numpy(), nlist[1].numpy()) + np.testing.assert_allclose(nlist[0].numpy(), nlist[2].numpy()) + np.testing.assert_allclose( + paddle.sort(nlist[0], descending=True, axis=-1)[ + :, : sum(self.nsel) + ].numpy(), + paddle.sort(nlist_full[0][1:2], descending=True, axis=-1).numpy(), + ) diff --git a/source/tests/pd/model/test_null_input.py b/source/tests/pd/model/test_null_input.py new file mode 100644 index 0000000000..52601071b2 --- /dev/null +++ b/source/tests/pd/model/test_null_input.py @@ -0,0 +1,145 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, + get_zbl_model, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from 
.test_permutation import ( + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, + model_zbl, +) + +dtype = paddle.float64 + + +class NullTest: + def test_nloc_1( + self, + ): + natoms = 1 + generator = paddle.seed(GLOBAL_SEED) + # paddle.seed(1000) + cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + # large box to exclude images + cell = (cell + cell.T) + 100.0 * paddle.eye(3).to(device=env.DEVICE) + coord = paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + atype = paddle.to_tensor([0], dtype=paddle.int32).to(device=env.DEVICE) + test_keys = ["energy", "force", "virial"] + result = eval_model(self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype) + ret0 = {key: result[key].squeeze(0) for key in test_keys} + prec = 1e-10 + expect_e_shape = [1] + expect_f = paddle.zeros([natoms, 3], dtype=dtype).to(device=env.DEVICE) + expect_v = paddle.zeros([9], dtype=dtype).to(device=env.DEVICE) + self.assertEqual(list(ret0["energy"].shape), expect_e_shape) + self.assertFalse(np.isnan(to_numpy_array(ret0["energy"])[0])) + np.testing.assert_allclose( + ret0["force"].numpy(), expect_f.numpy(), rtol=prec, atol=prec + ) + if not hasattr(self, "test_virial") or self.test_virial: + np.testing.assert_allclose( + ret0["virial"].numpy(), expect_v.numpy(), rtol=prec, atol=prec + ) + + def test_nloc_2_far( + self, + ): + natoms = 2 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + # large box to exclude images + cell = (cell + cell.T) + 3000.0 * paddle.eye(3).to(device=env.DEVICE) + coord = paddle.rand([1, 3], dtype=dtype).to(device=env.DEVICE) + # 2 far-away atoms + coord = paddle.concat([coord, coord + 100.0], axis=0) + atype = paddle.to_tensor([0, 2], dtype=paddle.int32).to(device=env.DEVICE) + test_keys = ["energy", "force", "virial"] + result = eval_model(self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype) + ret0 = {key: result[key].squeeze(0) for key in test_keys} + prec = 1e-10 + expect_e_shape = [1] + expect_f = paddle.zeros([natoms, 3], dtype=dtype).to(device=env.DEVICE) + expect_v = paddle.zeros([9], dtype=dtype).to(device=env.DEVICE) + self.assertEqual(list(ret0["energy"].shape), expect_e_shape) + self.assertFalse(np.isnan(to_numpy_array(ret0["energy"])[0])) + np.testing.assert_allclose( + ret0["force"].numpy(), expect_f.numpy(), rtol=prec, atol=prec + ) + if not hasattr(self, "test_virial") or self.test_virial: + np.testing.assert_allclose( + ret0["virial"].numpy(), expect_v.numpy(), rtol=prec, atol=prec + ) + + +class TestEnergyModelSeA(unittest.TestCase, NullTest): + def setUp(self): + model_params = copy.deepcopy(model_se_e2_a) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA1(unittest.TestCase, NullTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA2(unittest.TestCase, NullTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelDPA2(unittest.TestCase, NullTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "direct_force_ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelHybrid(unittest.TestCase, NullTest): + def setUp(self): + model_params = 
copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelHybrid(unittest.TestCase, NullTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + model_params["fitting_net"]["type"] = "direct_force_ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelZBL(unittest.TestCase, NullTest): + def setUp(self): + model_params = copy.deepcopy(model_zbl) + self.type_split = False + self.model = get_zbl_model(model_params).to(env.DEVICE) diff --git a/source/tests/pd/model/test_pairtab_atomic_model.py b/source/tests/pd/model/test_pairtab_atomic_model.py new file mode 100644 index 0000000000..7ae1ca7848 --- /dev/null +++ b/source/tests/pd/model/test_pairtab_atomic_model.py @@ -0,0 +1,276 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest +from unittest.mock import ( + patch, +) + +import numpy as np +import paddle + +from deepmd.dpmodel.atomic_model import PairTabAtomicModel as DPPairTabAtomicModel +from deepmd.pd.model.atomic_model import ( + PairTabAtomicModel, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) + + +class TestPairTab(unittest.TestCase): + @patch("numpy.loadtxt") + def setUp(self, mock_loadtxt) -> None: + file_path = "dummy_path" + mock_loadtxt.return_value = np.array( + [ + [0.005, 1.0, 2.0, 3.0], + [0.01, 0.8, 1.6, 2.4], + [0.015, 0.5, 1.0, 1.5], + [0.02, 0.25, 0.4, 0.75], + ] + ) + + self.model = PairTabAtomicModel( + tab_file=file_path, rcut=0.02, sel=2, type_map=["H", "O"] + ) + + self.extended_coord = paddle.to_tensor( + [ + [ + [0.01, 0.01, 0.01], + [0.01, 0.02, 0.01], + [0.01, 0.01, 0.02], + [0.02, 0.01, 0.01], + ], + [ + [0.01, 0.01, 0.01], + [0.01, 0.02, 0.01], + [0.01, 0.01, 0.02], + [0.05, 0.01, 0.01], + ], + ], + place=env.DEVICE, + ) + + # nframes=2, nall=4 + self.extended_atype = paddle.to_tensor( + [[0, 1, 0, 1], [0, 0, 1, 1]], place=env.DEVICE + ) + + # nframes=2, nloc=2, nnei=2 + self.nlist = paddle.to_tensor( + [[[1, 2], [0, 2]], [[1, 2], [0, 3]]], place=env.DEVICE + ) + + def test_without_mask(self): + result = self.model.forward_atomic( + self.extended_coord, self.extended_atype, self.nlist + ) + expected_result = paddle.to_tensor( + [[[1.2000], [1.3614]], [[1.2000], [0.4000]]], + dtype=paddle.float64, + place=env.DEVICE, + ) + + np.testing.assert_allclose( + result["energy"].numpy(), expected_result.numpy(), rtol=0.0001, atol=0.0001 + ) + + @unittest.skip("Temporarily skip") + def test_with_mask(self): + self.nlist = paddle.to_tensor( + [[[1, -1], [0, 2]], [[1, 2], [0, 3]]], place=env.DEVICE + ) + + result = self.model.forward_atomic( + self.extended_coord, self.extended_atype, self.nlist + ) + expected_result = paddle.to_tensor( + [[[0.8000], [1.3614]], [[1.2000], [0.4000]]], + dtype=paddle.float64, + place=env.DEVICE, + ) + + np.testing.assert_allclose( + result["energy"].numpy(), expected_result.numpy(), rtol=0.0001, atol=0.0001 + ) + + def test_jit(self): + model = paddle.jit.to_static(self.model) + # atomic model no more export methods + # self.assertEqual(model.get_rcut(), 0.02) + # self.assertEqual(model.get_type_map(), ["H", "O"]) + + def test_deserialize(self): + model1 = PairTabAtomicModel.deserialize(self.model.serialize()) + np.testing.assert_allclose(self.model.tab_data.numpy(), model1.tab_data.numpy()) + np.testing.assert_allclose(self.model.tab_info.numpy(), model1.tab_info.numpy()) + + self.nlist = 
paddle.to_tensor( + [[[1, -1], [0, 2]], [[1, 2], [0, 3]]], place=env.DEVICE + ) + result = model1.forward_atomic( + self.extended_coord, self.extended_atype, self.nlist + ) + expected_result = self.model.forward_atomic( + self.extended_coord, self.extended_atype, self.nlist + ) + + np.testing.assert_allclose( + result["energy"].numpy(), + expected_result["energy"].numpy(), + rtol=0.0001, + atol=0.0001, + ) + + # model1 = paddle.jit.to_static(model1) + # atomic model no more export methods + # self.assertEqual(model1.get_rcut(), 0.02) + # self.assertEqual(model1.get_type_map(), ["H", "O"]) + + def test_cross_deserialize(self): + model_dict = self.model.serialize() # paddle model to dict + model1 = DPPairTabAtomicModel.deserialize(model_dict) # dict to numpy model + np.testing.assert_allclose(self.model.tab_data, model1.tab_data) + np.testing.assert_allclose(self.model.tab_info, model1.tab_info) + + self.nlist = np.array([[[1, -1], [0, 2]], [[1, 2], [0, 3]]]) + result = model1.forward_atomic( + self.extended_coord.cpu().numpy(), + self.extended_atype.cpu().numpy(), + self.nlist, + ) + expected_result = self.model.forward_atomic( + self.extended_coord, + self.extended_atype, + paddle.to_tensor(self.nlist).to(device=env.DEVICE), + ) + np.testing.assert_allclose( + result["energy"], to_numpy_array(expected_result["energy"]), 0.0001, 0.0001 + ) + + +class TestPairTabTwoAtoms(unittest.TestCase): + @patch("numpy.loadtxt") + def test_extrapolation_nonzero_rmax(self, mock_loadtxt) -> None: + """Scenarios to test. + + rcut < rmax: + rr < rcut: use table values, or interpolate. + rr == rcut: use table values, or interpolate. + rr > rcut: should be 0 + rcut == rmax: + rr < rcut: use table values, or interpolate. + rr == rcut: use table values, or interpolate. + rr > rcut: should be 0 + rcut > rmax: + rr < rmax: use table values, or interpolate. + rr == rmax: use table values, or interpolate. 
+ rmax < rr < rcut: extrapolate + rr >= rcut: should be 0 + + """ + file_path = "dummy_path" + mock_loadtxt.return_value = np.array( + [ + [0.005, 1.0], + [0.01, 0.8], + [0.015, 0.5], + [0.02, 0.25], + ] + ) + + # nframes=1, nall=2 + extended_atype = paddle.to_tensor([[0, 0]]).to(device=env.DEVICE) + + # nframes=1, nloc=2, nnei=1 + nlist = paddle.to_tensor([[[1], [-1]]]).to(device=env.DEVICE) + + results = [] + + for dist, rcut in zip( + [ + 0.01, + 0.015, + 0.020, + 0.015, + 0.02, + 0.021, + 0.015, + 0.02, + 0.021, + 0.025, + 0.026, + 0.025, + 0.025, + 0.0216161, + ], + [ + 0.015, + 0.015, + 0.015, + 0.02, + 0.02, + 0.02, + 0.022, + 0.022, + 0.022, + 0.025, + 0.025, + 0.03, + 0.035, + 0.025, + ], + ): + extended_coord = paddle.to_tensor( + [ + [ + [0.0, 0.0, 0.0], + [0.0, dist, 0.0], + ], + ], + place=env.DEVICE, + ) + + model = PairTabAtomicModel( + tab_file=file_path, rcut=rcut, sel=2, type_map=["H"] + ) + results.append( + model.forward_atomic(extended_coord, extended_atype, nlist)["energy"] + ) + + expected_result = paddle.stack( + [ + paddle.to_tensor( + [ + [ + [0.4, 0], + [0.0, 0], + [0.0, 0], + [0.25, 0], + [0, 0], + [0, 0], + [0.25, 0], + [0.125, 0], + [0.0922, 0], + [0, 0], + [0, 0], + [0, 0], + [0.0923, 0], + [0.0713, 0], + ] + ], + dtype=paddle.float64, + place=env.DEVICE, + ) + ] + ).reshape([14, 2]) + results = paddle.stack(results).reshape([14, 2]) + + np.testing.assert_allclose(results, expected_result, rtol=0.0001, atol=0.0001) + + +if __name__ == "__main__": + unittest.main(warnings="ignore") diff --git a/source/tests/pd/model/test_permutation.py b/source/tests/pd/model/test_permutation.py new file mode 100644 index 0000000000..ffb8f96c93 --- /dev/null +++ b/source/tests/pd/model/test_permutation.py @@ -0,0 +1,481 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import os +import unittest + +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) + +CUR_DIR = os.path.dirname(__file__) + +dtype = paddle.float64 +import numpy as np + +model_se_e2_a = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "se_e2_a", + "sel": [46, 92, 4], + "rcut_smth": 0.50, + "rcut": 4.00, + "neuron": [25, 50, 100], + "resnet_dt": False, + "axis_neuron": 16, + "seed": 1, + }, + "fitting_net": { + "neuron": [24, 24, 24], + "resnet_dt": True, + "seed": 1, + }, + "data_stat_nbatch": 20, +} + +model_dos = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "se_e2_a", + "sel": [46, 92, 4], + "rcut_smth": 0.50, + "rcut": 4.00, + "neuron": [25, 50, 100], + "resnet_dt": False, + "axis_neuron": 16, + "seed": 1, + }, + "fitting_net": { + "neuron": [24, 24, 24], + "resnet_dt": True, + "seed": 1, + "type": "dos", + "numb_dos": 250, + }, + "data_stat_nbatch": 20, +} + +model_zbl = { + "type_map": ["O", "H", "B"], + "use_srtab": f"{CUR_DIR}/water/data/zbl_tab_potential/H2O_tab_potential.txt", + "smin_alpha": 0.1, + "sw_rmin": 0.2, + "sw_rmax": 4.0, + "descriptor": { + "type": "se_atten", + "sel": 40, + "rcut_smth": 0.5, + "rcut": 4.0, + "neuron": [25, 50, 100], + "axis_neuron": 16, + "attn": 64, + "attn_layer": 2, + "attn_dotr": True, + "attn_mask": False, + "activation_function": "tanh", + "scaling_factor": 1.0, + "normalize": False, + "temperature": 1.0, + "set_davg_zero": True, + "type_one_side": True, + "seed": 1, + }, + "fitting_net": { + "neuron": [24, 24, 24], + "resnet_dt": True, + "seed": 1, + }, + "data_stat_nbatch": 20, +} + + 
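+# se_e2_a model with spin enabled for the "O" type; exercised by the spin-aware permutation test below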
+model_spin = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "se_e2_a", + "sel": [46, 92, 4], + "rcut_smth": 0.50, + "rcut": 4.00, + "neuron": [25, 50, 100], + "resnet_dt": False, + "axis_neuron": 16, + "seed": 1, + }, + "fitting_net": { + "neuron": [24, 24, 24], + "resnet_dt": True, + "seed": 1, + }, + "data_stat_nbatch": 20, + "spin": { + "use_spin": [True, False, False], + "virtual_scale": [0.3140], + "_comment": " that's all", + }, +} + +model_dpa2 = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "dpa2", + "repinit": { + "rcut": 6.0, + "rcut_smth": 2.0, + "nsel": 100, + "neuron": [2, 4, 8], + "axis_neuron": 4, + "activation_function": "tanh", + }, + "repformer": { + "rcut": 4.0, + "rcut_smth": 0.5, + "nsel": 40, + "nlayers": 12, + "g1_dim": 8, + "g2_dim": 5, + "attn2_hidden": 3, + "attn2_nhead": 1, + "attn1_hidden": 5, + "attn1_nhead": 1, + "axis_neuron": 4, + "update_h2": False, + "update_g1_has_conv": True, + "update_g1_has_grrg": True, + "update_g1_has_drrd": True, + "update_g1_has_attn": True, + "update_g2_has_g1g1": True, + "update_g2_has_attn": True, + "attn2_has_gate": True, + }, + "seed": 1, + "add_tebd_to_repinit_out": False, + }, + "fitting_net": { + "neuron": [24, 24], + "resnet_dt": True, + "seed": 1, + }, +} + +model_dpa2tebd = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "dpa2", + "repinit": { + "rcut": 6.0, + "rcut_smth": 0.5, + "nsel": 100, + "neuron": [2, 4, 8], + "axis_neuron": 4, + "activation_function": "tanh", + "three_body_sel": 40, + "three_body_rcut": 4.0, + "three_body_rcut_smth": 3.5, + "use_three_body": True, + }, + "repformer": { + "rcut": 4.0, + "rcut_smth": 0.5, + "nsel": 40, + "nlayers": 6, + "g1_dim": 8, + "g2_dim": 5, + "attn2_hidden": 3, + "attn2_nhead": 1, + "attn1_hidden": 5, + "attn1_nhead": 1, + "axis_neuron": 4, + "update_h2": False, + "update_g1_has_conv": True, + "update_g1_has_grrg": True, + "update_g1_has_drrd": True, + "update_g1_has_attn": False, + "update_g2_has_g1g1": False, + "update_g2_has_attn": True, + "update_style": "res_residual", + "update_residual": 0.01, + "update_residual_init": "norm", + "attn2_has_gate": True, + "use_sqrt_nnei": True, + "g1_out_conv": True, + "g1_out_mlp": True, + }, + "seed": 1, + "add_tebd_to_repinit_out": False, + }, + "fitting_net": { + "neuron": [24, 24], + "resnet_dt": True, + "seed": 1, + }, +} + +model_dpa1 = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "se_atten", + "sel": 40, + "rcut_smth": 0.5, + "rcut": 4.0, + "neuron": [25, 50, 100], + "axis_neuron": 16, + "attn": 64, + "attn_layer": 2, + "attn_dotr": True, + "attn_mask": False, + "activation_function": "tanh", + "scaling_factor": 1.0, + "normalize": False, + "temperature": 1.0, + "set_davg_zero": True, + "type_one_side": True, + "seed": 1, + }, + "fitting_net": { + "neuron": [24, 24, 24], + "resnet_dt": True, + "seed": 1, + }, +} + + +model_hybrid = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "hybrid", + "list": [ + { + "type": "se_atten", + "sel": 120, + "rcut_smth": 0.5, + "rcut": 6.0, + "neuron": [25, 50, 100], + "axis_neuron": 16, + "attn": 128, + "attn_layer": 0, + "attn_dotr": True, + "attn_mask": False, + "activation_function": "tanh", + "scaling_factor": 1.0, + "normalize": True, + "temperature": 1.0, + "seed": 1, + }, + { + "type": "dpa2", + "repinit": { + "rcut": 6.0, + "rcut_smth": 2.0, + "nsel": 30, + "neuron": [2, 4, 8], + "axis_neuron": 4, + "activation_function": "tanh", + }, + "repformer": { + "rcut": 4.0, + "rcut_smth": 0.5, + "nsel": 10, + "nlayers": 
12, + "g1_dim": 8, + "g2_dim": 5, + "attn2_hidden": 3, + "attn2_nhead": 1, + "attn1_hidden": 5, + "attn1_nhead": 1, + "axis_neuron": 4, + "update_h2": False, + "update_g1_has_conv": True, + "update_g1_has_grrg": True, + "update_g1_has_drrd": True, + "update_g1_has_attn": True, + "update_g2_has_g1g1": True, + "update_g2_has_attn": True, + "attn2_has_gate": True, + }, + "seed": 1, + "add_tebd_to_repinit_out": False, + }, + ], + }, + "fitting_net": { + "neuron": [240, 240, 240], + "resnet_dt": True, + "seed": 1, + "_comment": " that's all", + }, + "_comment": " that's all", +} + +model_property = { + "type_map": ["H", "C", "N", "O"], + "descriptor": { + "type": "se_e2_a", + "sel": [3, 3, 3, 3], + "rcut_smth": 0.50, + "rcut": 4.00, + "neuron": [25, 50, 100], + "resnet_dt": False, + "axis_neuron": 16, + "seed": 1, + }, + "fitting_net": { + "type": "property", + "task_dim": 3, + "neuron": [24, 24, 24], + "resnet_dt": True, + "bias_method": "normal", + "intensive": True, + "seed": 1, + }, +} + + +class PermutationTest: + def test( + self, + ): + natoms = 5 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype) + cell = (cell + cell.T) + 5.0 * paddle.eye(3) + coord = paddle.rand([natoms, 3], dtype=dtype) + spin = paddle.rand([natoms, 3], dtype=dtype) + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32) + idx_perm = [1, 0, 4, 3, 2] + test_spin = getattr(self, "test_spin", False) + if not test_spin: + test_keys = ["energy", "force", "virial"] + else: + test_keys = ["energy", "force", "force_mag", "virial"] + result_0 = eval_model( + self.model, + coord.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret0 = {key: result_0[key].squeeze(0) for key in test_keys} + result_1 = eval_model( + self.model, + coord[idx_perm].unsqueeze(0), + cell.unsqueeze(0), + atype[idx_perm], + spins=spin[idx_perm].unsqueeze(0), + ) + ret1 = {key: result_1[key].squeeze(0) for key in test_keys} + prec = 1e-10 + for key in test_keys: + if key in ["energy"]: + np.testing.assert_allclose( + ret0[key].numpy(), ret1[key].numpy(), rtol=prec, atol=prec + ) + elif key in ["force", "force_mag"]: + np.testing.assert_allclose( + ret0[key][idx_perm].numpy(), ret1[key].numpy(), rtol=prec, atol=prec + ) + elif key == "virial": + if not hasattr(self, "test_virial") or self.test_virial: + np.testing.assert_allclose( + ret0[key], ret1[key], rtol=prec, atol=prec + ) + else: + raise RuntimeError(f"Unexpected test key {key}") + + +class TestEnergyModelSeA(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_se_e2_a) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestDOSModelSeA(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA1(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA2(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelDPA2(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "direct_force_ener" + 
self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelHybrid(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelHybrid(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + model_params["fitting_net"]["type"] = "direct_force_ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelZBL(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_zbl) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinSeA(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_spin) + self.type_split = False + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) + + +# class TestEnergyFoo(unittest.TestCase): +# def test(self): +# model_params = model_dpau +# self.model = EnergyModelDPAUni(model_params).to(env.DEVICE) + +# natoms = 5 +# cell = paddle.rand([3, 3], dtype=dtype) +# cell = (cell + cell.T) + 5. * paddle.eye(3) +# coord = paddle.rand([natoms, 3], dtype=dtype) +# coord = paddle.matmul(coord, cell) +# atype = paddle.to_tensor([0, 0, 0, 1, 1]) +# idx_perm = [1, 0, 4, 3, 2] +# ret0 = infer_model(self.model, coord, cell, atype, type_split=True) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_permutation_denoise.py b/source/tests/pd/model/test_permutation_denoise.py new file mode 100644 index 0000000000..0f3dc9e871 --- /dev/null +++ b/source/tests/pd/model/test_permutation_denoise.py @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation import ( # model_dpau, + model_dpa1, + model_dpa2, + model_hybrid, +) + +dtype = paddle.float64 + +model_dpa1 = copy.deepcopy(model_dpa1) +model_dpa2 = copy.deepcopy(model_dpa2) +model_hybrid = copy.deepcopy(model_hybrid) +model_dpa1["type_map"] = ["O", "H", "B", "MASKED_TOKEN"] +model_dpa1.pop("fitting_net") +model_dpa2["type_map"] = ["O", "H", "B", "MASKED_TOKEN"] +model_dpa2.pop("fitting_net") +model_hybrid["type_map"] = ["O", "H", "B", "MASKED_TOKEN"] +model_hybrid.pop("fitting_net") + + +class PermutationDenoiseTest: + def test( + self, + ): + generator = paddle.seed(GLOBAL_SEED) + natoms = 5 + cell = paddle.rand([3, 3], dtype=dtype).to(env.DEVICE) + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(env.DEVICE) + coord = paddle.rand([natoms, 3], dtype=dtype).to(env.DEVICE) + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1]).to(env.DEVICE) + idx_perm = [1, 0, 4, 3, 2] + updated_c0, logits0 = eval_model( + self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True + ) + ret0 = {"updated_coord": updated_c0.squeeze(0), "logits": logits0.squeeze(0)} + updated_c1, logits1 = eval_model( + self.model, + coord[idx_perm].unsqueeze(0), + cell.unsqueeze(0), + atype[idx_perm], + denoise=True, + ) + ret1 = {"updated_coord": updated_c1.squeeze(0), "logits": logits1.squeeze(0)} + prec = 1e-10 + np.testing.assert_allclose( + 
ret0["updated_coord"][idx_perm].numpy(), + ret1["updated_coord"].numpy(), + rtol=prec, + atol=prec, + ) + np.testing.assert_allclose( + ret0["logits"][idx_perm].numpy(), + ret1["logits"].numpy(), + rtol=prec, + atol=prec, + ) + + +@unittest.skip("support of the denoise is temporally disabled") +class TestDenoiseModelDPA1(unittest.TestCase, PermutationDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +@unittest.skip("support of the denoise is temporally disabled") +class TestDenoiseModelDPA2(unittest.TestCase, PermutationDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model( + model_params, + ).to(env.DEVICE) + + +# @unittest.skip("hybrid not supported at the moment") +# class TestDenoiseModelHybrid(unittest.TestCase, TestPermutationDenoise): +# def setUp(self): +# model_params = copy.deepcopy(model_hybrid_denoise) +# self.type_split = True +# self.model = get_model(model_params).to(env.DEVICE) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_polar_atomic_model_stat.py b/source/tests/pd/model/test_polar_atomic_model_stat.py new file mode 100644 index 0000000000..bc086ea0c0 --- /dev/null +++ b/source/tests/pd/model/test_polar_atomic_model_stat.py @@ -0,0 +1,293 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import tempfile +import unittest +from pathlib import ( + Path, +) +from typing import ( + Optional, +) + +import h5py +import numpy as np +import paddle + +from deepmd.pd.model.atomic_model import ( + BaseAtomicModel, + DPPolarAtomicModel, +) +from deepmd.pd.model.descriptor.dpa1 import ( + DescrptDPA1, +) +from deepmd.pd.model.task.polarizability import ( + PolarFittingNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.path import ( + DPPath, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class FooFitting(PolarFittingNet): + def forward( + self, + descriptor: paddle.Tensor, + atype: paddle.Tensor, + gr: Optional[paddle.Tensor] = None, + g2: Optional[paddle.Tensor] = None, + h2: Optional[paddle.Tensor] = None, + fparam: Optional[paddle.Tensor] = None, + aparam: Optional[paddle.Tensor] = None, + ): + nf, nloc, _ = descriptor.shape + ret = {} + ret["polarizability"] = ( + paddle.to_tensor( + [ + [ + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]], + [[3.0, 3.0, 3.0], [3.0, 3.0, 3.0], [6.0, 6.0, 6.0]], + ], + [ + [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0], [4.0, 4.0, 4.0]], + [[4.0, 4.0, 4.0], [5.0, 5.0, 5.0], [6.0, 6.0, 6.0]], + [[6.0, 6.0, 6.0], [4.0, 4.0, 4.0], [2.0, 2.0, 2.0]], + ], + ] + ) + .reshape([nf, nloc, *self.output_def()["polarizability"].shape]) + .to(env.GLOBAL_PD_FLOAT_PRECISION) + .to(env.DEVICE) + ) + + return ret + + +class TestAtomicModelStat(unittest.TestCase, TestCaseSingleFrameWithNlist): + def tearDown(self): + self.tempdir.cleanup() + + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + self.merged_output_stat = [ + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + 
np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5, 6 + "atom_polarizability": to_paddle_tensor( + np.array( + [ + [ + [[5.0, 5.0, 5.0], [5.0, 5.0, 5.0], [5.0, 5.0, 5.0]], + [[5.0, 5.0, 5.0], [5.0, 5.0, 5.0], [5.0, 5.0, 5.0]], + [[5.0, 5.0, 5.0], [5.0, 5.0, 5.0], [5.0, 5.0, 5.0]], + ], + [ + [[5.0, 5.0, 5.0], [5.0, 5.0, 5.0], [5.0, 5.0, 5.0]], + [[6.0, 6.0, 6.0], [6.0, 6.0, 6.0], [6.0, 6.0, 6.0]], + [[7.0, 7.0, 7.0], [7.0, 7.0, 7.0], [7.0, 7.0, 7.0]], + ], + ] + ).reshape(2, 3, 3, 3) + ), + "find_atom_polarizability": np.float32(1.0), + }, + { + "coord": to_paddle_tensor(np.zeros([2, 3, 3])), + "atype": to_paddle_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_paddle_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_paddle_tensor(np.zeros([2, 3, 3])), + "natoms": to_paddle_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5, 6 from atomic label. + "polarizability": to_paddle_tensor( + np.array( + [ + [[5.0, 5.0, 5.0], [5.0, 5.0, 5.0], [5.0, 5.0, 5.0]], + [[7.0, 7.0, 7.0], [7.0, 7.0, 7.0], [7.0, 7.0, 7.0]], + ] + ).reshape(2, 3, 3) + ), + "find_polarizability": np.float32(1.0), + }, + ] + self.tempdir = tempfile.TemporaryDirectory() + h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) + with h5py.File(h5file, "w") as f: + pass + self.stat_file_path = DPPath(h5file, "a") + + def test_output_stat(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting(self.nt, 1, 1).to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = DPPolarAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + args = [ + to_paddle_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + expected_ret0 = {} + expected_ret0["polarizability"] = np.array( + [ + [ + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]], + [[3.0, 3.0, 3.0], [3.0, 3.0, 3.0], [6.0, 6.0, 6.0]], + ], + [ + [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0], [4.0, 4.0, 4.0]], + [[4.0, 4.0, 4.0], [5.0, 5.0, 5.0], [6.0, 6.0, 6.0]], + [[6.0, 6.0, 6.0], [4.0, 4.0, 4.0], [2.0, 2.0, 2.0]], + ], + ] + ).reshape([nf, nloc, *md0.fitting_output_def()["polarizability"].shape]) + + np.testing.assert_almost_equal( + ret0["polarizability"], expected_ret0["polarizability"] + ) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + expected_std = np.zeros( + (1, 2, 9), dtype=np.float64 + ) # 1 keys, 2 atypes, 9 max dims. + expected_std[:, 1, :] = np.ones(9, dtype=np.float64) * 0.8164966 # updating std + # nt x odim (dia) + diagnoal_bias = np.array( + [ + [[5.0, 0.0, 0.0], [0.0, 5.0, 0.0], [0.0, 0.0, 5.0]], + [[6.0, 0.0, 0.0], [0.0, 6.0, 0.0], [0.0, 0.0, 6.0]], + ] + ).reshape(2, 3, 3) + expected_ret1 = {} + expected_ret1["polarizability"] = ret0["polarizability"] + diagnoal_bias[at] + np.testing.assert_almost_equal( + ret1["polarizability"], expected_ret1["polarizability"] + ) + np.testing.assert_almost_equal(to_numpy_array(md0.out_std), expected_std) + + # 3. 
test bias load from file + def raise_error(): + raise RuntimeError + + md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path) + ret2 = md0.forward_common_atomic(*args) + ret2 = cvt_ret(ret2) + np.testing.assert_almost_equal(ret1["polarizability"], ret2["polarizability"]) + np.testing.assert_almost_equal(to_numpy_array(md0.out_std), expected_std) + + # 4. test change bias + BaseAtomicModel.change_out_bias( + md0, self.merged_output_stat, bias_adjust_mode="change-by-statistic" + ) + args = [ + to_paddle_tensor(ii) + for ii in [ + self.coord_ext, + to_numpy_array(self.merged_output_stat[0]["atype_ext"]), + self.nlist, + ] + ] + ret3 = md0.forward_common_atomic(*args) + ret3 = cvt_ret(ret3) + + expected_ret3 = {} + expected_std = np.array( + [ + [ + [ + 1.4142136, + 1.4142136, + 1.4142136, + 1.2472191, + 1.2472191, + 1.2472191, + 1.2472191, + 1.2472191, + 1.2472191, + ], + [ + 0.4714045, + 0.4714045, + 0.4714045, + 0.8164966, + 0.8164966, + 0.8164966, + 2.6246693, + 2.6246693, + 2.6246693, + ], + ] + ] + ) + # new bias [[[3.0000, -, -, -, 2.6667, -, -, -, 2.3333], + # [1.6667, -, -, -, 2.0000, -, -, -, 1.3333]]] + # which yields [2.667, 1.667] + expected_ret3["polarizability"] = np.array( + [ + [ + [[3.6667, 1.0, 1.0], [1.0, 3.6667, 1.0], [1.0, 1.0, 3.6667]], + [[3.6667, 1.0, 1.0], [2.0, 4.6667, 2.0], [3.0, 3.0, 5.6667]], + [[4.6667, 3.0, 3.0], [3.0, 4.6667, 3.0], [6.0, 6.0, 7.6667]], + ], + [ + [[6.6667, 4.0, 4.0], [4.0, 6.6667, 4.0], [4.0, 4.0, 6.6667]], + [[5.6667, 4.0, 4.0], [5.0, 6.6667, 5.0], [6.0, 6.0, 7.6667]], + [[7.6667, 6.0, 6.0], [4.0, 5.6667, 4.0], [2.0, 2.0, 3.6667]], + ], + ] + ).reshape(2, 3, 3, 3) + np.testing.assert_almost_equal( + ret3["polarizability"], expected_ret3["polarizability"], decimal=4 + ) + np.testing.assert_almost_equal(to_numpy_array(md0.out_std), expected_std) diff --git a/source/tests/pd/model/test_polarizability_fitting.py b/source/tests/pd/model/test_polarizability_fitting.py new file mode 100644 index 0000000000..7280fbd1b0 --- /dev/null +++ b/source/tests/pd/model/test_polarizability_fitting.py @@ -0,0 +1,384 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import os +import unittest + +import numpy as np +import paddle +from scipy.stats import ( + special_ortho_group, +) + +from deepmd.dpmodel.fitting import PolarFitting as DPPolarFitting +from deepmd.infer.deep_polar import ( + DeepPolar, +) +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.model.model.polar_model import ( + PolarModel, +) +from deepmd.pd.model.task.polarizability import ( + PolarFittingNet, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestPolarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + self.rng = np.random.default_rng(GLOBAL_SEED) + self.nf, self.nloc, _ = self.nlist.shape + self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + self.scale = self.rng.uniform(0, 1, self.nt).tolist() + + def test_consistency( + self, + ): + rd0, gr, _, _, _ = self.dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, 
dtype="int64").to(device=env.DEVICE), + ) + atype = paddle.to_tensor( + self.atype_ext[:, : self.nloc], dtype="int64", place=env.DEVICE + ) + + for nfp, nap, fit_diag, scale in itertools.product( + [0, 3], + [0, 4], + [True, False], + [None, self.scale], + ): + ft0 = PolarFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=self.dd0.mixed_types(), + fit_diag=fit_diag, + scale=scale, + ).to(env.DEVICE) + ft1 = DPPolarFitting.deserialize(ft0.serialize()) + ft2 = PolarFittingNet.deserialize(ft0.serialize()) + ft3 = DPPolarFitting.deserialize(ft1.serialize()) + + if nfp > 0: + ifp = paddle.to_tensor( + self.rng.normal(size=(self.nf, nfp)), dtype=dtype, place=env.DEVICE + ) + else: + ifp = None + if nap > 0: + iap = paddle.to_tensor( + self.rng.normal(size=(self.nf, self.nloc, nap)), + dtype=dtype, + place=env.DEVICE, + ) + else: + iap = None + + ret0 = ft0(rd0, atype, gr, fparam=ifp, aparam=iap) + ret1 = ft1( + rd0.detach().cpu().numpy(), + atype.detach().cpu().numpy(), + gr.detach().cpu().numpy(), + fparam=to_numpy_array(ifp), + aparam=to_numpy_array(iap), + ) + ret2 = ft2(rd0, atype, gr, fparam=ifp, aparam=iap) + ret3 = ft3( + rd0.detach().cpu().numpy(), + atype.detach().cpu().numpy(), + gr.detach().cpu().numpy(), + fparam=to_numpy_array(ifp), + aparam=to_numpy_array(iap), + ) + np.testing.assert_allclose( + to_numpy_array(ret0["polarizability"]), + ret1["polarizability"], + ) + np.testing.assert_allclose( + to_numpy_array(ret0["polarizability"]), + to_numpy_array(ret2["polarizability"]), + ) + np.testing.assert_allclose( + to_numpy_array(ret0["polarizability"]), + ret3["polarizability"], + ) + + def test_jit( + self, + ): + for mixed_types, nfp, nap, fit_diag in itertools.product( + [True, False], + [0, 3], + [0, 4], + [True, False], + ): + ft0 = PolarFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=mixed_types, + fit_diag=fit_diag, + ).to(env.DEVICE) + paddle.jit.to_static(ft0) + + +class TestEquivalence(unittest.TestCase): + def setUp(self) -> None: + self.natoms = 5 + self.rcut = 4 + self.rcut_smth = 0.5 + self.sel = [46, 92, 4] + self.nf = 1 + self.nt = 3 + self.rng = np.random.default_rng(GLOBAL_SEED) + self.coord = 2 * paddle.rand([self.natoms, 3], dtype=dtype).to( + device=env.DEVICE + ) + self.shift = paddle.to_tensor([4, 4, 4], dtype=dtype).to(device=env.DEVICE) + self.atype = paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32).to( + device=env.DEVICE + ) + self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + self.cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + self.cell = (self.cell + self.cell.T) + 5.0 * paddle.eye(3).to( + device=env.DEVICE + ) + self.scale = self.rng.uniform(0, 1, self.nt).tolist() + + def test_rot(self): + atype = self.atype.reshape([1, 5]) + rmat = paddle.to_tensor(special_ortho_group.rvs(3), dtype=dtype).to( + device=env.DEVICE + ) + coord_rot = paddle.matmul(self.coord, rmat) + # use larger cell to rotate only coord and shift to the center of cell + cell_rot = 10.0 * paddle.eye(3, dtype=dtype).to(device=env.DEVICE) + + for nfp, nap, fit_diag, scale in itertools.product( + [0, 3], + [0, 4], + [True, False], + [None, self.scale], + ): + ft0 = PolarFittingNet( + self.nt, + self.dd0.dim_out, # dim_descrpt + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=nfp, + numb_aparam=nap, + mixed_types=self.dd0.mixed_types(), + fit_diag=fit_diag, + 
scale=scale, + ).to(env.DEVICE) + if nfp > 0: + ifp = paddle.to_tensor( + self.rng.normal(size=(self.nf, nfp)), dtype=dtype, place=env.DEVICE + ) + else: + ifp = None + if nap > 0: + iap = paddle.to_tensor( + self.rng.normal(size=(self.nf, self.natoms, nap)), + dtype=dtype, + place=env.DEVICE, + ) + else: + iap = None + + res = [] + for xyz in [self.coord, coord_rot]: + ( + extended_coord, + extended_atype, + _, + nlist, + ) = extend_input_and_build_neighbor_list( + xyz + self.shift, + atype, + self.rcut, + self.sel, + self.dd0.mixed_types(), + box=cell_rot, + ) + + rd0, gr0, _, _, _ = self.dd0( + extended_coord, + extended_atype, + nlist, + ) + + ret0 = ft0(rd0, atype, gr0, fparam=ifp, aparam=iap) + res.append(ret0["polarizability"]) + np.testing.assert_allclose( + to_numpy_array(res[1]), + to_numpy_array( + paddle.matmul( + rmat.T, + paddle.matmul(res[0], rmat), + ) + ), + ) + + def test_permu(self): + coord = paddle.matmul(self.coord, self.cell) + for fit_diag, scale in itertools.product([True, False], [None, self.scale]): + ft0 = PolarFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=0, + numb_aparam=0, + mixed_types=self.dd0.mixed_types(), + fit_diag=fit_diag, + scale=scale, + ).to(env.DEVICE) + res = [] + for idx_perm in [[0, 1, 2, 3, 4], [1, 0, 4, 3, 2]]: + atype = self.atype[idx_perm].reshape([1, 5]) + ( + extended_coord, + extended_atype, + _, + nlist, + ) = extend_input_and_build_neighbor_list( + coord[idx_perm], + atype, + self.rcut, + self.sel, + self.dd0.mixed_types(), + box=self.cell, + ) + + rd0, gr0, _, _, _ = self.dd0( + extended_coord, + extended_atype, + nlist, + ) + + ret0 = ft0(rd0, atype, gr0, fparam=None, aparam=None) + res.append(ret0["polarizability"]) + + np.testing.assert_allclose( + to_numpy_array(res[0][:, idx_perm]), + to_numpy_array(res[1]), + ) + + def test_trans(self): + atype = self.atype.reshape([1, 5]) + coord_s = paddle.matmul( + paddle.remainder( + paddle.matmul(self.coord + self.shift, paddle.linalg.inv(self.cell)), + paddle.full([], 1.0), + ), + self.cell, + ) + for fit_diag, scale in itertools.product([True, False], [None, self.scale]): + ft0 = PolarFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=0, + numb_aparam=0, + mixed_types=self.dd0.mixed_types(), + fit_diag=fit_diag, + scale=scale, + ).to(env.DEVICE) + res = [] + for xyz in [self.coord, coord_s]: + ( + extended_coord, + extended_atype, + _, + nlist, + ) = extend_input_and_build_neighbor_list( + xyz, + atype, + self.rcut, + self.sel, + self.dd0.mixed_types(), + box=self.cell, + ) + + rd0, gr0, _, _, _ = self.dd0( + extended_coord, + extended_atype, + nlist, + ) + + ret0 = ft0(rd0, atype, gr0, fparam=0, aparam=0) + res.append(ret0["polarizability"]) + + np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1])) + + +class TestPolarModel(unittest.TestCase): + def setUp(self): + self.natoms = 5 + self.rcut = 4.0 + self.nt = 3 + self.rcut_smth = 0.5 + self.sel = [46, 92, 4] + self.nf = 1 + self.coord = 2 * paddle.rand([self.natoms, 3], dtype=dtype).to(device="cpu") + cell = paddle.rand([3, 3], dtype=dtype).to(device="cpu") + self.cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(device="cpu") + self.atype = paddle.to_tensor([0, 0, 0, 1, 1], place="cpu") + self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + self.ft0 = PolarFittingNet( + self.nt, + self.dd0.dim_out, + embedding_width=self.dd0.get_dim_emb(), + numb_fparam=0, + numb_aparam=0, + 
mixed_types=self.dd0.mixed_types(), + ).to(env.DEVICE) + self.type_mapping = ["O", "H", "B"] + self.model = PolarModel(self.dd0, self.ft0, self.type_mapping) + self.file_path = "model_output.pd" + + @unittest.skip( + "Paddle do not support finetune in frozen models(.json and .pdiparams file), " + "will be supported in the future." + ) + def test_deepdipole_infer(self): + atype = self.atype.reshape([self.nf, self.natoms]) + coord = self.coord.reshape([1, 5, 3]) + cell = self.cell.reshape([1, 9]) + jit_md = paddle.jit.to_static(self.model) + paddle.jit.save(jit_md, self.file_path) + load_md = DeepPolar(self.file_path) + load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=True) + load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=False) + + def tearDown(self) -> None: + if os.path.exists(self.file_path): + os.remove(self.file_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_region.py b/source/tests/pd/model/test_region.py new file mode 100644 index 0000000000..7878e73cab --- /dev/null +++ b/source/tests/pd/model/test_region.py @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.preprocess import ( + Region3D, +) +from deepmd.pd.utils.region import ( + inter2phys, + to_face_distance, +) + +from ...seed import ( + GLOBAL_SEED, +) + +dtype = paddle.float64 + + +class TestRegion(unittest.TestCase): + def setUp(self): + self.cell = paddle.to_tensor( + [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype, place="cpu" + ) + self.cell = self.cell.unsqueeze(0).unsqueeze(0) + self.cell = paddle.tile(self.cell, [4, 5, 1, 1]) + self.prec = 1e-8 + + def test_inter_to_phys(self): + generator = paddle.seed(GLOBAL_SEED) + inter = paddle.rand([4, 5, 3, 3], dtype=dtype).to(device="cpu") + phys = inter2phys(inter, self.cell) + for ii in range(4): + for jj in range(5): + expected_phys = paddle.matmul(inter[ii, jj], self.cell[ii, jj]) + np.testing.assert_allclose( + phys[ii, jj].numpy(), + expected_phys.numpy(), + rtol=self.prec, + atol=self.prec, + ) + + def test_to_face_dist(self): + cell0 = self.cell[0][0].numpy() + vol = np.linalg.det(cell0) + # area of surfaces xy, xz, yz + sxy = np.linalg.norm(np.cross(cell0[0], cell0[1])) + sxz = np.linalg.norm(np.cross(cell0[0], cell0[2])) + syz = np.linalg.norm(np.cross(cell0[1], cell0[2])) + # vol / area gives distance + dz = vol / sxy + dy = vol / sxz + dx = vol / syz + dists = to_face_distance(self.cell) + expected = paddle.to_tensor([dx, dy, dz], dtype=dists.dtype).to(device="cpu") + for ii in range(4): + for jj in range(5): + np.testing.assert_allclose( + dists[ii][jj].numpy(), + expected.numpy(), + rtol=self.prec, + atol=self.prec, + ) + + +class TestLegacyRegion(unittest.TestCase): + def setUp(self): + self.cell = paddle.to_tensor( + [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype, place=env.DEVICE + ) + self.prec = 1e-6 + + def test_inter_to_phys(self): + generator = paddle.seed(GLOBAL_SEED) + inter = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + reg = Region3D(self.cell) + phys = reg.inter2phys(inter) + expected_phys = paddle.matmul(inter, self.cell) + np.testing.assert_allclose( + phys.numpy(), expected_phys.numpy(), rtol=self.prec, atol=self.prec + ) + + def test_inter_to_inter(self): + generator = paddle.seed(GLOBAL_SEED) + inter = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + reg = Region3D(self.cell) + new_inter = 
reg.phys2inter(reg.inter2phys(inter)) + np.testing.assert_allclose( + inter.numpy(), new_inter.numpy(), rtol=self.prec, atol=self.prec + ) + + def test_to_face_dist(self): + pass diff --git a/source/tests/pd/model/test_rot.py b/source/tests/pd/model/test_rot.py new file mode 100644 index 0000000000..4e5f2b950f --- /dev/null +++ b/source/tests/pd/model/test_rot.py @@ -0,0 +1,226 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation import ( # model_dpau, + model_dos, + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, + model_spin, + model_zbl, +) + +dtype = paddle.float64 + + +class RotTest: + def test( + self, + ): + generator = paddle.seed(GLOBAL_SEED) + prec = 1e-9 + natoms = 5 + cell = 10.0 * paddle.eye(3, dtype=dtype).to(device=env.DEVICE) + coord = 2 * paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + spin = 2 * paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + shift = paddle.to_tensor([4, 4, 4], dtype=dtype).to(device=env.DEVICE) + atype = paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32).to( + device=env.DEVICE + ) + from scipy.stats import ( + special_ortho_group, + ) + + test_spin = getattr(self, "test_spin", False) + if not test_spin: + test_keys = ["energy", "force", "virial"] + else: + test_keys = ["energy", "force", "force_mag"] + rmat = paddle.to_tensor(special_ortho_group.rvs(3), dtype=dtype).to( + device=env.DEVICE + ) + + # rotate only coord and shift to the center of cell + coord_rot = paddle.matmul(coord, rmat) + spin_rot = paddle.matmul(spin, rmat) + result_0 = eval_model( + self.model, + (coord + shift).unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret0 = {key: result_0[key].squeeze(0) for key in test_keys} + result_1 = eval_model( + self.model, + (coord_rot + shift).unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin_rot.unsqueeze(0), + ) + ret1 = {key: result_1[key].squeeze(0) for key in test_keys} + for key in test_keys: + if key in ["energy"]: + np.testing.assert_allclose( + ret0[key].numpy(), ret1[key].numpy(), rtol=prec, atol=prec + ) + elif key in ["force", "force_mag"]: + np.testing.assert_allclose( + paddle.matmul(ret0[key], rmat).numpy(), + ret1[key].numpy(), + rtol=prec, + atol=prec, + ) + elif key == "virial": + if not hasattr(self, "test_virial") or self.test_virial: + np.testing.assert_allclose( + paddle.matmul( + rmat.T, paddle.matmul(ret0[key].reshape([3, 3]), rmat) + ).numpy(), + ret1[key].reshape([3, 3]).numpy(), + rtol=prec, + atol=prec, + ) + else: + raise RuntimeError(f"Unexpected test key {key}") + # rotate coord and cell + paddle.seed(0) + cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(device=env.DEVICE) + coord = paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + coord = paddle.matmul(coord, cell) + spin = paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + atype = paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32).to( + device=env.DEVICE + ) + coord_rot = paddle.matmul(coord, rmat) + spin_rot = paddle.matmul(spin, rmat) + cell_rot = paddle.matmul(cell, rmat) + result_0 = eval_model( + self.model, + coord.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret0 = {key: result_0[key].squeeze(0) for 
key in test_keys} + result_1 = eval_model( + self.model, + coord_rot.unsqueeze(0), + cell_rot.unsqueeze(0), + atype, + spins=spin_rot.unsqueeze(0), + ) + ret1 = {key: result_1[key].squeeze(0) for key in test_keys} + for key in test_keys: + if key in ["energy"]: + np.testing.assert_allclose( + ret0[key].numpy(), ret1[key].numpy(), rtol=prec, atol=prec + ) + elif key in ["force", "force_mag"]: + np.testing.assert_allclose( + paddle.matmul(ret0[key], rmat).numpy(), + ret1[key].numpy(), + rtol=prec, + atol=prec, + ) + elif key == "virial": + if not hasattr(self, "test_virial") or self.test_virial: + np.testing.assert_allclose( + paddle.matmul( + rmat.T, paddle.matmul(ret0[key].reshape([3, 3]), rmat) + ).numpy(), + ret1[key].reshape([3, 3]).numpy(), + rtol=prec, + atol=prec, + ) + + +class TestEnergyModelSeA(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_se_e2_a) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestDOSModelSeA(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA1(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA2(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelDPA2(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "direct_force_ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelHybrid(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelHybrid(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + model_params["fitting_net"]["type"] = "direct_force_ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelZBL(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_zbl) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinSeA(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_spin) + self.type_split = False + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_rot_denoise.py b/source/tests/pd/model/test_rot_denoise.py new file mode 100644 index 0000000000..9526084efe --- /dev/null +++ b/source/tests/pd/model/test_rot_denoise.py @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation_denoise import ( + model_dpa1, + model_dpa2, +) + +dtype = paddle.float64 + + +class RotDenoiseTest: + def test( + self, + ): + generator = 
paddle.seed(GLOBAL_SEED) + prec = 1e-10 + natoms = 5 + cell = 10.0 * paddle.eye(3, dtype=dtype).to(env.DEVICE) + coord = 2 * paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + shift = paddle.to_tensor([4, 4, 4], dtype=dtype).to(env.DEVICE) + atype = paddle.to_tensor([0, 0, 0, 1, 1]).to(env.DEVICE) + from scipy.stats import ( + special_ortho_group, + ) + + rmat = paddle.to_tensor(special_ortho_group.rvs(3), dtype=dtype).to(env.DEVICE) + + # rotate only coord and shift to the center of cell + coord_rot = paddle.matmul(coord, rmat) + update_c0, logits0 = eval_model( + self.model, + (coord + shift).unsqueeze(0), + cell.unsqueeze(0), + atype, + denoise=True, + ) + update_c0 = update_c0 - (coord + shift).unsqueeze(0) + ret0 = {"updated_coord": update_c0.squeeze(0), "logits": logits0.squeeze(0)} + update_c1, logits1 = eval_model( + self.model, + (coord_rot + shift).unsqueeze(0), + cell.unsqueeze(0), + atype, + denoise=True, + ) + update_c1 = update_c1 - (coord_rot + shift).unsqueeze(0) + ret1 = {"updated_coord": update_c1.squeeze(0), "logits": logits1.squeeze(0)} + np.testing.assert_allclose( + paddle.matmul(ret0["updated_coord"], rmat).numpy(), + ret1["updated_coord"].numpy(), + rtol=prec, + atol=prec, + ) + np.testing.assert_allclose( + ret0["logits"].numpy(), ret1["logits"].numpy(), rtol=prec, atol=prec + ) + + # rotate coord and cell + paddle.seed(0) + cell = paddle.rand([3, 3], dtype=dtype).to(env.DEVICE) + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(env.DEVICE) + coord = paddle.rand([natoms, 3], dtype=dtype).to(env.DEVICE) + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1]).to(env.DEVICE) + coord_rot = paddle.matmul(coord, rmat) + cell_rot = paddle.matmul(cell, rmat) + update_c0, logits0 = eval_model( + self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True + ) + ret0 = {"updated_coord": update_c0.squeeze(0), "logits": logits0.squeeze(0)} + update_c1, logits1 = eval_model( + self.model, + coord_rot.unsqueeze(0), + cell_rot.unsqueeze(0), + atype, + denoise=True, + ) + ret1 = {"updated_coord": update_c1.squeeze(0), "logits": logits1.squeeze(0)} + np.testing.assert_allclose( + ret0["logits"].numpy(), ret1["logits"].numpy(), rtol=prec, atol=prec + ) + np.testing.assert_allclose( + paddle.matmul(ret0["updated_coord"], rmat).numpy(), + ret1["updated_coord"].numpy(), + rtol=prec, + atol=prec, + ) + + +@unittest.skip("support of the denoise is temporally disabled") +class TestDenoiseModelDPA1(unittest.TestCase, RotDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +@unittest.skip("support of the denoise is temporally disabled") +class TestDenoiseModelDPA2(unittest.TestCase, RotDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +# @unittest.skip("hybrid not supported at the moment") +# class TestEnergyModelHybrid(unittest.TestCase, TestRotDenoise): +# def setUp(self): +# model_params = copy.deepcopy(model_hybrid_denoise) +# self.type_split = True +# self.model = get_model(model_params).to(env.DEVICE) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_rotation.py b/source/tests/pd/model/test_rotation.py new file mode 100644 index 0000000000..94e3442631 --- /dev/null +++ b/source/tests/pd/model/test_rotation.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json 
+import unittest +from pathlib import ( + Path, +) +from typing import ( + Optional, +) + +import numpy as np +import paddle +from scipy.stats import ( + special_ortho_group, +) + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.utils.data import ( + DeepmdData, +) + + +class CheckSymmetry(DeepmdData): + def __init__( + self, + sys_path: str, + type_map: Optional[list[str]] = None, + ): + super().__init__(sys_path=sys_path, type_map=type_map) + self.add("energy", 1, atomic=False, must=False, high_prec=True) + self.add("force", 3, atomic=True, must=False, high_prec=False) + self.add("virial", 9, atomic=False, must=False, high_prec=False) + + def get_rotation(self, index, rotation_matrix): + for i in range( + 0, len(self.dirs) + 1 + ): # note: if different sets can be merged, prefix sum is unused to calculate + if index < self.prefix_sum[i]: + break + frames = self._load_set(self.dirs[i - 1]) + frames["coord"] = np.dot( + rotation_matrix, frames["coord"].reshape(-1, 3).T + ).T.reshape(self.nframes, -1) + frames["box"] = np.dot( + rotation_matrix, frames["box"].reshape(-1, 3).T + ).T.reshape(self.nframes, -1) + frames["force"] = np.dot( + rotation_matrix, frames["force"].reshape(-1, 3).T + ).T.reshape(self.nframes, -1) + frame = self._get_subdata(frames, index - self.prefix_sum[i - 1]) + frame = self.reformat_data_torch(frame) + return frame + + +def get_data(batch): + inputs = {} + for key in ["coord", "atype", "box"]: + inputs[key] = paddle.to_tensor(batch[key]).to(device=env.DEVICE) + inputs[key] = inputs[key].unsqueeze(0).to(env.DEVICE) + return inputs + + +class TestRotation(unittest.TestCase): + def setUp(self): + with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin: + self.config = json.load(fin) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.rotation = special_ortho_group.rvs(3) + device = paddle.get_device() + paddle.set_device("cpu") + self.get_dataset(0) + paddle.set_device(device) + self.get_model() + + def get_model(self): + self.model = get_model(self.config["model"]).to(env.DEVICE) + + def get_dataset(self, system_index=0, batch_index=0): + systems = self.config["training"]["training_data"]["systems"] + type_map = self.config["model"]["type_map"] + dpdatasystem = CheckSymmetry(sys_path=systems[system_index], type_map=type_map) + self.origin_batch = dpdatasystem.get_item_paddle(batch_index) + self.rotated_batch = dpdatasystem.get_rotation(batch_index, self.rotation) + + def test_rotation(self): + result1 = self.model(**get_data(self.origin_batch)) + result2 = self.model(**get_data(self.rotated_batch)) + rotation = paddle.to_tensor(self.rotation).to(env.DEVICE) + np.testing.assert_allclose(result1["energy"].numpy(), result2["energy"].numpy()) + if "force" in result1: + np.testing.assert_allclose( + result2["force"][0].numpy(), + paddle.matmul(rotation, result1["force"][0].T).T.numpy(), + ) + if "virial" in result1: + np.testing.assert_allclose( + result2["virial"][0].view([3, 3]).numpy(), + paddle.matmul( + paddle.matmul(rotation, result1["virial"][0].view([3, 3]).T), + rotation.T, + ).numpy(), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_saveload_dpa1.py b/source/tests/pd/model/test_saveload_dpa1.py new file mode 100644 index 0000000000..54a82e479a --- /dev/null +++ 
b/source/tests/pd/model/test_saveload_dpa1.py @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import json +import os +import unittest +from pathlib import ( + Path, +) + +import numpy as np +import paddle +from paddle.io import ( + DataLoader, +) + +from deepmd.pd.loss import ( + EnergyStdLoss, +) +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.dataloader import ( + BufferedIterator, + DpLoaderSet, +) +from deepmd.pd.utils.stat import ( + make_stat_input, +) +from deepmd.tf.common import ( + expand_sys_str, +) + + +def get_dataset(config): + model_config = config["model"] + rcut = model_config["descriptor"]["rcut"] + sel = model_config["descriptor"]["sel"] + systems = config["training"]["validation_data"]["systems"] + if isinstance(systems, str): + systems = expand_sys_str(systems) + batch_size = config["training"]["training_data"]["batch_size"] + type_map = model_config["type_map"] + + dataset = DpLoaderSet(systems, batch_size, type_map) + data_stat_nbatch = model_config.get("data_stat_nbatch", 10) + sampled = make_stat_input(dataset.systems, dataset.dataloaders, data_stat_nbatch) + return dataset, sampled + + +class TestSaveLoadDPA1(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as fin: + self.config = json.load(fin) + self.config["loss"]["starter_learning_rate"] = self.config["learning_rate"][ + "start_lr" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.dataset, self.sampled = get_dataset(self.config) + self.training_dataloader = DataLoader( + self.dataset, + batch_sampler=paddle.io.BatchSampler( + sampler=paddle.io.RandomSampler(self.dataset), + drop_last=False, + ), + num_workers=0, # setting to 0 diverges the behavior of its iterator; should be >=1 + collate_fn=lambda x: x[0], + ) + device = paddle.get_device() + paddle.set_device("cpu") + self.training_data = BufferedIterator(iter(self.training_dataloader)) + paddle.set_device(device) + self.loss = EnergyStdLoss(**self.config["loss"]) + self.cur_lr = 1 + self.task_key = "Default" + self.input_dict, self.label_dict = self.get_data() + self.start_lr = self.config["learning_rate"]["start_lr"] + + def get_model_result(self, read=False, model_file="tmp_model.pd"): + wrapper = self.create_wrapper(read) + optimizer = paddle.optimizer.Adam( + learning_rate=self.start_lr, parameters=wrapper.parameters() + ) + optimizer.clear_grad() + if read: + wrapper.set_state_dict(paddle.load(model_file)) + os.remove(model_file) + else: + paddle.save(wrapper.state_dict(), model_file) + result = wrapper( + **self.input_dict, + cur_lr=self.cur_lr, + label=self.label_dict, + task_key=self.task_key, + )[0] + return result + + def create_wrapper(self, read: bool): + model_config = copy.deepcopy(self.config["model"]) + model_config["resuming"] = read + model_config["stat_file_dir"] = "stat_files" + model_config["stat_file"] = "stat.hdf5" + model_config["stat_file_path"] = os.path.join( + model_config["stat_file_dir"], model_config["stat_file"] + ) + model = get_model(model_config).to(env.DEVICE) + return ModelWrapper(model, self.loss) + + def get_data(self): + try: + batch_data = next(iter(self.training_data)) + except StopIteration: + # Refresh the status of the 
dataloader to start from a new epoch + self.training_data = BufferedIterator(iter(self.training_dataloader)) + batch_data = next(iter(self.training_data)) + input_dict = {} + for item in ["coord", "atype", "box"]: + if item in batch_data: + input_dict[item] = batch_data[item].to(env.DEVICE) + else: + input_dict[item] = None + label_dict = {} + for item in ["energy", "force", "virial"]: + if item in batch_data: + label_dict[item] = batch_data[item].to(env.DEVICE) + return input_dict, label_dict + + def test_saveload(self): + result1 = self.get_model_result() + result2 = self.get_model_result(read=True) + for item in result1: + np.testing.assert_allclose(result1[item].numpy(), result2[item].numpy()) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_saveload_se_e2_a.py b/source/tests/pd/model/test_saveload_se_e2_a.py new file mode 100644 index 0000000000..c1c2ba2cdd --- /dev/null +++ b/source/tests/pd/model/test_saveload_se_e2_a.py @@ -0,0 +1,138 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import json +import os +import unittest +from pathlib import ( + Path, +) + +import numpy as np +import paddle +from paddle.io import ( + DataLoader, +) + +from deepmd.pd.loss import ( + EnergyStdLoss, +) +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.dataloader import ( + BufferedIterator, + DpLoaderSet, +) +from deepmd.pd.utils.stat import ( + make_stat_input, +) +from deepmd.tf.common import ( + expand_sys_str, +) + + +def get_dataset(config): + model_config = config["model"] + rcut = model_config["descriptor"]["rcut"] + sel = model_config["descriptor"]["sel"] + systems = config["training"]["validation_data"]["systems"] + if isinstance(systems, str): + systems = expand_sys_str(systems) + batch_size = config["training"]["training_data"]["batch_size"] + type_map = model_config["type_map"] + + dataset = DpLoaderSet(systems, batch_size, type_map) + data_stat_nbatch = model_config.get("data_stat_nbatch", 10) + sampled = make_stat_input(dataset.systems, dataset.dataloaders, data_stat_nbatch) + return dataset, sampled + + +class TestSaveLoadSeA(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_e2_a.json") + with open(input_json) as fin: + self.config = json.load(fin) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["loss"]["starter_learning_rate"] = self.config["learning_rate"][ + "start_lr" + ] + self.dataset, self.sampled = get_dataset(self.config) + self.training_dataloader = DataLoader( + self.dataset, + batch_sampler=paddle.io.BatchSampler( + sampler=paddle.io.RandomSampler(self.dataset), + drop_last=False, + ), + num_workers=0, # setting to 0 diverges the behavior of its iterator; should be >=1 + collate_fn=lambda batch: batch[0], + ) + device = paddle.get_device() + paddle.set_device("cpu") + self.training_data = BufferedIterator(iter(self.training_dataloader)) + paddle.set_device(device) + self.loss = EnergyStdLoss(**self.config["loss"]) + self.cur_lr = 1 + self.task_key = "Default" + self.input_dict, self.label_dict = self.get_data() + self.start_lr = self.config["learning_rate"]["start_lr"] + + def get_model_result(self, read=False, model_file="tmp_model.pd"): + wrapper = self.create_wrapper() + optimizer = 
paddle.optimizer.Adam( + learning_rate=self.start_lr, parameters=wrapper.parameters() + ) + optimizer.clear_grad() + if read: + wrapper.set_state_dict(paddle.load(model_file)) + os.remove(model_file) + else: + paddle.save(wrapper.state_dict(), model_file) + result = wrapper( + **self.input_dict, + cur_lr=self.cur_lr, + label=self.label_dict, + task_key=self.task_key, + )[0] + return result + + def create_wrapper(self): + model_config = copy.deepcopy(self.config["model"]) + model = get_model(model_config).to(env.DEVICE) + return ModelWrapper(model, self.loss) + + def get_data(self): + try: + batch_data = next(iter(self.training_data)) + except StopIteration: + # Refresh the status of the dataloader to start from a new epoch + self.training_data = BufferedIterator(iter(self.training_dataloader)) + batch_data = next(iter(self.training_data)) + input_dict = {} + for item in ["coord", "atype", "box"]: + if item in batch_data: + input_dict[item] = batch_data[item].to(env.DEVICE) + else: + input_dict[item] = None + label_dict = {} + for item in ["energy", "force", "virial"]: + if item in batch_data: + label_dict[item] = batch_data[item].to(env.DEVICE) + return input_dict, label_dict + + def test_saveload(self): + result1 = self.get_model_result() + result2 = self.get_model_result(read=True) + for item in result1: + np.testing.assert_allclose(result1[item].numpy(), result2[item].numpy()) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_se_atten_v2.py b/source/tests/pd/model/test_se_atten_v2.py new file mode 100644 index 0000000000..3ab28eeacd --- /dev/null +++ b/source/tests/pd/model/test_se_atten_v2.py @@ -0,0 +1,152 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.descriptor.se_atten_v2 import DescrptSeAttenV2 as DPDescrptSeAttenV2 +from deepmd.pd.model.descriptor.se_atten_v2 import ( + DescrptSeAttenV2, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) +from .test_mlp import ( + get_tols, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestDescrptSeAttenV2(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(100) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, to, prec, ect in itertools.product( + [False, True], # resnet_dt + [False, True], # type_one_side + [ + "float64", + ], # precision + [False, True], # use_econf_tebd + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + + # dpa1 new impl + dd0 = DescrptSeAttenV2( + self.rcut, + self.rcut_smth, + self.sel_mix, + self.nt, + attn_layer=2, + precision=prec, + resnet_dt=idt, + type_one_side=to, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + dd0.se_atten.mean = paddle.to_tensor(davg, dtype=dtype).to( + device=env.DEVICE + ) + dd0.se_atten.stddev = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + rd0, _, _, _, _ = dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + 
paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + # serialization + dd1 = DescrptSeAttenV2.deserialize(dd0.serialize()) + rd1, _, _, _, _ = dd1( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + # dp impl + dd2 = DPDescrptSeAttenV2.deserialize(dd0.serialize()) + rd2, _, _, _, _ = dd2.call( + self.coord_ext, + self.atype_ext, + self.nlist, + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd2, + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + + def test_jit( + self, + ): + rng = np.random.default_rng() + _, _, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec, to, ect in itertools.product( + [ + False, + ], # resnet_dt + [ + "float64", + ], # precision + [ + False, + ], # type_one_side + [False, True], # use_econf_tebd + ): + dtype = PRECISION_DICT[prec] + # dpa1 new impl + dd0 = DescrptSeAttenV2( + self.rcut, + self.rcut_smth, + self.sel, + self.nt, + precision=prec, + resnet_dt=idt, + type_one_side=to, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ) + dd0.se_atten.mean = paddle.to_tensor(davg, dtype=dtype).to( + device=env.DEVICE + ) + dd0.se_atten.dstd = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + _ = paddle.jit.to_static(dd0) diff --git a/source/tests/pd/model/test_se_e2_a.py b/source/tests/pd/model/test_se_e2_a.py new file mode 100644 index 0000000000..b1e6abe5ae --- /dev/null +++ b/source/tests/pd/model/test_se_e2_a.py @@ -0,0 +1,137 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.descriptor import DescrptSeA as DPDescrptSeA +from deepmd.pd.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) +from .test_mlp import ( + get_tols, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +# to be merged with the tf test case +class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec, em in itertools.product( + [False, True], + ["float64", "float32"], + [[], [[0, 1]], [[1, 1]]], + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + # sea new impl + dd0 = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + exclude_types=em, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + dd0.sea.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.sea.dstd = paddle.to_tensor(dstd, dtype=dtype).to(device=env.DEVICE) + rd0, _, _, _, _ = dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + 
paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + # serialization + dd1 = DescrptSeA.deserialize(dd0.serialize()) + rd1, gr1, _, _, sw1 = dd1( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy()[0][self.perm[: self.nloc]], + rd0.detach().cpu().numpy()[1], + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + # dp impl + dd2 = DPDescrptSeA.deserialize(dd0.serialize()) + rd2, gr2, _, _, sw2 = dd2.call( + self.coord_ext, + self.atype_ext, + self.nlist, + ) + for aa, bb in zip([rd1, gr1, sw1], [rd2, gr2, sw2]): + np.testing.assert_allclose( + aa.detach().cpu().numpy(), + bb, + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + + def test_jit( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec in itertools.product( + [False, True], + ["float64", "float32"], + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + # sea new impl + dd0 = DescrptSeA( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + seed=GLOBAL_SEED, + ) + dd0.sea.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.sea.dstd = paddle.to_tensor(dstd, dtype=dtype).to(device=env.DEVICE) + dd1 = DescrptSeA.deserialize(dd0.serialize()) + model = paddle.jit.to_static(dd0) + model = paddle.jit.to_static(dd1) diff --git a/source/tests/pd/model/test_se_t.py b/source/tests/pd/model/test_se_t.py new file mode 100644 index 0000000000..8eb0db3f26 --- /dev/null +++ b/source/tests/pd/model/test_se_t.py @@ -0,0 +1,139 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.descriptor import DescrptSeT as DPDescrptSeT +from deepmd.pd.model.descriptor.se_t import ( + DescrptSeT, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) +from .test_mlp import ( + get_tols, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +# to be merged with the tf test case +class TestDescrptSeT(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec, em in itertools.product( + [False, True], + ["float64", "float32"], + [ + [], + [[0, 1]], + [[1, 1]], + ], + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + # pt impl + dd0 = DescrptSeT( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + exclude_types=em, + ).to(env.DEVICE) + dd0.seat.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.seat.dstd = paddle.to_tensor(dstd, dtype=dtype).to(device=env.DEVICE) + rd0, _, _, _, sw0 = dd0( + 
paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + # serialization + dd1 = DescrptSeT.deserialize(dd0.serialize()) + rd1, _, _, _, sw1 = dd1( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy()[0][self.perm[: self.nloc]], + rd0.detach().cpu().numpy()[1], + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + # dp impl + dd2 = DPDescrptSeT.deserialize(dd0.serialize()) + rd2, _, _, _, sw2 = dd2.call( + self.coord_ext, + self.atype_ext, + self.nlist, + ) + for aa, bb in zip([rd1, sw1], [rd2, sw2]): + np.testing.assert_allclose( + aa.detach().cpu().numpy(), + bb, + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + + def test_jit( + self, + ): + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + for idt, prec in itertools.product( + [False, True], + ["float64", "float32"], + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + # pt impl + dd0 = DescrptSeT( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + ) + dd0.seat.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.seat.dstd = paddle.to_tensor(dstd, dtype=dtype).to(device=env.DEVICE) + dd1 = DescrptSeT.deserialize(dd0.serialize()) + model = paddle.jit.to_static(dd0) + model = paddle.jit.to_static(dd1) diff --git a/source/tests/pd/model/test_smooth.py b/source/tests/pd/model/test_smooth.py new file mode 100644 index 0000000000..796b15faf4 --- /dev/null +++ b/source/tests/pd/model/test_smooth.py @@ -0,0 +1,276 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation import ( # model_dpau, + model_dos, + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, + model_spin, + model_zbl, +) + +dtype = paddle.float64 + + +class SmoothTest: + def test( + self, + ): + generator = paddle.seed(GLOBAL_SEED) + # displacement of atoms + epsilon = 1e-5 if self.epsilon is None else self.epsilon + # required prec. relative prec is not checked. 
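+ # rprec = 0 turns the relative tolerance off, so the assert_allclose calls
+ # below only enforce the absolute tolerance aprec.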
+ rprec = 0.0 + aprec = 1e-5 if self.aprec is None else self.aprec + + natoms = 10 + cell = 8.6 * paddle.eye(3, dtype=dtype).to(device=env.DEVICE) + atype0 = paddle.arange(3, dtype=dtype).to(device=env.DEVICE) + atype1 = paddle.randint(0, 3, [natoms - 3]).to( + device=env.DEVICE, dtype=atype0.dtype + ) + atype = paddle.concat([atype0, atype1]).reshape([natoms]) + coord0 = ( + paddle.to_tensor( + [ + 0.0, + 0.0, + 0.0, + 4.0 - 0.5 * epsilon, + 0.0, + 0.0, + 0.0, + 4.0 - 0.5 * epsilon, + 0.0, + ], + dtype=dtype, + ) + .reshape([-1, 3]) + .to(device=env.DEVICE) + ) + coord1 = paddle.rand( + [natoms - coord0.shape[0], 3], + dtype=dtype, + ).to(device=env.DEVICE) + coord1 = paddle.matmul(coord1, cell) + coord = paddle.concat([coord0, coord1], axis=0) + spin = paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + coord0 = paddle.clone(coord) + coord1 = paddle.clone(coord) + coord1[1][0] += epsilon + coord2 = paddle.clone(coord) + coord2[2][1] += epsilon + coord3 = paddle.clone(coord) + coord3[1][0] += epsilon + coord3[2][1] += epsilon + test_spin = getattr(self, "test_spin", False) + if not test_spin: + test_keys = ["energy", "force", "virial"] + else: + test_keys = ["energy", "force", "force_mag", "virial"] + + result_0 = eval_model( + self.model, + coord0.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret0 = {key: result_0[key].squeeze(0) for key in test_keys} + result_1 = eval_model( + self.model, + coord1.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret1 = {key: result_1[key].squeeze(0) for key in test_keys} + result_2 = eval_model( + self.model, + coord2.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret2 = {key: result_2[key].squeeze(0) for key in test_keys} + result_3 = eval_model( + self.model, + coord3.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret3 = {key: result_3[key].squeeze(0) for key in test_keys} + + def compare(ret0, ret1): + for key in test_keys: + if key in ["energy"]: + np.testing.assert_allclose( + ret0[key].numpy(), ret1[key].numpy(), rtol=rprec, atol=aprec + ) + elif key in ["force", "force_mag"]: + # plus 1. 
to avoid the divided-by-zero issue + np.testing.assert_allclose( + (1.0 + ret0[key]).numpy(), + (1.0 + ret1[key]).numpy(), + rtol=rprec, + atol=aprec, + ) + elif key == "virial": + if not hasattr(self, "test_virial") or self.test_virial: + np.testing.assert_allclose( + (1.0 + ret0[key]).numpy(), + (1.0 + ret1[key]).numpy(), + rtol=rprec, + atol=aprec, + ) + else: + raise RuntimeError(f"Unexpected test key {key}") + + compare(ret0, ret1) + compare(ret1, ret2) + compare(ret0, ret3) + + +class TestEnergyModelSeA(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_se_e2_a) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + +class TestDOSModelSeA(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + +class TestEnergyModelDPA1(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + # less degree of smoothness, + # error can be systematically removed by reducing epsilon + self.epsilon = 1e-5 + self.aprec = 1e-5 + + +class TestEnergyModelDPA1Excl1(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + model_params["pair_exclude_types"] = [[0, 1]] + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + # less degree of smoothness, + # error can be systematically removed by reducing epsilon + self.epsilon = 1e-5 + self.aprec = 1e-5 + + +class TestEnergyModelDPA1Excl12(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + model_params["pair_exclude_types"] = [[0, 1], [0, 2]] + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + # less degree of smoothness, + # error can be systematically removed by reducing epsilon + self.epsilon = 1e-5 + self.aprec = 1e-5 + + +class TestEnergyModelDPA2(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["descriptor"]["repinit"]["rcut"] = 8 + model_params["descriptor"]["repinit"]["rcut_smth"] = 3.5 + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = 1e-5, 1e-4 + + +class TestEnergyModelDPA2_1(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + +class TestEnergyModelDPA2_2(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + +class TestEnergyModelHybrid(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + +class TestEnergyModelZBL(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_zbl) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + 
self.epsilon, self.aprec = None, 5e-2 + + +class TestEnergyModelSpinSeA(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_spin) + self.type_split = False + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + +# class TestEnergyFoo(unittest.TestCase): +# def test(self): +# model_params = model_dpau +# self.model = EnergyModelDPAUni(model_params).to(env.DEVICE) + +# natoms = 5 +# cell = paddle.rand([3, 3], dtype=dtype) +# cell = (cell + cell.T) + 5. * paddle.eye(3) +# coord = paddle.rand([natoms, 3], dtype=dtype) +# coord = paddle.matmul(coord, cell) +# atype = paddle.to_tensor([0, 0, 0, 1, 1]) +# idx_perm = [1, 0, 4, 3, 2] +# ret0 = infer_model(self.model, coord, cell, atype, type_split=True) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_smooth_denoise.py b/source/tests/pd/model/test_smooth_denoise.py new file mode 100644 index 0000000000..d94f15863d --- /dev/null +++ b/source/tests/pd/model/test_smooth_denoise.py @@ -0,0 +1,143 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation_denoise import ( + model_dpa2, +) + +dtype = paddle.float64 + + +class SmoothDenoiseTest: + def test( + self, + ): + # displacement of atoms + epsilon = 1e-5 if self.epsilon is None else self.epsilon + # required prec. relative prec is not checked. + rprec = 0 + aprec = 1e-5 if self.aprec is None else self.aprec + + natoms = 10 + cell = 8.6 * paddle.eye(3, dtype=dtype).to(env.DEVICE) + seed = paddle.seed(GLOBAL_SEED) + atype = paddle.randint(0, 3, [natoms]).to(device=env.DEVICE) + coord0 = ( + paddle.to_tensor( + [ + 0.0, + 0.0, + 0.0, + 4.0 - 0.5 * epsilon, + 0.0, + 0.0, + 0.0, + 4.0 - 0.5 * epsilon, + 0.0, + ], + dtype=dtype, + ) + .reshape([-1, 3]) + .to(env.DEVICE) + ) + coord1 = paddle.rand([natoms - coord0.shape[0], 3], dtype=dtype).to(env.DEVICE) + coord1 = paddle.matmul(coord1, cell) + coord = paddle.concat([coord0, coord1], axis=0) + + coord0 = paddle.clone(coord) + coord1 = paddle.clone(coord) + coord1[1][0] += epsilon + coord2 = paddle.clone(coord) + coord2[2][1] += epsilon + coord3 = paddle.clone(coord) + coord3[1][0] += epsilon + coord3[2][1] += epsilon + + update_c0, logits0 = eval_model( + self.model, coord0.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True + ) + ret0 = {"updated_coord": update_c0.squeeze(0), "logits": logits0.squeeze(0)} + update_c1, logits1 = eval_model( + self.model, coord1.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True + ) + ret1 = {"updated_coord": update_c1.squeeze(0), "logits": logits1.squeeze(0)} + update_c2, logits2 = eval_model( + self.model, coord2.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True + ) + ret2 = {"updated_coord": update_c2.squeeze(0), "logits": logits2.squeeze(0)} + update_c3, logits3 = eval_model( + self.model, coord3.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True + ) + ret3 = {"updated_coord": update_c3.squeeze(0), "logits": logits3.squeeze(0)} + + def compare(ret0, ret1): + np.testing.assert_allclose( + ret0["updated_coord"].numpy(), + ret1["updated_coord"].numpy(), + rtol=rprec, + atol=aprec, + ) + np.testing.assert_allclose( + ret0["logits"].numpy(), ret1["logits"].numpy(), rtol=rprec, atol=aprec + ) + + compare(ret0, ret1) + 
compare(ret1, ret2) + compare(ret0, ret3) + + +@unittest.skip("support of the denoise is temporarily disabled") +class TestDenoiseModelDPA2(unittest.TestCase, SmoothDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["descriptor"]["sel"] = 8 + model_params["descriptor"]["rcut_smth"] = 3.5 + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + self.epsilon = 1e-7 + self.aprec = 1e-5 + + +@unittest.skip("support of the denoise is temporarily disabled") +class TestDenoiseModelDPA2_1(unittest.TestCase, SmoothDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + # model_params["descriptor"]["combine_grrg"] = True + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + self.epsilon = 1e-7 + self.aprec = 1e-5 + + +# @unittest.skip("hybrid not supported at the moment") +# class TestDenoiseModelHybrid(unittest.TestCase, TestSmoothDenoise): +# def setUp(self): +# model_params = copy.deepcopy(model_hybrid_denoise) +# self.type_split = True +# self.model = get_model(model_params).to(env.DEVICE) +# self.epsilon, self.aprec = None, None +# self.epsilon = 1e-7 +# self.aprec = 1e-5 + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_trans.py b/source/tests/pd/model/test_trans.py new file mode 100644 index 0000000000..ac5fce2dc3 --- /dev/null +++ b/source/tests/pd/model/test_trans.py @@ -0,0 +1,160 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation import ( # model_dpau, + model_dos, + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, + model_spin, + model_zbl, +) + +dtype = paddle.float64 + + +class TransTest: + def test( + self, + ): + natoms = 5 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(device=env.DEVICE) + coord = paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + coord = paddle.matmul(coord, cell) + spin = paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + atype = paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32).to( + device=env.DEVICE + ) + shift = (paddle.rand([3], dtype=dtype).to(device=env.DEVICE) - 0.5) * 2.0 + coord_s = paddle.matmul( + paddle.remainder( + paddle.matmul(coord + shift, paddle.linalg.inv(cell)), paddle.ones([]) + ), + cell, + ) + test_spin = getattr(self, "test_spin", False) + if not test_spin: + test_keys = ["energy", "force", "virial"] + else: + test_keys = ["energy", "force", "force_mag", "virial"] + result_0 = eval_model( + self.model, + coord.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret0 = {key: result_0[key].squeeze(0) for key in test_keys} + result_1 = eval_model( + self.model, + coord_s.unsqueeze(0), + cell.unsqueeze(0), + atype, + spins=spin.unsqueeze(0), + ) + ret1 = {key: result_1[key].squeeze(0) for key in test_keys} + prec = 1e-7 + for key in test_keys: + if key in ["energy", "force", "force_mag"]: + np.testing.assert_allclose( + ret0[key].numpy(), ret1[key].numpy(), rtol=prec, atol=prec + ) + elif key == "virial": + if not hasattr(self, "test_virial") or self.test_virial: +
np.testing.assert_allclose( + ret0[key].numpy(), ret1[key].numpy(), rtol=prec, atol=prec + ) + else: + raise RuntimeError(f"Unexpected test key {key}") + + +class TestEnergyModelSeA(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_se_e2_a) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestDOSModelSeA(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA1(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA2(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelDPA2(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "direct_force_ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelHybrid(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +class TestForceModelHybrid(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + model_params["fitting_net"]["type"] = "direct_force_ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelZBL(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_zbl) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelSpinSeA(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_spin) + self.type_split = False + self.test_spin = True + self.model = get_model(model_params).to(env.DEVICE) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_trans_denoise.py b/source/tests/pd/model/test_trans_denoise.py new file mode 100644 index 0000000000..8317d4d2ae --- /dev/null +++ b/source/tests/pd/model/test_trans_denoise.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import numpy as np +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation_denoise import ( + model_dpa1, + model_dpa2, + model_hybrid, +) + +dtype = paddle.float64 + + +class TransDenoiseTest: + def test( + self, + ): + natoms = 5 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype).to(env.DEVICE) + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(env.DEVICE) + coord = paddle.rand([natoms, 3], dtype=dtype).to(env.DEVICE) + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1]).to(env.DEVICE) + shift = (paddle.rand([3], dtype=dtype) - 0.5).to(env.DEVICE) * 2.0 + coord_s = paddle.matmul( + paddle.remainder( + paddle.matmul(coord + shift, paddle.linalg.inv(cell)), 1.0 + ), + cell, + ) + updated_c0, logits0 = eval_model( + self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype, 
denoise=True + ) + updated_c0 = updated_c0 - coord.unsqueeze(0) + ret0 = {"updated_coord": updated_c0.squeeze(0), "logits": logits0.squeeze(0)} + updated_c1, logits1 = eval_model( + self.model, coord_s.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True + ) + updated_c1 = updated_c1 - coord_s.unsqueeze(0) + ret1 = {"updated_coord": updated_c1.squeeze(0), "logits": logits1.squeeze(0)} + prec = 1e-10 + np.testing.assert_allclose( + ret0["updated_coord"].numpy(), + ret1["updated_coord"].numpy(), + rtol=prec, + atol=prec, + ) + np.testing.assert_allclose( + ret0["logits"].numpy(), ret1["logits"].numpy(), rtol=prec, atol=prec + ) + + +@unittest.skip("support of the denoise is temporarily disabled") +class TestDenoiseModelDPA1(unittest.TestCase, TransDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa1) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +@unittest.skip("support of the denoise is temporarily disabled") +class TestDenoiseModelDPA2(unittest.TestCase, TransDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +@unittest.skip("hybrid not supported at the moment") +class TestDenoiseModelHybrid(unittest.TestCase, TransDenoiseTest): + def setUp(self): + model_params = copy.deepcopy(model_hybrid) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/test_unused_params.py b/source/tests/pd/model/test_unused_params.py new file mode 100644 index 0000000000..3424e9dafa --- /dev/null +++ b/source/tests/pd/model/test_unused_params.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import paddle + +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) +from ..common import ( + eval_model, +) +from .test_permutation import ( + model_dpa2, +) + +dtype = paddle.float64 + + +class TestUnusedParamsDPA2(unittest.TestCase): + @unittest.skip("paddle does not support unpacking grad_fn.next_functions") + def test_unused(self): + import itertools + + for conv, drrd, grrg, attn1, g1g1, attn2, h2 in itertools.product( + [True], + [True], + [True], + [True], + [True], + [True], + [True], + ): + if (not drrd) and (not grrg) and h2: + # skip the case where h2 is not involved + continue + if (not grrg) and (not conv): + # skip the case where g2 is not involved + continue + model = copy.deepcopy(model_dpa2) + model["descriptor"]["repformer"]["nlayers"] = 2 + # model["descriptor"]["combine_grrg"] = cmbg2 + model["descriptor"]["repformer"]["update_g1_has_conv"] = conv + model["descriptor"]["repformer"]["update_g1_has_drrd"] = drrd + model["descriptor"]["repformer"]["update_g1_has_grrg"] = grrg + model["descriptor"]["repformer"]["update_g1_has_attn"] = attn1 + model["descriptor"]["repformer"]["update_g2_has_g1g1"] = g1g1 + model["descriptor"]["repformer"]["update_g2_has_attn"] = attn2 + model["descriptor"]["repformer"]["update_h2"] = h2 + model["fitting_net"]["neuron"] = [12, 12, 12] + self._test_unused(model) + + def _test_unused(self, model_params): + self.model = get_model(model_params).to(env.DEVICE) + natoms = 5 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(device=env.DEVICE) + coord = paddle.rand([natoms, 3],
dtype=dtype).to(device=env.DEVICE) + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1]).to(env.DEVICE) + idx_perm = [1, 0, 4, 3, 2] + result_0 = eval_model(self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype) + test_keys = ["energy", "force", "virial"] + ret0 = {key: result_0[key].squeeze(0) for key in test_keys} + + # use computation graph to find all contributing tensors + def get_contributing_params(y, top_level=True): + nf = y.grad_fn.next_functions if top_level else y.next_functions + for f, _ in nf: + try: + yield f.variable + except AttributeError: + pass # node has no tensor + if f is not None: + yield from get_contributing_params(f, top_level=False) + + contributing_parameters = set(get_contributing_params(ret0["energy"])) + all_parameters = set(self.model.parameters()) + non_contributing = all_parameters - contributing_parameters + self.assertEqual(len(non_contributing), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/water/data/data_0/set.000/box.npy b/source/tests/pd/model/water/data/data_0/set.000/box.npy new file mode 100644 index 0000000000..6ad2de625b Binary files /dev/null and b/source/tests/pd/model/water/data/data_0/set.000/box.npy differ diff --git a/source/tests/pd/model/water/data/data_0/set.000/coord.npy b/source/tests/pd/model/water/data/data_0/set.000/coord.npy new file mode 100644 index 0000000000..8bd448b125 Binary files /dev/null and b/source/tests/pd/model/water/data/data_0/set.000/coord.npy differ diff --git a/source/tests/pd/model/water/data/data_0/set.000/energy.npy b/source/tests/pd/model/water/data/data_0/set.000/energy.npy new file mode 100644 index 0000000000..d03db103f5 Binary files /dev/null and b/source/tests/pd/model/water/data/data_0/set.000/energy.npy differ diff --git a/source/tests/pd/model/water/data/data_0/set.000/force.npy b/source/tests/pd/model/water/data/data_0/set.000/force.npy new file mode 100644 index 0000000000..10b2ab83a2 Binary files /dev/null and b/source/tests/pd/model/water/data/data_0/set.000/force.npy differ diff --git a/source/tests/pd/model/water/data/data_0/type.raw b/source/tests/pd/model/water/data/data_0/type.raw new file mode 100644 index 0000000000..97e8fdfcf8 --- /dev/null +++ b/source/tests/pd/model/water/data/data_0/type.raw @@ -0,0 +1,192 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/source/tests/pd/model/water/data/data_0/type_map.raw b/source/tests/pd/model/water/data/data_0/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/source/tests/pd/model/water/data/data_0/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/source/tests/pd/model/water/data/single/set.000/box.npy b/source/tests/pd/model/water/data/single/set.000/box.npy new file mode 100644 index 0000000000..65897e0f9c Binary files /dev/null and b/source/tests/pd/model/water/data/single/set.000/box.npy differ diff --git a/source/tests/pd/model/water/data/single/set.000/coord.npy 
b/source/tests/pd/model/water/data/single/set.000/coord.npy new file mode 100644 index 0000000000..6e0594a803 Binary files /dev/null and b/source/tests/pd/model/water/data/single/set.000/coord.npy differ diff --git a/source/tests/pd/model/water/data/single/set.000/energy.npy b/source/tests/pd/model/water/data/single/set.000/energy.npy new file mode 100644 index 0000000000..a0a88fb78a Binary files /dev/null and b/source/tests/pd/model/water/data/single/set.000/energy.npy differ diff --git a/source/tests/pd/model/water/data/single/set.000/force.npy b/source/tests/pd/model/water/data/single/set.000/force.npy new file mode 100644 index 0000000000..d5b847a86e Binary files /dev/null and b/source/tests/pd/model/water/data/single/set.000/force.npy differ diff --git a/source/tests/pd/model/water/data/single/type.raw b/source/tests/pd/model/water/data/single/type.raw new file mode 100644 index 0000000000..97e8fdfcf8 --- /dev/null +++ b/source/tests/pd/model/water/data/single/type.raw @@ -0,0 +1,192 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/source/tests/pd/model/water/data/single/type_map.raw b/source/tests/pd/model/water/data/single/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/source/tests/pd/model/water/data/single/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/source/tests/pd/model/water/data/zbl_tab_potential/H2O_tab_potential.txt b/source/tests/pd/model/water/data/zbl_tab_potential/H2O_tab_potential.txt new file mode 100644 index 0000000000..b4d146017f --- /dev/null +++ b/source/tests/pd/model/water/data/zbl_tab_potential/H2O_tab_potential.txt @@ -0,0 +1,1000 @@ +0.001 913709.625838 114389.26607 14320.660836 913709.625838 114389.26607 14320.660836 +0.002 453190.075792 56822.165078 7124.559066 453190.075792 56822.165078 7124.559066 +0.003 299716.609389 37635.860646 4726.059712 299716.609389 37635.860646 4726.059712 +0.004 223004.208152 28044.724786 3526.959232 223004.208152 28044.724786 3526.959232 +0.005 176995.875921 22291.63231 2807.616935 176995.875921 22291.63231 2807.616935 +0.006 146339.286793 18457.541826 2328.152606 146339.286793 18457.541826 2328.152606 +0.007 124454.877677 15720.007305 1985.760451 124454.877677 15720.007305 1985.760451 +0.008 108052.871443 13667.805976 1729.037583 108052.871443 13667.805976 1729.037583 +0.009 95305.6179694 12072.480958 1529.426853 95305.6179694 12072.480958 1529.426853 +0.01 85116.5305655 10796.958308 1369.793979 85116.5305655 10796.958308 1369.793979 +0.011 76787.7843454 9754.009324 1239.235334 76787.7843454 9754.009324 1239.235334 +0.012 69854.1654175 8885.4816862 1130.481842 69854.1654175 8885.4816862 1130.481842 +0.013 63993.6050636 8151.1162355 1038.501071 63993.6050636 8151.1162355 1038.501071 +0.014 58976.0564146 7522.1565542 959.6984312 58976.0564146 7522.1565542 959.6984312 +0.015 54632.8204564 6977.5147177 891.4378965 54632.8204564 6977.5147177 891.4378965 +0.016 50837.3747846 6501.3748881 831.7424519 50837.3747846 6501.3748881 831.7424519 +0.017 47492.968682 
6081.6426991 779.1002669 47492.968682 6081.6426991 779.1002669 +0.018 44524.3531708 5708.9115119 732.3354774 44524.3531708 5708.9115119 732.3354774 +0.019 41872.1226283 5375.7551174 690.5197734 41872.1226283 5375.7551174 690.5197734 +0.02 39488.7539185 5076.2326272 652.9105109 39488.7539185 5076.2326272 652.9105109 +0.021 37335.7772003 4805.5348243 618.9065049 37335.7772003 4805.5348243 618.9065049 +0.022 35381.7183353 4559.7269643 588.0158802 35381.7183353 4559.7269643 588.0158802 +0.023 33600.5780582 4335.5586712 559.8323083 33600.5780582 4335.5586712 559.8323083 +0.024 31970.6913556 4130.3213594 534.0171847 31970.6913556 4130.3213594 534.0171847 +0.025 30473.8605947 3941.739875 510.2860845 30473.8605947 3941.739875 510.2860845 +0.026 29094.6886984 3767.8891424 488.398343 29094.6886984 3767.8891424 488.398343 +0.027 27820.0605059 3607.1293341 468.1489521 27820.0605059 3607.1293341 468.1489521 +0.028 26638.7352692 3458.0549325 449.3621928 26638.7352692 3458.0549325 449.3621928 +0.029 25541.023462 3319.4543312 431.8865855 25541.023462 3319.4543312 431.8865855 +0.03 24518.5282265 3190.2775152 415.5908499 24518.5282265 3190.2775152 415.5908499 +0.031 23563.9368637 3069.6099976 400.3606472 23563.9368637 3069.6099976 400.3606472 +0.032 22670.8514191 2956.651642 386.0959329 22670.8514191 2956.651642 386.0959329 +0.033 21833.6500715 2850.6993366 372.708791 21833.6500715 2850.6993366 372.708791 +0.034 21047.372983 2751.1327248 360.1216502 21047.372983 2751.1327248 360.1216502 +0.035 20307.6277175 2657.4023825 348.2658062 20307.6277175 2657.4023825 348.2658062 +0.036 19610.5104235 2569.0199661 337.0801904 19610.5104235 2569.0199661 337.0801904 +0.037 18952.5397978 2485.5499575 326.5103374 18952.5397978 2485.5499575 326.5103374 +0.038 18330.6014769 2406.6027127 316.5075168 18330.6014769 2406.6027127 316.5075168 +0.039 17741.9009829 2331.8285805 307.0279975 17741.9009829 2331.8285805 307.0279975 +0.04 17183.9237284 2260.9129024 298.0324229 17183.9237284 2260.9129024 298.0324229 +0.041 16654.4008745 2193.5717452 289.4852776 16654.4008745 2193.5717452 289.4852776 +0.042 16151.2800661 2129.5482423 281.3544296 16151.2800661 2129.5482423 281.3544296 +0.043 15672.7002509 2068.6094464 273.6107373 15672.7002509 2068.6094464 273.6107373 +0.044 15216.9699328 2010.5436107 266.2277097 15216.9699328 2010.5436107 266.2277097 +0.045 14782.5483242 1955.1578329 259.1812115 14782.5483242 1955.1578329 259.1812115 +0.046 14368.028958 1902.2760069 252.4492073 14368.028958 1902.2760069 252.4492073 +0.047 13972.1253902 1851.737035 246.0115383 13972.1253902 1851.737035 246.0115383 +0.048 13593.6586918 1803.3932646 239.8497262 13593.6586918 1803.3932646 239.8497262 +0.049 13231.5464708 1757.1091159 233.9468027 13231.5464708 1757.1091159 233.9468027 +0.05 12884.7932124 1712.759874 228.2871576 12884.7932124 1712.759874 228.2871576 +0.051 12552.4817558 1670.2306236 222.856406 12552.4817558 1670.2306236 222.856406 +0.052 12233.7657548 1629.4153064 217.6412705 12233.7657548 1629.4153064 217.6412705 +0.053 11927.862991 1590.2158855 212.6294767 11927.862991 1590.2158855 212.6294767 +0.054 11634.0494314 1552.5416017 207.8096602 11634.0494314 1552.5416017 207.8096602 +0.055 11351.6539336 1516.3083123 203.1712838 11351.6539336 1516.3083123 203.1712838 +0.056 11080.0535186 1481.4378999 198.7045641 11080.0535186 1481.4378999 198.7045641 +0.057 10818.6691413 1447.8577434 194.4004048 10818.6691413 1447.8577434 194.4004048 +0.058 10566.9618984 1415.5002442 190.2503376 10566.9618984 1415.5002442 190.2503376 +0.059 10324.4296227 
1384.3024001 186.2464693 10324.4296227 1384.3024001 186.2464693 +0.06 10090.6038173 1354.2054222 182.3814335 10090.6038173 1354.2054222 182.3814335 +0.061 9865.0468917 1325.1543893 178.6483475 9865.0468917 1325.1543893 178.6483475 +0.062 9647.3496659 1297.0979357 175.0407734 9647.3496659 1297.0979357 175.0407734 +0.063 9437.1291115 1269.9879689 171.5526826 9437.1291115 1269.9879689 171.5526826 +0.064 9234.0263053 1243.7794136 168.178424 9234.0263053 1243.7794136 168.178424 +0.065 9037.7045714 1218.4299795 164.9126949 9037.7045714 1218.4299795 164.9126949 +0.066 8847.8477928 1193.8999501 161.7505145 8847.8477928 1193.8999501 161.7505145 +0.067 8664.1588738 1170.1519903 158.6871999 8664.1588738 1170.1519903 158.6871999 +0.068 8486.3583383 1147.1509714 155.7183444 8486.3583383 1147.1509714 155.7183444 +0.069 8314.1830501 1124.8638108 152.8397972 8314.1830501 1124.8638108 152.8397972 +0.07 8147.3850427 1103.2593259 150.0476452 8147.3850427 1103.2593259 150.0476452 +0.071 7985.7304489 1082.3080999 147.3381963 7985.7304489 1082.3080999 147.3381963 +0.072 7828.9985183 1061.9823592 144.707964 7828.9985183 1061.9823592 144.707964 +0.073 7676.9807165 1042.2558606 142.1536534 7676.9807165 1042.2558606 142.1536534 +0.074 7529.4798977 1023.1037878 139.6721482 7529.4798977 1023.1037878 139.6721482 +0.075 7386.3095424 1004.5026562 137.2604986 7386.3095424 1004.5026562 137.2604986 +0.076 7247.2930565 986.430225 134.9159107 7247.2930565 986.430225 134.9159107 +0.077 7112.2631243 968.8654164 132.6357361 7112.2631243 968.8654164 132.6357361 +0.078 6981.0611116 951.7882409 130.4174626 6981.0611116 951.7882409 130.4174626 +0.079 6853.5365143 935.1797284 128.2587056 6853.5365143 935.1797284 128.2587056 +0.08 6729.5464483 919.0218641 126.1572004 6729.5464483 919.0218641 126.1572004 +0.081 6608.9551768 903.2975297 124.1107942 6608.9551768 903.2975297 124.1107942 +0.082 6491.6336731 887.9904484 122.1174397 6491.6336731 887.9904484 122.1174397 +0.083 6377.4592142 873.0851342 120.1751889 6377.4592142 873.0851342 120.1751889 +0.084 6266.3150042 858.5668449 118.2821865 6266.3150042 858.5668449 118.2821865 +0.085 6158.0898234 844.4215378 116.4366651 6158.0898234 844.4215378 116.4366651 +0.086 6052.677703 830.6358295 114.63694 6052.677703 830.6358295 114.63694 +0.087 5949.9776216 817.1969572 112.881404 5949.9776216 817.1969572 112.881404 +0.088 5849.8932223 804.0927442 111.1685235 5849.8932223 804.0927442 111.1685235 +0.089 5752.3325494 791.311566 109.4968341 5752.3325494 791.311566 109.4968341 +0.09 5657.2078026 778.8423203 107.8649368 5657.2078026 778.8423203 107.8649368 +0.091 5564.4351069 766.6743978 106.2714943 5564.4351069 766.6743978 106.2714943 +0.092 5473.9342981 754.7976551 104.7152279 5473.9342981 754.7976551 104.7152279 +0.093 5385.6287222 743.2023904 103.1949141 5385.6287222 743.2023904 103.1949141 +0.094 5299.4450471 731.879319 101.7093819 5299.4450471 731.879319 101.7093819 +0.095 5215.3130867 720.8195518 100.2575097 5215.3130867 720.8195518 100.2575097 +0.096 5133.1656359 710.0145745 98.8382229 5133.1656359 710.0145745 98.8382229 +0.097 5052.9383157 699.4562281 97.4504918 5052.9383157 699.4562281 97.4504918 +0.098 4974.5694279 689.1366911 96.0933285 4974.5694279 689.1366911 96.0933285 +0.099 4897.9998188 679.0484617 94.7657857 4897.9998188 679.0484617 94.7657857 +0.1 4823.1727507 669.1843423 93.466954 4823.1727507 669.1843423 93.466954 +0.101 4750.0337815 659.5374244 92.1959604 4750.0337815 659.5374244 92.1959604 +0.102 4678.530651 650.1010737 90.9519663 4678.530651 650.1010737 90.9519663 +0.103 
4608.6131741 640.8689177 89.7341659 4608.6131741 640.8689177 89.7341659 +0.104 4540.2331402 631.8348323 88.5417846 4540.2331402 631.8348323 88.5417846 +0.105 4473.3442182 622.9929301 87.3740776 4473.3442182 622.9929301 87.3740776 +0.106 4407.9018671 614.3375495 86.2303284 4407.9018671 614.3375495 86.2303284 +0.107 4343.8632512 605.8632435 85.1098475 4343.8632512 605.8632435 85.1098475 +0.108 4281.1871608 597.5647703 84.0119712 4281.1871608 597.5647703 84.0119712 +0.109 4219.8339365 589.4370834 82.9360602 4219.8339365 589.4370834 82.9360602 +0.11 4159.7653981 581.475323 81.8814988 4159.7653981 581.475323 81.8814988 +0.111 4100.944777 573.6748074 80.8476936 4100.944777 573.6748074 80.8476936 +0.112 4043.3366528 566.0310249 79.8340726 4043.3366528 566.0310249 79.8340726 +0.113 3986.9068928 558.5396262 78.8400844 3986.9068928 558.5396262 78.8400844 +0.114 3931.6225949 551.1964175 77.8651968 3931.6225949 551.1964175 77.8651968 +0.115 3877.4520333 543.9973537 76.9088966 3877.4520333 543.9973537 76.9088966 +0.116 3824.3646071 536.9385314 75.9706883 3824.3646071 536.9385314 75.9706883 +0.117 3772.3307921 530.0161836 75.0500935 3772.3307921 530.0161836 75.0500935 +0.118 3721.3220939 523.2266731 74.1466504 3721.3220939 523.2266731 74.1466504 +0.119 3671.3110047 516.5664876 73.2599126 3671.3110047 516.5664876 73.2599126 +0.12 3622.270961 510.0322343 72.3894489 3622.270961 510.0322343 72.3894489 +0.121 3574.1763045 503.6206346 71.5348425 3574.1763045 503.6206346 71.5348425 +0.122 3527.0022444 497.3285198 70.6956904 3527.0022444 497.3285198 70.6956904 +0.123 3480.7248214 491.1528264 69.8716028 3480.7248214 491.1528264 69.8716028 +0.124 3435.3208739 485.0905919 69.0622028 3435.3208739 485.0905919 69.0622028 +0.125 3390.7680053 479.1389505 68.2671256 3390.7680053 479.1389505 68.2671256 +0.126 3347.0445536 473.2951298 67.486018 3347.0445536 473.2951298 67.486018 +0.127 3304.1295618 467.5564462 66.7185382 3304.1295618 467.5564462 66.7185382 +0.128 3262.0027498 461.9203022 65.9643553 3262.0027498 461.9203022 65.9643553 +0.129 3220.6444879 456.3841826 65.2231486 3220.6444879 456.3841826 65.2231486 +0.13 3180.0357713 450.9456517 64.4946075 3180.0357713 450.9456517 64.4946075 +0.131 3140.1581958 445.6023495 63.778431 3140.1581958 445.6023495 63.778431 +0.132 3100.9939349 440.3519898 63.0743273 3100.9939349 440.3519898 63.0743273 +0.133 3062.5257173 435.1923563 62.3820136 3062.5257173 435.1923563 62.3820136 +0.134 3024.736806 430.1213011 61.7012156 3024.736806 430.1213011 61.7012156 +0.135 2987.6109783 425.1367411 61.0316673 2987.6109783 425.1367411 61.0316673 +0.136 2951.1325064 420.2366563 60.3731105 2951.1325064 420.2366563 60.3731105 +0.137 2915.2861387 415.4190873 59.7252948 2915.2861387 415.4190873 59.7252948 +0.138 2880.0570829 410.6821328 59.0879771 2880.0570829 410.6821328 59.0879771 +0.139 2845.4309885 406.0239478 58.4609214 2845.4309885 406.0239478 58.4609214 +0.14 2811.3939311 401.4427415 57.8438986 2811.3939311 401.4427415 57.8438986 +0.141 2777.9323966 396.9367752 57.2366861 2777.9323966 396.9367752 57.2366861 +0.142 2745.0332671 392.5043608 56.6390678 2745.0332671 392.5043608 56.6390678 +0.143 2712.683806 388.1438584 56.0508336 2712.683806 388.1438584 56.0508336 +0.144 2680.871645 383.8536753 55.4717796 2680.871645 383.8536753 55.4717796 +0.145 2649.584771 379.6322639 54.9017073 2649.584771 379.6322639 54.9017073 +0.146 2618.8115134 375.4781203 54.3404239 2618.8115134 375.4781203 54.3404239 +0.147 2588.5405327 371.3897825 53.787742 2588.5405327 371.3897825 53.787742 +0.148 2558.7608086 
367.3658297 53.2434792 2558.7608086 367.3658297 53.2434792 +0.149 2529.4616294 363.40488 52.7074581 2529.4616294 363.40488 52.7074581 +0.15 2500.6325811 359.5055896 52.1795062 2500.6325811 359.5055896 52.1795062 +0.151 2472.2635377 355.6666516 51.6594557 2472.2635377 355.6666516 51.6594557 +0.152 2444.3446512 351.8867943 51.1471432 2444.3446512 351.8867943 51.1471432 +0.153 2416.8663423 348.1647806 50.6424097 2416.8663423 348.1647806 50.6424097 +0.154 2389.8192919 344.4994064 50.1451003 2389.8192919 344.4994064 50.1451003 +0.155 2363.1944319 340.8894997 49.6550644 2363.1944319 340.8894997 49.6550644 +0.156 2336.9829372 337.3339196 49.1721551 2336.9829372 337.3339196 49.1721551 +0.157 2311.1762181 333.8315552 48.6962295 2311.1762181 333.8315552 48.6962295 +0.158 2285.765912 330.3813249 48.2271484 2285.765912 330.3813249 48.2271484 +0.159 2260.7438767 326.9821749 47.7647759 2260.7438767 326.9821749 47.7647759 +0.16 2236.1021827 323.6330788 47.3089799 2236.1021827 323.6330788 47.3089799 +0.161 2211.8331072 320.3330368 46.8596315 2211.8331072 320.3330368 46.8596315 +0.162 2187.9291268 317.0810744 46.416605 2187.9291268 317.0810744 46.416605 +0.163 2164.3829117 313.8762419 45.9797781 2164.3829117 313.8762419 45.9797781 +0.164 2141.1873194 310.7176137 45.5490312 2141.1873194 310.7176137 45.5490312 +0.165 2118.335389 307.6042874 45.1242479 2118.335389 307.6042874 45.1242479 +0.166 2095.8203354 304.5353832 44.7053147 2095.8203354 304.5353832 44.7053147 +0.167 2073.6355442 301.5100431 44.2921206 2073.6355442 301.5100431 44.2921206 +0.168 2051.774566 298.5274302 43.8845577 2051.774566 298.5274302 43.8845577 +0.169 2030.2311119 295.5867284 43.4825203 2030.2311119 295.5867284 43.4825203 +0.17 2008.9990482 292.6871414 43.0859057 2008.9990482 292.6871414 43.0859057 +0.171 1988.0723922 289.8278921 42.6946132 1988.0723922 289.8278921 42.6946132 +0.172 1967.445307 287.0082225 42.3085448 1967.445307 287.0082225 42.3085448 +0.173 1947.1120979 284.2273926 41.9276048 1947.1120979 284.2273926 41.9276048 +0.174 1927.0672078 281.48468 41.5516997 1927.0672078 281.48468 41.5516997 +0.175 1907.3052129 278.7793797 41.180738 1907.3052129 278.7793797 41.180738 +0.176 1887.8208195 276.1108033 40.8146308 1887.8208195 276.1108033 40.8146308 +0.177 1868.6088596 273.4782784 40.4532907 1868.6088596 273.4782784 40.4532907 +0.178 1849.6642873 270.8811486 40.0966328 1849.6642873 270.8811486 40.0966328 +0.179 1830.9821758 268.3187725 39.7445739 1830.9821758 268.3187725 39.7445739 +0.18 1812.5577133 265.7905239 39.3970327 1812.5577133 265.7905239 39.3970327 +0.181 1794.3862002 263.2957906 39.0539298 1794.3862002 263.2957906 39.0539298 +0.182 1776.4630458 260.8339748 38.7151877 1776.4630458 260.8339748 38.7151877 +0.183 1758.7837651 258.404492 38.3807303 1758.7837651 258.404492 38.3807303 +0.184 1741.3439759 256.0067712 38.0504836 1741.3439759 256.0067712 38.0504836 +0.185 1724.139396 253.6402542 37.724375 1724.139396 253.6402542 37.724375 +0.186 1707.1658404 251.3043952 37.4023336 1707.1658404 251.3043952 37.4023336 +0.187 1690.4192185 248.9986608 37.08429 1690.4192185 248.9986608 37.08429 +0.188 1673.8955316 246.7225293 36.7701764 1673.8955316 246.7225293 36.7701764 +0.189 1657.5908704 244.4754905 36.4599264 1657.5908704 244.4754905 36.4599264 +0.19 1641.5014126 242.2570456 36.1534752 1641.5014126 242.2570456 36.1534752 +0.191 1625.6234204 240.0667066 35.850759 1625.6234204 240.0667066 35.850759 +0.192 1609.9532382 237.903996 35.5517159 1609.9532382 237.903996 35.5517159 +0.193 1594.4872906 235.768447 35.256285 1594.4872906 
235.768447 35.256285 +0.194 1579.2220803 233.6596024 34.9644066 1579.2220803 233.6596024 34.9644066 +0.195 1564.1541856 231.5770153 34.6760226 1564.1541856 231.5770153 34.6760226 +0.196 1549.2802589 229.520248 34.3910759 1549.2802589 229.520248 34.3910759 +0.197 1534.5970244 227.4888722 34.1095106 1534.5970244 227.4888722 34.1095106 +0.198 1520.1012763 225.4824686 33.831272 1520.1012763 225.4824686 33.831272 +0.199 1505.7898772 223.5006269 33.5563066 1505.7898772 223.5006269 33.5563066 +0.2 1491.6597561 221.5429453 33.2845619 1491.6597561 221.5429453 33.2845619 +0.201 1477.7079067 219.6090303 33.0159865 1477.7079067 219.6090303 33.0159865 +0.202 1463.9313857 217.6984967 32.7505301 1463.9313857 217.6984967 32.7505301 +0.203 1450.3273114 215.8109671 32.4881435 1450.3273114 215.8109671 32.4881435 +0.204 1436.892862 213.946072 32.2287782 1436.892862 213.946072 32.2287782 +0.205 1423.6252739 212.1034495 31.972387 1423.6252739 212.1034495 31.972387 +0.206 1410.5218407 210.2827449 31.7189234 1410.5218407 210.2827449 31.7189234 +0.207 1397.5799113 208.4836109 31.4683421 1397.5799113 208.4836109 31.4683421 +0.208 1384.7968886 206.7057069 31.2205985 1384.7968886 206.7057069 31.2205985 +0.209 1372.1702286 204.9486995 30.975649 1372.1702286 204.9486995 30.975649 +0.21 1359.6974383 203.2122618 30.7334506 1359.6974383 203.2122618 30.7334506 +0.211 1347.3760753 201.4960735 30.4939616 1347.3760753 201.4960735 30.4939616 +0.212 1335.2037458 199.7998204 30.2571406 1335.2037458 199.7998204 30.2571406 +0.213 1323.178104 198.1231947 30.0229474 1323.178104 198.1231947 30.0229474 +0.214 1311.2968506 196.4658948 29.7913425 1311.2968506 196.4658948 29.7913425 +0.215 1299.5577318 194.8276247 29.5622869 1299.5577318 194.8276247 29.5622869 +0.216 1287.958538 193.2080943 29.3357428 1287.958538 193.2080943 29.3357428 +0.217 1276.4971033 191.6070192 29.1116726 1276.4971033 191.6070192 29.1116726 +0.218 1265.1713036 190.0241204 28.8900399 1265.1713036 190.0241204 28.8900399 +0.219 1253.9790564 188.4591242 28.6708087 1253.9790564 188.4591242 28.6708087 +0.22 1242.9183194 186.9117623 28.4539438 1242.9183194 186.9117623 28.4539438 +0.221 1231.9870896 185.3817715 28.2394106 1231.9870896 185.3817715 28.2394106 +0.222 1221.1834024 183.8688934 28.0271751 1221.1834024 183.8688934 28.0271751 +0.223 1210.5053309 182.3728746 27.817204 1210.5053309 182.3728746 27.817204 +0.224 1199.9509845 180.8934666 27.6094647 1199.9509845 180.8934666 27.6094647 +0.225 1189.5185088 179.4304255 27.4039251 1189.5185088 179.4304255 27.4039251 +0.226 1179.2060841 177.9835117 27.2005537 1179.2060841 177.9835117 27.2005537 +0.227 1169.0119251 176.5524904 26.9993196 1169.0119251 176.5524904 26.9993196 +0.228 1158.9342796 175.1371309 26.8001924 1158.9342796 175.1371309 26.8001924 +0.229 1148.9714282 173.7372069 26.6031423 1148.9714282 173.7372069 26.6031423 +0.23 1139.1216834 172.3524963 26.4081402 1139.1216834 172.3524963 26.4081402 +0.231 1129.3833889 170.9827808 26.2151571 1129.3833889 170.9827808 26.2151571 +0.232 1119.7549187 169.6278463 26.024165 1119.7549187 169.6278463 26.024165 +0.233 1110.2346768 168.2874825 25.8351362 1110.2346768 168.2874825 25.8351362 +0.234 1100.8210961 166.9614829 25.6480433 1100.8210961 166.9614829 25.6480433 +0.235 1091.5126382 165.6496448 25.4628597 1091.5126382 165.6496448 25.4628597 +0.236 1082.3077924 164.3517691 25.2795591 1082.3077924 164.3517691 25.2795591 +0.237 1073.2050753 163.0676601 25.0981157 1073.2050753 163.0676601 25.0981157 +0.238 1064.20303 161.7971257 24.9185042 1064.20303 161.7971257 24.9185042 +0.239 
1055.3002259 160.5399772 24.7406996 1055.3002259 160.5399772 24.7406996 +0.24 1046.4952577 159.2960293 24.5646775 1046.4952577 159.2960293 24.5646775 +0.241 1037.786745 158.0650998 24.3904138 1037.786745 158.0650998 24.3904138 +0.242 1029.1733319 156.8470097 24.2178849 1029.1733319 156.8470097 24.2178849 +0.243 1020.6536864 155.6415833 24.0470676 1020.6536864 155.6415833 24.0470676 +0.244 1012.2264998 154.4486478 23.877939 1012.2264998 154.4486478 23.877939 +0.245 1003.8904862 153.2680334 23.7104767 1003.8904862 153.2680334 23.7104767 +0.246 995.6443822 152.0995732 23.5446586 995.6443822 152.0995732 23.5446586 +0.247 987.4869462 150.9431032 23.3804631 987.4869462 150.9431032 23.3804631 +0.248 979.416958 149.7984622 23.2178688 979.416958 149.7984622 23.2178688 +0.249 971.4332186 148.6654918 23.0568547 971.4332186 148.6654918 23.0568547 +0.25 963.5345494 147.5440362 22.8974003 963.5345494 147.5440362 22.8974003 +0.251 955.7197919 146.4339423 22.7394853 955.7197919 146.4339423 22.7394853 +0.252 947.9878073 145.3350596 22.5830897 947.9878073 145.3350596 22.5830897 +0.253 940.3374762 144.2472399 22.4281939 940.3374762 144.2472399 22.4281939 +0.254 932.7676981 143.1703377 22.2747787 932.7676981 143.1703377 22.2747787 +0.255 925.2773908 142.1042099 22.1228251 925.2773908 142.1042099 22.1228251 +0.256 917.8654906 141.0487157 21.9723144 917.8654906 141.0487157 21.9723144 +0.257 910.5309511 140.0037168 21.8232283 910.5309511 140.0037168 21.8232283 +0.258 903.2727438 138.9690768 21.6755488 903.2727438 138.9690768 21.6755488 +0.259 896.0898568 137.9446619 21.529258 896.0898568 137.9446619 21.529258 +0.26 888.9812952 136.9303404 21.3843385 888.9812952 136.9303404 21.3843385 +0.261 881.9460805 135.9259827 21.2407731 881.9460805 135.9259827 21.2407731 +0.262 874.9832499 134.9314613 21.0985449 874.9832499 134.9314613 21.0985449 +0.263 868.0918568 133.9466506 20.9576372 868.0918568 133.9466506 20.9576372 +0.264 861.2709696 132.9714274 20.8180337 861.2709696 132.9714274 20.8180337 +0.265 854.5196721 132.0056702 20.6797182 854.5196721 132.0056702 20.6797182 +0.266 847.8370627 131.0492595 20.5426748 847.8370627 131.0492595 20.5426748 +0.267 841.2222545 130.1020778 20.406888 841.2222545 130.1020778 20.406888 +0.268 834.6743747 129.1640092 20.2723424 834.6743747 129.1640092 20.2723424 +0.269 828.1925646 128.2349399 20.1390228 828.1925646 128.2349399 20.1390228 +0.27 821.775979 127.3147578 20.0069143 821.775979 127.3147578 20.0069143 +0.271 815.4237863 126.4033526 19.8760022 815.4237863 126.4033526 19.8760022 +0.272 809.135168 125.5006157 19.7462722 809.135168 125.5006157 19.7462722 +0.273 802.9093184 124.6064402 19.6177099 802.9093184 124.6064402 19.6177099 +0.274 796.7454448 123.7207207 19.4903015 796.7454448 123.7207207 19.4903015 +0.275 790.6427665 122.8433537 19.364033 790.6427665 122.8433537 19.364033 +0.276 784.6005152 121.9742372 19.2388909 784.6005152 121.9742372 19.2388909 +0.277 778.6179347 121.1132707 19.1148619 778.6179347 121.1132707 19.1148619 +0.278 772.6942802 120.2603553 18.9919327 772.6942802 120.2603553 18.9919327 +0.279 766.8288187 119.4153936 18.8700905 766.8288187 119.4153936 18.8700905 +0.28 761.0208283 118.5782897 18.7493224 761.0208283 118.5782897 18.7493224 +0.281 755.2695984 117.748949 18.6296158 755.2695984 117.748949 18.6296158 +0.282 749.5744291 116.9272787 18.5109583 749.5744291 116.9272787 18.5109583 +0.283 743.9346311 116.113187 18.3933378 743.9346311 116.113187 18.3933378 +0.284 738.3495259 115.3065837 18.2767421 738.3495259 115.3065837 18.2767421 +0.285 732.818445 114.50738 
18.1611595 732.818445 114.50738 18.1611595 +0.286 727.34073 113.7154881 18.0465783 727.34073 113.7154881 18.0465783 +0.287 721.9157327 112.9308219 17.9329869 721.9157327 112.9308219 17.9329869 +0.288 716.5428142 112.1532963 17.820374 716.5428142 112.1532963 17.820374 +0.289 711.2213456 111.3828276 17.7087284 711.2213456 111.3828276 17.7087284 +0.29 705.9507071 110.6193334 17.5980392 705.9507071 110.6193334 17.5980392 +0.291 700.7302882 109.8627322 17.4882954 700.7302882 109.8627322 17.4882954 +0.292 695.5594874 109.1129441 17.3794864 695.5594874 109.1129441 17.3794864 +0.293 690.4377121 108.36989 17.2716016 690.4377121 108.36989 17.2716016 +0.294 685.3643785 107.6334922 17.1646306 685.3643785 107.6334922 17.1646306 +0.295 680.3389114 106.9036741 17.0585633 680.3389114 106.9036741 17.0585633 +0.296 675.3607438 106.18036 16.9533894 675.3607438 106.18036 16.9533894 +0.297 670.4293171 105.4634755 16.8490991 670.4293171 105.4634755 16.8490991 +0.298 665.5440809 104.7529472 16.7456826 665.5440809 104.7529472 16.7456826 +0.299 660.7044927 104.0487027 16.64313 660.7044927 104.0487027 16.64313 +0.3 655.9100179 103.3506708 16.541432 655.9100179 103.3506708 16.541432 +0.301 651.1601295 102.6587812 16.4405792 651.1601295 102.6587812 16.4405792 +0.302 646.4543081 101.9729644 16.3405621 646.4543081 101.9729644 16.3405621 +0.303 641.7920419 101.2931523 16.2413718 641.7920419 101.2931523 16.2413718 +0.304 637.1728262 100.6192774 16.1429992 637.1728262 100.6192774 16.1429992 +0.305 632.5961636 99.9512734 16.0454353 632.5961636 99.9512734 16.0454353 +0.306 628.0615636 99.2890746 15.9486715 628.0615636 99.2890746 15.9486715 +0.307 623.568543 98.6326167 15.8526991 623.568543 98.6326167 15.8526991 +0.308 619.116625 97.9818358 15.7575095 619.116625 97.9818358 15.7575095 +0.309 614.7053397 97.3366692 15.6630943 614.7053397 97.3366692 15.6630943 +0.31 610.3342239 96.697055 15.5694453 610.3342239 96.697055 15.5694453 +0.311 606.0028208 96.0629321 15.4765543 606.0028208 96.0629321 15.4765543 +0.312 601.7106798 95.4342402 15.3844132 601.7106798 95.4342402 15.3844132 +0.313 597.4573568 94.81092 15.2930139 597.4573568 94.81092 15.2930139 +0.314 593.2424137 94.1929129 15.2023488 593.2424137 94.1929129 15.2023488 +0.315 589.0654185 93.580161 15.1124099 589.0654185 93.580161 15.1124099 +0.316 584.9259453 92.9726074 15.0231897 584.9259453 92.9726074 15.0231897 +0.317 580.8235739 92.3701958 14.9346807 580.8235739 92.3701958 14.9346807 +0.318 576.7578898 91.7728708 14.8468753 576.7578898 91.7728708 14.8468753 +0.319 572.7284843 91.1805775 14.7597662 572.7284843 91.1805775 14.7597662 +0.32 568.7349543 90.593262 14.6733463 568.7349543 90.593262 14.6733463 +0.321 564.776902 90.0108711 14.5876082 564.776902 90.0108711 14.5876082 +0.322 560.8539351 89.4333521 14.5025451 560.8539351 89.4333521 14.5025451 +0.323 556.9656667 88.8606532 14.4181498 556.9656667 88.8606532 14.4181498 +0.324 553.111715 88.2927232 14.3344156 553.111715 88.2927232 14.3344156 +0.325 549.2917033 87.7295116 14.2513356 549.2917033 87.7295116 14.2513356 +0.326 545.50526 87.1709686 14.1689032 545.50526 87.1709686 14.1689032 +0.327 541.7520185 86.617045 14.0871117 541.7520185 86.617045 14.0871117 +0.328 538.031617 86.0676922 14.0059546 538.031617 86.0676922 14.0059546 +0.329 534.3436986 85.5228624 13.9254255 534.3436986 85.5228624 13.9254255 +0.33 530.6879111 84.9825082 13.845518 530.6879111 84.9825082 13.845518 +0.331 527.0639069 84.4465831 13.7662258 527.0639069 84.4465831 13.7662258 +0.332 523.4713431 83.9150409 13.6875428 523.4713431 83.9150409 13.6875428 
+0.333 519.9098812 83.3878361 13.6094628 519.9098812 83.3878361 13.6094628 +0.334 516.3791872 82.864924 13.5319798 516.3791872 82.864924 13.5319798 +0.335 512.8789315 82.3462602 13.4550878 512.8789315 82.3462602 13.4550878 +0.336 509.4087887 81.831801 13.3787809 509.4087887 81.831801 13.3787809 +0.337 505.9684377 81.3215032 13.3030534 505.9684377 81.3215032 13.3030534 +0.338 502.5575616 80.8153242 13.2278994 502.5575616 80.8153242 13.2278994 +0.339 499.1758475 80.3132218 13.1533134 499.1758475 80.3132218 13.1533134 +0.34 495.8229866 79.8151546 13.0792897 495.8229866 79.8151546 13.0792897 +0.341 492.4986741 79.3210816 13.0058227 492.4986741 79.3210816 13.0058227 +0.342 489.2026091 78.8309622 12.9329071 489.2026091 78.8309622 12.9329071 +0.343 485.9344945 78.3447564 12.8605374 485.9344945 78.3447564 12.8605374 +0.344 482.6940372 77.8624247 12.7887084 482.6940372 77.8624247 12.7887084 +0.345 479.4809474 77.3839282 12.7174147 479.4809474 77.3839282 12.7174147 +0.346 476.2949395 76.9092283 12.6466511 476.2949395 76.9092283 12.6466511 +0.347 473.1357312 76.438287 12.5764126 473.1357312 76.438287 12.5764126 +0.348 470.0030438 75.9710667 12.5066941 470.0030438 75.9710667 12.5066941 +0.349 466.8966023 75.5075304 12.4374905 466.8966023 75.5075304 12.4374905 +0.35 463.8161349 75.0476413 12.3687969 463.8161349 75.0476413 12.3687969 +0.351 460.7613734 74.5913633 12.3006084 460.7613734 74.5913633 12.3006084 +0.352 457.7320529 74.1386607 12.2329203 457.7320529 74.1386607 12.2329203 +0.353 454.7279118 73.6894981 12.1657276 454.7279118 73.6894981 12.1657276 +0.354 451.7486918 73.2438407 12.0990258 451.7486918 73.2438407 12.0990258 +0.355 448.7941378 72.801654 12.0328101 448.7941378 72.801654 12.0328101 +0.356 445.8639978 72.362904 11.9670761 445.8639978 72.362904 11.9670761 +0.357 442.958023 71.927557 11.901819 442.958023 71.927557 11.901819 +0.358 440.0759676 71.4955799 11.8370344 440.0759676 71.4955799 11.8370344 +0.359 437.2175888 71.0669398 11.772718 437.2175888 71.0669398 11.772718 +0.36 434.382647 70.6416043 11.7088653 434.382647 70.6416043 11.7088653 +0.361 431.5709052 70.2195415 11.6454719 431.5709052 70.2195415 11.6454719 +0.362 428.7821296 69.8007195 11.5825337 428.7821296 69.8007195 11.5825337 +0.363 426.0160891 69.3851072 11.5200463 426.0160891 69.3851072 11.5200463 +0.364 423.2725553 68.9726737 11.4580056 423.2725553 68.9726737 11.4580056 +0.365 420.5513029 68.5633884 11.3964074 420.5513029 68.5633884 11.3964074 +0.366 417.852109 68.1572211 11.3352478 417.852109 68.1572211 11.3352478 +0.367 415.1747536 67.7541421 11.2745225 415.1747536 67.7541421 11.2745225 +0.368 412.5190192 67.3541219 11.2142277 412.5190192 67.3541219 11.2142277 +0.369 409.884691 66.9571314 11.1543594 409.884691 66.9571314 11.1543594 +0.37 407.2715568 66.5631417 11.0949137 407.2715568 66.5631417 11.0949137 +0.371 404.6794069 66.1721246 11.0358867 404.6794069 66.1721246 11.0358867 +0.372 402.1080341 65.7840517 10.9772747 402.1080341 65.7840517 10.9772747 +0.373 399.5572337 65.3988954 10.9190739 399.5572337 65.3988954 10.9190739 +0.374 397.0268033 65.0166283 10.8612804 397.0268033 65.0166283 10.8612804 +0.375 394.5165432 64.6372231 10.8038908 394.5165432 64.6372231 10.8038908 +0.376 392.0262556 64.260653 10.7469013 392.0262556 64.260653 10.7469013 +0.377 389.5557456 63.8868916 10.6903083 389.5557456 63.8868916 10.6903083 +0.378 387.10482 63.5159126 10.6341083 387.10482 63.5159126 10.6341083 +0.379 384.6732884 63.1476901 10.5782978 384.6732884 63.1476901 10.5782978 +0.38 382.2609623 62.7821985 10.5228732 382.2609623 62.7821985 
[... several hundred additional rows of the tabulated data fixture added by this diff (each row: a distance value followed by tabulated pair-energy columns), covering r = 0.381 through 0.985, omitted here; the rows as extracted had collapsed line breaks and duplicated value columns and could not be reconstructed faithfully. The final rows of the table, up to r = 1.0, follow below ...]
+0.986 24.4783585 4.9920898 1.0681055 24.4783585 4.9920898 1.0681055 +0.987 24.3921328 4.9760523 1.0650171 24.3921328 4.9760523 1.0650171 +0.988 24.3062578 4.9600758 1.0619396 24.3062578 4.9600758 1.0619396 +0.989 24.2207318 4.94416 1.0588729 24.2207318 4.94416 1.0588729 +0.99 24.1355532 4.9283046 1.0558169 24.1355532 4.9283046 1.0558169 +0.991 24.0507203 4.9125093 1.0527717 24.0507203 4.9125093 1.0527717 +0.992 23.9662314 4.896774 1.0497372 23.9662314 4.896774 1.0497372 +0.993 23.8820851 4.8810982 1.0467134 23.8820851 4.8810982 1.0467134 +0.994 23.7982796 4.8654818 1.0437001 23.7982796 4.8654818 1.0437001 +0.995 23.7148134 4.8499244 1.0406973 23.7148134 4.8499244 1.0406973 +0.996 23.6316848 4.8344259 1.0377051 23.6316848 4.8344259 1.0377051 +0.997 23.5488924 4.8189859 1.0347233 23.5488924 4.8189859 1.0347233 +0.998 23.4664344 4.8036042 1.0317519 23.4664344 4.8036042 1.0317519 +0.999 23.3843094 4.7882805 1.0287909 23.3843094 4.7882805 1.0287909 +1.0 23.3025158 4.7730145 1.0258403 23.3025158 4.7730145 1.0258403 diff --git a/source/tests/pd/model/water/lkf.json b/source/tests/pd/model/water/lkf.json new file mode 100644 index 0000000000..377679c7ee --- /dev/null +++ b/source/tests/pd/model/water/lkf.json @@ -0,0 +1,79 @@ +{ + "model": { + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 25, + 25 + ], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1, + "_comment": " that's all" + }, + "fitting_net": { + "neuron": [ + 100, + 100, + 100 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "data_stat_nbatch": 20, + "_comment": " that's all" + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment": "that's all" + }, + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "_comment": " that's all" + }, + "training": { + "training_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 3, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "numb_btch": 3, + "_comment": "that's all" + }, + "numb_steps": 1, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 1, + "save_freq": 1, + "opt_type": "LKF", + "kf_blocksize": 1024, + "_comment": "that's all" + }, + "_comment": "that's all" +} diff --git a/source/tests/pd/model/water/multitask.json b/source/tests/pd/model/water/multitask.json new file mode 100644 index 0000000000..83524a8b77 --- /dev/null +++ b/source/tests/pd/model/water/multitask.json @@ -0,0 +1,140 @@ +{ + "model": { + "shared_dict": { + "my_type_map": [ + "O", + "H", + "B" + ], + "my_descriptor": { + "type": "se_e2_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1, + "_comment": " that's all" + }, + "_comment": "that's all" + }, + "model_dict": { + "model_1": { + "type_map": "my_type_map", + "descriptor": "my_descriptor", + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "data_stat_nbatch": 1 + }, + "model_2": { + "type_map": "my_type_map", + "descriptor": "my_descriptor", + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "data_stat_nbatch": 1 + } + } + }, + "learning_rate": { + 
"type": "exp", + "decay_steps": 5000, + "start_lr": 0.0002, + "decay_rate": 0.98, + "stop_lr": 3.51e-08, + "_comment": "that's all" + }, + "loss_dict": { + "model_1": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + }, + "model_2": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + } + }, + "training": { + "model_prob": { + "model_1": 0.5, + "model_2": 0.5 + }, + "data_dict": { + "model_1": { + "stat_file": "./stat_files/model_1.hdf5", + "training_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + } + }, + "model_2": { + "stat_file": "./stat_files/model_2.hdf5", + "training_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + } + } + }, + "numb_steps": 100000, + "warmup_steps": 0, + "gradient_max_norm": 5.0, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 100, + "_comment": "that's all" + } +} diff --git a/source/tests/pd/model/water/se_atten.json b/source/tests/pd/model/water/se_atten.json new file mode 100644 index 0000000000..70abf6759c --- /dev/null +++ b/source/tests/pd/model/water/se_atten.json @@ -0,0 +1,83 @@ +{ + "_comment": "that's all", + "model": { + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_atten", + "sel": 40, + "rcut_smth": 0.5, + "rcut": 4.0, + "neuron": [ + 25, + 50, + 100 + ], + "axis_neuron": 16, + "type_one_side": true, + "attn": 64, + "attn_layer": 2, + "attn_dotr": true, + "attn_mask": false, + "activation_function": "tanh", + "scaling_factor": 1.0, + "normalize": false, + "temperature": 1.0, + "seed": 1 + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-08, + "_comment": "that's all" + }, + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + "training": { + "training_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "numb_btch": 1, + "_comment": "that's all" + }, + "numb_steps": 1000000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "save_ckpt": "model", + "_comment": "that's all" + } +} diff --git a/source/tests/pd/model/water/se_e2_a.json b/source/tests/pd/model/water/se_e2_a.json new file mode 100644 index 0000000000..96f51ba5aa --- /dev/null +++ b/source/tests/pd/model/water/se_e2_a.json @@ -0,0 +1,77 @@ +{ + "model": { + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1, + "_comment": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": 
true, + "seed": 1, + "_comment": " that's all" + }, + "data_stat_nbatch": 20, + "_comment": " that's all" + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment": "that's all" + }, + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "_comment": " that's all" + }, + "training": { + "training_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pd/water/data/data_0" + ], + "batch_size": 1, + "numb_btch": 3, + "_comment": "that's all" + }, + "numb_steps": 100000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 10000, + "_comment": "that's all" + }, + "_comment": "that's all" +} diff --git a/source/tests/pd/model/water/zbl.json b/source/tests/pd/model/water/zbl.json new file mode 100644 index 0000000000..cb5602d92d --- /dev/null +++ b/source/tests/pd/model/water/zbl.json @@ -0,0 +1,92 @@ +{ + "_comment1": " model parameters", + "model": { + "use_srtab": "H2O_tab_potential.txt", + "smin_alpha": 0.1, + "sw_rmin": 0.8, + "sw_rmax": 1.0, + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "precision": "float64", + "seed": 1, + "_comment2": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "precision": "float64", + "seed": 1, + "_comment3": " that's all" + }, + "_comment4": " that's all" + }, + + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment5": "that's all" + }, + + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment6": " that's all" + }, + + "training": { + "training_data": { + "systems": [ + "../data/data_0/", + "../data/data_1/", + "../data/data_2/" + ], + "batch_size": "auto", + "_comment7": "that's all" + }, + "validation_data": { + "systems": [ + "../data/data_3" + ], + "batch_size": 1, + "numb_btch": 3, + "_comment8": "that's all" + }, + "numb_steps": 1000000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "_comment9": "that's all" + }, + + "_comment10": "that's all" +} diff --git a/source/tests/pd/property/input.json b/source/tests/pd/property/input.json new file mode 100644 index 0000000000..4e005f8277 --- /dev/null +++ b/source/tests/pd/property/input.json @@ -0,0 +1,77 @@ +{ + "_comment": "that's all", + "model": { + "type_map": [ + "H", + "C", + "N", + "O" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 90 + ], + "rcut_smth": 1.8, + "rcut": 6.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 8, + "precision": "float64", + "seed": 1 + }, + "fitting_net": { + "type": "property", + "intensive": true, + "task_dim": 3, + "neuron": [ + 100, + 100, + 100 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.0002, + "stop_lr": 3.51e-08, + "_comment": "that's all" + }, + "loss": { + "type": "property", + "_comment": " that's all" + }, + "training": { + "training_data": { + "systems": [ + "pt/property/single" + ], + 
"batch_size": 1, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pt/property/single" + ], + "batch_size": 1, + "_comment": "that's all" + }, + "numb_steps": 1000000, + "warmup_steps": 0, + "gradient_max_norm": 5.0, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 2000, + "_comment": "that's all" + } +} diff --git a/source/tests/pd/property/single/nopbc b/source/tests/pd/property/single/nopbc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/source/tests/pd/property/single/set.000000/coord.npy b/source/tests/pd/property/single/set.000000/coord.npy new file mode 100644 index 0000000000..201ec9707f Binary files /dev/null and b/source/tests/pd/property/single/set.000000/coord.npy differ diff --git a/source/tests/pd/property/single/set.000000/property.npy b/source/tests/pd/property/single/set.000000/property.npy new file mode 100644 index 0000000000..e5870a9f5b Binary files /dev/null and b/source/tests/pd/property/single/set.000000/property.npy differ diff --git a/source/tests/pd/property/single/set.000000/real_atom_types.npy b/source/tests/pd/property/single/set.000000/real_atom_types.npy new file mode 100644 index 0000000000..256dbe7122 Binary files /dev/null and b/source/tests/pd/property/single/set.000000/real_atom_types.npy differ diff --git a/source/tests/pd/property/single/type.raw b/source/tests/pd/property/single/type.raw new file mode 100644 index 0000000000..d677b495ec --- /dev/null +++ b/source/tests/pd/property/single/type.raw @@ -0,0 +1,20 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/source/tests/pd/property/single/type_map.raw b/source/tests/pd/property/single/type_map.raw new file mode 100644 index 0000000000..c8a39f3a9e --- /dev/null +++ b/source/tests/pd/property/single/type_map.raw @@ -0,0 +1,4 @@ +H +C +N +O diff --git a/source/tests/pd/requirements.txt b/source/tests/pd/requirements.txt new file mode 100644 index 0000000000..74abad719e --- /dev/null +++ b/source/tests/pd/requirements.txt @@ -0,0 +1,6 @@ +tensorflow>=2.14.0 +deepmd-kit>=2.2.7 +dpdata +ase +coverage +pytest diff --git a/source/tests/pd/test_LKF.py b/source/tests/pd/test_LKF.py new file mode 100644 index 0000000000..81f69041da --- /dev/null +++ b/source/tests/pd/test_LKF.py @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import unittest +from pathlib import ( + Path, +) + +from deepmd.pd.entrypoints.main import ( + main, +) + + +class TestLKF(unittest.TestCase): + def test_lkf(self): + with open(str(Path(__file__).parent / "water/lkf.json")) as fin: + content = fin.read() + self.config = json.loads(content) + self.config["training"]["training_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/data_0") + ] + self.config["training"]["validation_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/data_0") + ] + self.input_json = "test_lkf.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + main(["train", self.input_json]) + + def tearDown(self): + os.remove(self.input_json) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_auto_batch_size.py b/source/tests/pd/test_auto_batch_size.py new file mode 100644 index 0000000000..1033f46d07 --- /dev/null +++ b/source/tests/pd/test_auto_batch_size.py @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np + +from deepmd.pd.utils.auto_batch_size import ( + AutoBatchSize, +) + + +class 
TestAutoBatchSize(unittest.TestCase): + def test_execute_all(self): + dd0 = np.zeros((10000, 2, 1, 3, 4)) + dd1 = np.ones((10000, 2, 1, 3, 4)) + auto_batch_size = AutoBatchSize(256, 2.0) + + def func(dd1): + return np.zeros_like(dd1), np.ones_like(dd1) + + dd2 = auto_batch_size.execute_all(func, 10000, 2, dd1) + np.testing.assert_equal(dd0, dd2[0]) + np.testing.assert_equal(dd1, dd2[1]) + + def test_execute_all_dict(self): + dd0 = np.zeros((10000, 2, 1, 3, 4)) + dd1 = np.ones((10000, 2, 1, 3, 4)) + auto_batch_size = AutoBatchSize(256, 2.0) + + def func(dd1): + return { + "foo": np.zeros_like(dd1), + "bar": np.ones_like(dd1), + } + + dd2 = auto_batch_size.execute_all(func, 10000, 2, dd1) + np.testing.assert_equal(dd0, dd2["foo"]) + np.testing.assert_equal(dd1, dd2["bar"]) diff --git a/source/tests/pd/test_calculator.py b/source/tests/pd/test_calculator.py new file mode 100644 index 0000000000..5242b92b8e --- /dev/null +++ b/source/tests/pd/test_calculator.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np +import paddle + +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.utils.ase_calc import ( + DPCalculator, +) + +from ..seed import ( + GLOBAL_SEED, +) + +dtype = paddle.float64 + +paddle.framework.core.set_prim_eager_enabled(True) +paddle.framework.core._set_prim_all_enabled(True) + + +class TestCalculator(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/single") + ] + self.input_json = "test_dp_test.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + trainer = get_trainer(deepcopy(self.config)) + trainer.run() + + device = paddle.get_device() + paddle.set_device("cpu") + input_dict, label_dict, _ = trainer.get_data(is_train=False) + paddle.set_device(device) + _, _, more_loss = trainer.wrapper(**input_dict, label=label_dict, cur_lr=1.0) + + self.calculator = DPCalculator("model.pd") + + def test_calculator(self): + from ase import ( + Atoms, + ) + + natoms = 5 + cell = paddle.eye(3, dtype=dtype).to(device="cpu") * 10 + paddle.seed(GLOBAL_SEED) + coord = paddle.rand([natoms, 3], dtype=dtype).to(device="cpu") + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1]) + atomic_numbers = [1, 1, 1, 8, 8] + idx_perm = [1, 0, 4, 3, 2] + + prec = 1e-10 + low_prec = 1e-4 + + ase_atoms0 = Atoms( + numbers=atomic_numbers, + positions=coord, + # positions=[tuple(item) for item in coordinate], + cell=cell, + calculator=self.calculator, + pbc=True, + ) + e0, f0 = ase_atoms0.get_potential_energy(), ase_atoms0.get_forces() + s0, v0 = ( + ase_atoms0.get_stress(voigt=True), + -ase_atoms0.get_stress(voigt=False) * ase_atoms0.get_volume(), + ) + + ase_atoms1 = Atoms( + numbers=[atomic_numbers[i] for i in idx_perm], + positions=coord[idx_perm, :], + # positions=[tuple(item) for item in coordinate], + cell=cell, + calculator=self.calculator, + pbc=True, + ) + e1, f1 = ase_atoms1.get_potential_energy(), ase_atoms1.get_forces() + s1, v1 = ( + 
ase_atoms1.get_stress(voigt=True), + -ase_atoms1.get_stress(voigt=False) * ase_atoms1.get_volume(), + ) + + assert isinstance(e0, float) + assert f0.shape == (natoms, 3) + assert v0.shape == (3, 3) + np.testing.assert_allclose(e0, e1, rtol=low_prec, atol=prec) + np.testing.assert_allclose(f0[idx_perm, :], f1, rtol=low_prec, atol=prec) + np.testing.assert_allclose(s0, s1, rtol=low_prec, atol=prec) + np.testing.assert_allclose(v0, v1, rtol=low_prec, atol=prec) diff --git a/source/tests/pd/test_change_bias.py b/source/tests/pd/test_change_bias.py new file mode 100644 index 0000000000..2d87b739ff --- /dev/null +++ b/source/tests/pd/test_change_bias.py @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import tempfile +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np +import paddle + +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.train.training import ( + get_model_for_wrapper, + model_change_out_bias, +) +from deepmd.pd.train.wrapper import ( + ModelWrapper, +) +from deepmd.pd.utils.dataloader import ( + DpLoaderSet, +) +from deepmd.pd.utils.stat import ( + make_stat_input, +) +from deepmd.pd.utils.utils import ( + to_paddle_tensor, +) + +from .common import ( + run_dp, +) +from .model.test_permutation import ( + model_se_e2_a, +) +from .test_finetune import ( + energy_data_requirement, +) + +current_path = os.getcwd() + + +class TestChangeBias(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + model_name = "change-bias-model.ckpt" + self.data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = self.data_file + self.config["training"]["validation_data"]["systems"] = self.data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.config["training"]["save_ckpt"] = model_name + self.trainer = get_trainer(deepcopy(self.config)) + self.trainer.run() + self.state_dict_trained = self.trainer.wrapper.model.state_dict() + data = DpLoaderSet( + self.data_file, + batch_size=1, + type_map=self.config["model"]["type_map"], + ) + data.add_data_requirement(energy_data_requirement) + self.sampled = make_stat_input( + data.systems, + data.dataloaders, + nbatches=1, + ) + self.model_path = Path(current_path) / (model_name + ".pd") + self.model_path_data_bias = Path(current_path) / ( + model_name + "data_bias" + ".pd" + ) + self.model_path_data_file_bias = Path(current_path) / ( + model_name + "data_file_bias" + ".pd" + ) + self.model_path_user_bias = Path(current_path) / ( + model_name + "user_bias" + ".pd" + ) + + def test_change_bias_with_data(self): + run_dp( + f"dp --pd change-bias {self.model_path!s} -s {self.data_file[0]} -o {self.model_path_data_bias!s}" + ) + state_dict = paddle.load(str(self.model_path_data_bias)) + model_params = state_dict["model"]["_extra_state"]["model_params"] + model_for_wrapper = get_model_for_wrapper(model_params) + wrapper = ModelWrapper(model_for_wrapper) + wrapper.set_state_dict(state_dict["model"]) + updated_bias = wrapper.model["Default"].get_out_bias() + expected_model = model_change_out_bias( + self.trainer.wrapper.model["Default"], + self.sampled, + _bias_adjust_mode="change-by-statistic", + ) + expected_bias = expected_model.get_out_bias() + 
np.testing.assert_allclose(updated_bias.numpy(), expected_bias.numpy()) + + def test_change_bias_with_data_sys_file(self): + tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt") + with open(tmp_file.name, "w") as f: + f.writelines([sys + "\n" for sys in self.data_file]) + run_dp( + f"dp --pd change-bias {self.model_path!s} -f {tmp_file.name} -o {self.model_path_data_file_bias!s}" + ) + state_dict = paddle.load(str(self.model_path_data_file_bias)) + model_params = state_dict["model"]["_extra_state"]["model_params"] + model_for_wrapper = get_model_for_wrapper(model_params) + wrapper = ModelWrapper(model_for_wrapper) + wrapper.set_state_dict(state_dict["model"]) + updated_bias = wrapper.model["Default"].get_out_bias() + expected_model = model_change_out_bias( + self.trainer.wrapper.model["Default"], + self.sampled, + _bias_adjust_mode="change-by-statistic", + ) + expected_bias = expected_model.get_out_bias() + np.testing.assert_allclose(updated_bias.numpy(), expected_bias.numpy()) + + def test_change_bias_with_user_defined(self): + user_bias = [0.1, 3.2, -0.5] + run_dp( + f"dp --pd change-bias {self.model_path!s} -b {' '.join([str(_) for _ in user_bias])} -o {self.model_path_user_bias!s}" + ) + state_dict = paddle.load(str(self.model_path_user_bias)) + model_params = state_dict["model"]["_extra_state"]["model_params"] + model_for_wrapper = get_model_for_wrapper(model_params) + wrapper = ModelWrapper(model_for_wrapper) + wrapper.set_state_dict(state_dict["model"]) + updated_bias = wrapper.model["Default"].get_out_bias() + expected_bias = to_paddle_tensor(np.array(user_bias)).reshape( + updated_bias.shape + ) + np.testing.assert_allclose(updated_bias.numpy(), expected_bias.numpy()) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("change-bias-model") and f.endswith(".pd"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) diff --git a/source/tests/pd/test_dp_show.py b/source/tests/pd/test_dp_show.py new file mode 100644 index 0000000000..6efbec7f52 --- /dev/null +++ b/source/tests/pd/test_dp_show.py @@ -0,0 +1,231 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import io +import json +import os +import shutil +import unittest +from contextlib import ( + redirect_stderr, +) +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.utils.multi_task import ( + preprocess_shared_params, +) + +from .common import ( + run_dp, +) +from .model.test_permutation import ( + model_se_e2_a, +) + + +class TestSingleTaskModel(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + os.environ["FLAGS_prim_enable_dynamic"] = "1" + os.environ["FLAGS_enable_pir_api"] = "1" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["model"]["type_map"] = ["O", "H", "Au"] + trainer = get_trainer(deepcopy(self.config)) + trainer.run() + run_dp("dp --pd freeze") + + @unittest.skip( + "Paddle do not support dp --pd show frozen models(.json and .pdiparams file), " + "will be supported in the future." 
+ ) + def test_checkpoint(self): + INPUT = "model.pd" + ATTRIBUTES = "type-map descriptor fitting-net" + with redirect_stderr(io.StringIO()) as f: + run_dp(f"dp --pd show {INPUT} {ATTRIBUTES}") + results = f.getvalue().split("\n")[:-1] + assert "This is a singletask model" in results[-4] + assert "The type_map is ['O', 'H', 'Au']" in results[-3] + assert ( + "{'type': 'se_e2_a'" and "'sel': [46, 92, 4]" and "'rcut': 4.0" + ) in results[-2] + assert ( + "The fitting_net parameter is {'neuron': [24, 24, 24], 'resnet_dt': True, 'seed': 1}" + in results[-1] + ) + + @unittest.skip( + "Paddle do not support dp --pd show frozen models(.json and .pdiparams file), " + "will be supported in the future." + ) + def test_frozen_model(self): + INPUT = "frozen_model.pd" + ATTRIBUTES = "type-map descriptor fitting-net" + with redirect_stderr(io.StringIO()) as f: + run_dp(f"dp --pd show {INPUT} {ATTRIBUTES}") + results = f.getvalue().split("\n")[:-1] + assert "This is a singletask model" in results[-4] + assert "The type_map is ['O', 'H', 'Au']" in results[-3] + assert ( + "{'type': 'se_e2_a'" and "'sel': [46, 92, 4]" and "'rcut': 4.0" + ) in results[-2] + assert ( + "The fitting_net parameter is {'neuron': [24, 24, 24], 'resnet_dt': True, 'seed': 1}" + in results[-1] + ) + + @unittest.skip( + "Paddle do not support dp --pd show frozen models(.json and .pdiparams file), " + "will be supported in the future." + ) + def test_checkpoint_error(self): + INPUT = "model.pd" + ATTRIBUTES = "model-branch type-map descriptor fitting-net" + with self.assertRaisesRegex( + RuntimeError, "The 'model-branch' option requires a multitask model" + ): + run_dp(f"dp --pd show {INPUT} {ATTRIBUTES}") + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith("pd"): + os.remove(f) + if f in ["lcurve.out", "frozen_model.pd", "output.txt", "checkpoint"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + +class TestMultiTaskModel(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/multitask.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["model"]["shared_dict"]["my_descriptor"] = model_se_e2_a[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "se_e2_a" + os.makedirs(self.stat_files, exist_ok=True) + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) + self.config["model"]["model_dict"]["model_1"]["fitting_net"] = { + "neuron": [1, 2, 3], + "seed": 678, + } + self.config["model"]["model_dict"]["model_2"]["fitting_net"] = { + "neuron": [9, 8, 7], + "seed": 1111, + } + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) + self.config["model"], self.shared_links = preprocess_shared_params( + self.config["model"] + ) + trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links) + trainer.run() + run_dp("dp --pd freeze --head model_1") + + 
@unittest.skip( + "Paddle do not support dp --pd show frozen models(.json and .pdiparams file), " + "will be supported in the future." + ) + def test_checkpoint(self): + INPUT = "model.ckpt.pd" + ATTRIBUTES = "model-branch type-map descriptor fitting-net" + with redirect_stderr(io.StringIO()) as f: + run_dp(f"dp --pd show {INPUT} {ATTRIBUTES}") + results = f.getvalue().split("\n")[:-1] + assert "This is a multitask model" in results[-8] + assert ( + "Available model branches are ['model_1', 'model_2', 'RANDOM'], " + "where 'RANDOM' means using a randomly initialized fitting net." + in results[-7] + ) + assert "The type_map of branch model_1 is ['O', 'H', 'B']" in results[-6] + assert "The type_map of branch model_2 is ['O', 'H', 'B']" in results[-5] + assert ( + "model_1" + and "'type': 'se_e2_a'" + and "'sel': [46, 92, 4]" + and "'rcut_smth': 0.5" + ) in results[-4] + assert ( + "model_2" + and "'type': 'se_e2_a'" + and "'sel': [46, 92, 4]" + and "'rcut_smth': 0.5" + ) in results[-3] + assert ( + "The fitting_net parameter of branch model_1 is {'neuron': [1, 2, 3], 'seed': 678}" + in results[-2] + ) + assert ( + "The fitting_net parameter of branch model_2 is {'neuron': [9, 8, 7], 'seed': 1111}" + in results[-1] + ) + + @unittest.skip( + "Paddle do not support dp --pd show frozen models(.json and .pdiparams file), " + "will be supported in the future." + ) + def test_frozen_model(self): + INPUT = "frozen_model" + ATTRIBUTES = "type-map descriptor fitting-net" + with redirect_stderr(io.StringIO()) as f: + run_dp(f"dp --pd show {INPUT} {ATTRIBUTES}") + results = f.getvalue().split("\n")[:-1] + assert "This is a singletask model" in results[-4] + assert "The type_map is ['O', 'H', 'B']" in results[-3] + assert ( + "'type': 'se_e2_a'" and "'sel': [46, 92, 4]" and "'rcut_smth': 0.5" + ) in results[-2] + assert ( + "The fitting_net parameter is {'neuron': [1, 2, 3], 'seed': 678}" + in results[-1] + ) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith("pd"): + os.remove(f) + if f in [ + "lcurve.out", + "frozen_model.json", + "frozen_model.pdiparams", + "checkpoint", + "output.txt", + ]: + os.remove(f) + if f in ["stat_files", self.stat_files]: + shutil.rmtree(f) diff --git a/source/tests/pd/test_dp_test.py b/source/tests/pd/test_dp_test.py new file mode 100644 index 0000000000..e188eb3d6b --- /dev/null +++ b/source/tests/pd/test_dp_test.py @@ -0,0 +1,232 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import tempfile +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np +import paddle +from paddle.static import ( + InputSpec, +) + +from deepmd.entrypoints.test import test as dp_test +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) + +from .model.test_permutation import ( + model_property, + model_se_e2_a, + model_spin, +) + + +class DPTest: + @unittest.skip( + "Paddle do not support testing in frozen models(.json and .pdiparams file), " + "will be supported in the future." 
+ ) + def test_dp_test_1_frame(self): + trainer = get_trainer(deepcopy(self.config)) + device = paddle.get_device() + paddle.set_device("cpu") + input_dict, label_dict, _ = trainer.get_data(is_train=False) + # exit() + paddle.set_device(device) + has_spin = getattr(trainer.model, "has_spin", False) + + if callable(has_spin): + has_spin = has_spin() + if not has_spin: + input_dict.pop("spin", None) + input_dict["do_atomic_virial"] = True + result = trainer.model(**input_dict) + paddle.set_flags( + { + "FLAGS_save_cf_stack_op": 1, + "FLAGS_prim_enable_dynamic": 1, + "FLAGS_enable_pir_api": 1, + } + ) + model = paddle.jit.to_static( + trainer.model, + full_graph=True, + input_spec=[ + InputSpec([-1, -1, 3], dtype="float64", name="coord"), + InputSpec([-1, -1], dtype="int32", name="atype"), + InputSpec([-1, -1, -1], dtype="int32", name="nlist"), + ], + ) + tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pd") + paddle.jit.save( + model, + tmp_model.name, + skip_prune_program=True, + ) + dp_test( + model=tmp_model.name, + system=self.config["training"]["validation_data"]["systems"][0], + datafile=None, + set_prefix="set", + numb_test=0, + rand_seed=None, + shuffle_test=False, + detail_file=self.detail_file, + atomic=False, + ) + os.unlink(tmp_model.name) + natom = input_dict["atype"].shape[1] + pred_e = np.loadtxt(self.detail_file + ".e.out", ndmin=2)[0, 1] + np.testing.assert_almost_equal( + pred_e, + to_numpy_array(result["energy"])[0][0], + ) + pred_e_peratom = np.loadtxt(self.detail_file + ".e_peratom.out", ndmin=2)[0, 1] + np.testing.assert_almost_equal(pred_e_peratom, pred_e / natom) + if not has_spin: + pred_f = np.loadtxt(self.detail_file + ".f.out", ndmin=2)[:, 3:6] + np.testing.assert_almost_equal( + pred_f, + to_numpy_array(result["force"]).reshape(-1, 3), + ) + pred_v = np.loadtxt(self.detail_file + ".v.out", ndmin=2)[:, 9:18] + np.testing.assert_almost_equal( + pred_v, + to_numpy_array(result["virial"]), + ) + pred_v_peratom = np.loadtxt(self.detail_file + ".v_peratom.out", ndmin=2)[ + :, 9:18 + ] + np.testing.assert_almost_equal(pred_v_peratom, pred_v / natom) + else: + pred_fr = np.loadtxt(self.detail_file + ".fr.out", ndmin=2)[:, 3:6] + np.testing.assert_almost_equal( + pred_fr, + to_numpy_array(result["force"]).reshape(-1, 3), + ) + pred_fm = np.loadtxt(self.detail_file + ".fm.out", ndmin=2)[:, 3:6] + np.testing.assert_almost_equal( + pred_fm, + to_numpy_array( + result["force_mag"][result["mask_mag"].bool().squeeze(-1)] + ).reshape(-1, 3), + ) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pd"): + os.remove(f) + if f.startswith(self.detail_file): + os.remove(f) + if f in ["lcurve.out", self.input_json]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + +class TestDPTestSeA(DPTest, unittest.TestCase): + def setUp(self): + self.detail_file = "test_dp_test_ener_detail" + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.input_json = "test_dp_test.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + +class TestDPTestSeASpin(DPTest, unittest.TestCase): + def 
setUp(self): + self.detail_file = "test_dp_test_ener_spin_detail" + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + data_file = [str(Path(__file__).parent / "NiO/data/single")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_spin) + self.config["model"]["type_map"] = ["Ni", "O", "B"] + self.input_json = "test_dp_test.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + +class TestDPTestPropertySeA(unittest.TestCase): + def setUp(self): + self.detail_file = "test_dp_test_property_detail" + input_json = str(Path(__file__).parent / "property/input.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + data_file = [str(Path(__file__).parent / "property/single")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_property) + self.input_json = "test_dp_test_property.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + @unittest.skip( + "Paddle do not support testing in frozen models(.json and .pdiparams file), " + "will be supported in the future." + ) + def test_dp_test_1_frame(self): + trainer = get_trainer(deepcopy(self.config)) + input_dict, label_dict, _ = trainer.get_data(is_train=False) + input_dict.pop("spin", None) + result = trainer.model(**input_dict) + model = paddle.jit.to_static(trainer.model) + tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".json") + paddle.jit.save(model, tmp_model.name) + dp_test( + model=tmp_model.name, + system=self.config["training"]["validation_data"]["systems"][0], + datafile=None, + set_prefix="set", + numb_test=0, + rand_seed=None, + shuffle_test=False, + detail_file=self.detail_file, + atomic=True, + ) + os.unlink(tmp_model.name) + pred_property = np.loadtxt(self.detail_file + ".property.out.0")[:, 1] + np.testing.assert_almost_equal( + pred_property, + to_numpy_array(result["property"])[0], + ) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pt"): + os.remove(f) + if f.startswith(self.detail_file): + os.remove(f) + if f in ["lcurve.out", self.input_json]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_finetune.py b/source/tests/pd/test_finetune.py new file mode 100644 index 0000000000..0f5271c56a --- /dev/null +++ b/source/tests/pd/test_finetune.py @@ -0,0 +1,375 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import tempfile +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np +import paddle + +from deepmd.infer.deep_eval import ( + DeepEval, +) +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.model.model import ( + get_model, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.dataloader import ( + DpLoaderSet, +) +from deepmd.pd.utils.finetune import ( + get_finetune_rules, +) +from deepmd.pd.utils.stat import ( + make_stat_input, +) +from deepmd.pd.utils.utils import ( + 
to_numpy_array, + to_paddle_tensor, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + +from .model.test_permutation import ( + model_dos, + model_dpa1, + model_dpa2, + model_se_e2_a, + model_zbl, +) + +energy_data_requirement = [ + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ), + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ), + DataRequirementItem( + "dos", + ndof=250, + atomic=False, + must=False, + high_prec=True, + ), + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ), +] + + +class FinetuneTest: + @unittest.skip( + "Paddle do not support finetune in frozen models(.json and .pdiparams file), " + "will be supported in the future." + ) + def test_finetune_change_out_bias(self): + self.testkey = "energy" if self.testkey is None else self.testkey + # get data + data = DpLoaderSet( + self.data_file, + batch_size=1, + type_map=self.config["model"]["type_map"], + ) + data.add_data_requirement(energy_data_requirement) + sampled = make_stat_input( + data.systems, + data.dataloaders, + nbatches=1, + ) + # make sampled of multiple frames with different atom numbs + numb_atom = sampled[0]["atype"].shape[1] + small_numb_atom = numb_atom // 2 + small_atom_data = deepcopy(sampled[0]) + atomic_key = ["coord", "atype"] + for kk in atomic_key: + small_atom_data[kk] = small_atom_data[kk][:, :small_numb_atom] + scale_pref = float(small_numb_atom / numb_atom) + small_atom_data[self.testkey] *= scale_pref + small_atom_data["natoms"][:, :2] = small_numb_atom + small_atom_data["natoms"][:, 2:] = paddle.bincount( + small_atom_data["atype"][0], + minlength=small_atom_data["natoms"].shape[1] - 2, + ) + sampled = [sampled[0], small_atom_data] + + # get model + model = get_model(self.config["model"]).to(env.DEVICE) + atomic_model = model.atomic_model + atomic_model["out_bias"] = paddle.randn(atomic_model["out_bias"].shape) + energy_bias_before = to_numpy_array(atomic_model["out_bias"])[0] + + # prepare original model for test + dp = paddle.jit.to_static(model) + tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pd") + paddle.jit.save(dp, tmp_model.name) + dp = DeepEval(tmp_model.name) + origin_type_map = ["O", "H"] + full_type_map = ["O", "H", "B"] + + # change energy bias + model.atomic_model.change_out_bias( + sampled, + bias_adjust_mode="change-by-statistic", + ) + energy_bias_after = to_numpy_array(atomic_model["out_bias"])[0] + + # get ground-truth energy bias change + sorter = np.argsort(full_type_map) + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] + ntest = 1 + atom_nums = np.tile( + np.bincount(to_numpy_array(sampled[0]["atype"][0]))[idx_type_map], + (ntest, 1), + ) + atom_nums_small = np.tile( + np.bincount(to_numpy_array(sampled[1]["atype"][0]))[idx_type_map], + (ntest, 1), + ) + atom_nums = np.concatenate([atom_nums, atom_nums_small], axis=0) + + energy = dp.eval( + to_numpy_array(sampled[0]["coord"][:ntest]), + to_numpy_array(sampled[0]["box"][:ntest]), + to_numpy_array(sampled[0]["atype"][0]), + )[0] + energy_small = dp.eval( + to_numpy_array(sampled[1]["coord"][:ntest]), + to_numpy_array(sampled[1]["box"][:ntest]), + to_numpy_array(sampled[1]["atype"][0]), + )[0] + energy_diff 
= to_numpy_array(sampled[0][self.testkey][:ntest]) - energy + energy_diff_small = ( + to_numpy_array(sampled[1][self.testkey][:ntest]) - energy_small + ) + energy_diff = np.concatenate([energy_diff, energy_diff_small], axis=0) + finetune_shift = ( + energy_bias_after[idx_type_map] - energy_bias_before[idx_type_map] + ).ravel() + ground_truth_shift = np.linalg.lstsq(atom_nums, energy_diff, rcond=None)[ + 0 + ].reshape(-1) + + # check values + np.testing.assert_almost_equal(finetune_shift, ground_truth_shift, decimal=10) + + self.tearDown() + + def test_finetune_change_type(self): + if not self.mixed_types: + # skip when not mixed_types + return + # get data + data = DpLoaderSet( + self.data_file, + batch_size=1, + type_map=self.config["model"]["type_map"], + ) + data.add_data_requirement(energy_data_requirement) + sampled = make_stat_input( + data.systems, + data.dataloaders, + nbatches=1, + ) + data_type_map = self.config["model"]["type_map"] + for [old_type_map, new_type_map] in [ + [["H", "X1", "X2", "O", "B"], ["O", "H", "B"]], + [["O", "H", "B"], ["H", "X1", "X2", "O", "B"]], + ]: + old_type_map_index = np.array( + [old_type_map.index(i) for i in data_type_map], dtype=np.int32 + ) + new_type_map_index = np.array( + [new_type_map.index(i) for i in data_type_map], dtype=np.int32 + ) + + # get pretrained model with old type map + config_old_type_map = deepcopy(self.config) + config_old_type_map["model"]["type_map"] = old_type_map + trainer = get_trainer(config_old_type_map) + trainer.run() + finetune_model = ( + config_old_type_map["training"].get("save_ckpt", "model.ckpt") + ".pd" + ) + + # finetune load the same type_map + config_old_type_map_finetune = deepcopy(self.config) + config_old_type_map_finetune["model"]["type_map"] = old_type_map + config_old_type_map_finetune["model"], finetune_links = get_finetune_rules( + finetune_model, + config_old_type_map_finetune["model"], + ) + trainer_finetune_old = get_trainer( + config_old_type_map_finetune, + finetune_model=finetune_model, + finetune_links=finetune_links, + ) + + # finetune load the slim type_map + config_new_type_map_finetune = deepcopy(self.config) + config_new_type_map_finetune["model"]["type_map"] = new_type_map + config_new_type_map_finetune["model"], finetune_links = get_finetune_rules( + finetune_model, + config_new_type_map_finetune["model"], + ) + trainer_finetune_new = get_trainer( + config_new_type_map_finetune, + finetune_model=finetune_model, + finetune_links=finetune_links, + ) + + # test consistency + ntest = 1 + prec = 1e-10 + model_old_result = trainer_finetune_old.model( + sampled[0]["coord"][:ntest], + to_paddle_tensor(old_type_map_index)[sampled[0]["atype"][:ntest]], + box=sampled[0]["box"][:ntest], + ) + model_new_result = trainer_finetune_new.model( + sampled[0]["coord"][:ntest], + to_paddle_tensor(new_type_map_index)[sampled[0]["atype"][:ntest]], + box=sampled[0]["box"][:ntest], + ) + test_keys = ["energy", "force", "virial"] + for key in test_keys: + np.testing.assert_allclose( + model_old_result[key].numpy(), + model_new_result[key].numpy(), + rtol=prec, + atol=prec, + ) + + self.tearDown() + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pd"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + +class TestEnergyModelSeA(FinetuneTest, unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + 
self.data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = self.data_file + self.config["training"]["validation_data"]["systems"] = self.data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.mixed_types = False + self.testkey = None + + +class TestEnergyZBLModelSeA(FinetuneTest, unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = self.data_file + self.config["training"]["validation_data"]["systems"] = self.data_file + self.config["model"] = deepcopy(model_zbl) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.mixed_types = False + self.testkey = None + + +class TestEnergyDOSModelSeA(FinetuneTest, unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "dos/input.json") + with open(input_json) as f: + self.config = json.load(f) + self.data_file = [str(Path(__file__).parent / "dos/data/global_system")] + self.config["training"]["training_data"]["systems"] = self.data_file + self.config["training"]["validation_data"]["systems"] = self.data_file + self.config["model"] = deepcopy(model_dos) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.mixed_types = False + self.testkey = "dos" + + +class TestEnergyModelDPA1(FinetuneTest, unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = self.data_file + self.config["training"]["validation_data"]["systems"] = self.data_file + self.config["model"] = deepcopy(model_dpa1) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.mixed_types = True + self.testkey = None + + +class TestEnergyModelDPA2(FinetuneTest, unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = self.data_file + self.config["training"]["validation_data"]["systems"] = self.data_file + self.config["model"] = deepcopy(model_dpa2) + self.config["model"]["descriptor"]["repformer"]["nlayers"] = 2 + + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.mixed_types = True + self.testkey = None + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_init_frz_model.py b/source/tests/pd/test_init_frz_model.py new file mode 100644 index 0000000000..9b824b3886 --- /dev/null +++ b/source/tests/pd/test_init_frz_model.py @@ -0,0 +1,151 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import tempfile +import unittest +from argparse import ( + Namespace, +) +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np + +from deepmd.pd.entrypoints.main import ( + freeze, + get_trainer, +) +from deepmd.pd.infer.deep_eval import ( + DeepPot, +) + +from .common import 
( + run_dp, +) + + +@unittest.skip( + "Paddle do not support finetune in frozen models(.json and .pdiparams file), " + "will be supported in the future." +) +class TestInitFrzModel(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + config = json.load(f) + config["model"]["descriptor"]["smooth_type_embedding"] = True + config["training"]["numb_steps"] = 1 + config["training"]["save_freq"] = 1 + config["learning_rate"]["start_lr"] = 1.0 + config["training"]["training_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/single") + ] + config["training"]["validation_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/single") + ] + + self.models = [] + for imodel in range(3): + frozen_model = f"frozen_model{imodel}.json" + if imodel == 0: + temp_config = deepcopy(config) + trainer = get_trainer(temp_config) + elif imodel == 1: + temp_config = deepcopy(config) + temp_config["training"]["numb_steps"] = 0 + trainer = get_trainer(temp_config, init_frz_model=self.models[-1]) + else: + empty_config = deepcopy(config) + empty_config["model"]["descriptor"] = {} + empty_config["model"]["fitting_net"] = {} + empty_config["training"]["numb_steps"] = 0 + tmp_input = tempfile.NamedTemporaryFile(delete=False, suffix=".json") + with open(tmp_input.name, "w") as f: + json.dump(empty_config, f, indent=4) + run_dp( + f"dp --pd train {tmp_input.name} --init-frz-model {self.models[-1]} --use-pretrain-script --skip-neighbor-stat" + ) + trainer = None + + if imodel in [0, 1]: + trainer.run() + ns = Namespace( + model="model.pd", + output=frozen_model, + head=None, + ) + freeze(ns) + self.models.append(frozen_model) + + def test_dp_test(self): + dp1 = DeepPot(str(self.models[0])) + dp2 = DeepPot(str(self.models[1])) + dp3 = DeepPot(str(self.models[2])) + cell = np.array( + [ + 5.122106549439247480e00, + 4.016537340154059388e-01, + 6.951654033828678081e-01, + 4.016537340154059388e-01, + 6.112136112297989143e00, + 8.178091365465004481e-01, + 6.951654033828678081e-01, + 8.178091365465004481e-01, + 6.159552512682983760e00, + ] + ).reshape(1, 3, 3) + coord = np.array( + [ + 2.978060152121375648e00, + 3.588469695887098077e00, + 2.792459820604495491e00, + 3.895592322591093115e00, + 2.712091020667753760e00, + 1.366836847133650501e00, + 9.955616170888935690e-01, + 4.121324820711413039e00, + 1.817239061889086571e00, + 3.553661462345699906e00, + 5.313046969500791583e00, + 6.635182659098815883e00, + 6.088601018589653080e00, + 6.575011420004332585e00, + 6.825240650611076099e00, + ] + ).reshape(1, -1, 3) + atype = np.array([0, 0, 0, 1, 1]).reshape(1, -1) + + ret1 = dp1.eval(coord, cell, atype, atomic=True) + e1, f1, v1, ae1, av1 = ret1[0], ret1[1], ret1[2], ret1[3], ret1[4] + ret2 = dp2.eval(coord, cell, atype, atomic=True) + e2, f2, v2, ae2, av2 = ret2[0], ret2[1], ret2[2], ret2[3], ret2[4] + ret3 = dp3.eval(coord, cell, atype, atomic=True) + e3, f3, v3, ae3, av3 = ret3[0], ret3[1], ret3[2], ret3[3], ret3[4] + np.testing.assert_allclose(e1, e2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(e1, e3, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(f1, f2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(f1, f3, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(v1, v2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(v1, v3, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(ae1, ae2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(ae1, ae3, rtol=1e-10, atol=1e-10) + 
np.testing.assert_allclose(av1, av2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(av1, av3, rtol=1e-10, atol=1e-10) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("frozen_model") and ( + f.endswith(".json") or f.endswith(".pdiparams") + ): + os.remove(f) + if f.startswith("model") and f.endswith(".pd"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) diff --git a/source/tests/pd/test_init_model.py b/source/tests/pd/test_init_model.py new file mode 100644 index 0000000000..50c1e82ad6 --- /dev/null +++ b/source/tests/pd/test_init_model.py @@ -0,0 +1,136 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import tempfile +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np + +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.infer.deep_eval import ( + DeepPot, +) + +from .common import ( + run_dp, +) + + +class TestInitModel(unittest.TestCase): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + config = json.load(f) + config["model"]["descriptor"]["smooth_type_embedding"] = True + config["training"]["numb_steps"] = 1 + config["training"]["save_freq"] = 1 + config["learning_rate"]["start_lr"] = 1.0 + config["training"]["training_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/single") + ] + config["training"]["validation_data"]["systems"] = [ + str(Path(__file__).parent / "water/data/single") + ] + + self.models = [] + for imodel in range(3): + ckpt_model = f"model{imodel}.ckpt" + if imodel == 0: + temp_config = deepcopy(config) + temp_config["training"]["save_ckpt"] = ckpt_model + trainer = get_trainer(temp_config) + elif imodel == 1: + temp_config = deepcopy(config) + temp_config["training"]["numb_steps"] = 0 + temp_config["training"]["save_ckpt"] = ckpt_model + trainer = get_trainer(temp_config, init_model=self.models[-1]) + else: + empty_config = deepcopy(config) + empty_config["model"]["descriptor"] = {} + empty_config["model"]["fitting_net"] = {} + empty_config["training"]["numb_steps"] = 0 + empty_config["training"]["save_ckpt"] = ckpt_model + tmp_input = tempfile.NamedTemporaryFile(delete=False, suffix=".json") + with open(tmp_input.name, "w") as f: + json.dump(empty_config, f, indent=4) + run_dp( + f"dp --pd train {tmp_input.name} --init-model {self.models[-1]} --use-pretrain-script --skip-neighbor-stat" + ) + trainer = None + + if imodel in [0, 1]: + trainer.run() + self.models.append(ckpt_model + ".pd") + + def test_dp_test(self): + dp1 = DeepPot(str(self.models[0])) + dp2 = DeepPot(str(self.models[1])) + dp3 = DeepPot(str(self.models[2])) + cell = np.array( + [ + 5.122106549439247480e00, + 4.016537340154059388e-01, + 6.951654033828678081e-01, + 4.016537340154059388e-01, + 6.112136112297989143e00, + 8.178091365465004481e-01, + 6.951654033828678081e-01, + 8.178091365465004481e-01, + 6.159552512682983760e00, + ] + ).reshape(1, 3, 3) + coord = np.array( + [ + 2.978060152121375648e00, + 3.588469695887098077e00, + 2.792459820604495491e00, + 3.895592322591093115e00, + 2.712091020667753760e00, + 1.366836847133650501e00, + 9.955616170888935690e-01, + 4.121324820711413039e00, + 1.817239061889086571e00, + 3.553661462345699906e00, + 5.313046969500791583e00, + 6.635182659098815883e00, + 6.088601018589653080e00, + 6.575011420004332585e00, + 6.825240650611076099e00, + ] + ).reshape(1, -1, 3) + atype = np.array([0, 0, 
0, 1, 1]).reshape(1, -1) + + ret1 = dp1.eval(coord, cell, atype, atomic=True) + e1, f1, v1, ae1, av1 = ret1[0], ret1[1], ret1[2], ret1[3], ret1[4] + ret2 = dp2.eval(coord, cell, atype, atomic=True) + e2, f2, v2, ae2, av2 = ret2[0], ret2[1], ret2[2], ret2[3], ret2[4] + ret3 = dp3.eval(coord, cell, atype, atomic=True) + e3, f3, v3, ae3, av3 = ret3[0], ret3[1], ret3[2], ret3[3], ret3[4] + np.testing.assert_allclose(e1, e2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(e1, e3, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(f1, f2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(f1, f3, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(v1, v2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(v1, v3, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(ae1, ae2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(ae1, ae3, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(av1, av2, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(av1, av3, rtol=1e-10, atol=1e-10) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pd"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) diff --git a/source/tests/pd/test_loss.py b/source/tests/pd/test_loss.py new file mode 100644 index 0000000000..6139d33a6a --- /dev/null +++ b/source/tests/pd/test_loss.py @@ -0,0 +1,808 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import os +import unittest + +import numpy as np +import paddle +import tensorflow.compat.v1 as tf + +tf.disable_eager_execution() +from pathlib import ( + Path, +) + +from deepmd.pd.loss import ( + EnergySpinLoss, + EnergyStdLoss, +) +from deepmd.pd.utils.dataset import ( + DeepmdDataSetForLoader, +) +from deepmd.tf.loss.ener import ( + EnerSpinLoss, + EnerStdLoss, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + +from ..seed import ( + GLOBAL_SEED, +) +from .model.test_embedding_net import ( + get_single_batch, +) +from .test_finetune import ( + energy_data_requirement, +) + +CUR_DIR = os.path.dirname(__file__) + + +def get_batch(system, type_map, data_requirement): + dataset = DeepmdDataSetForLoader(system, type_map) + dataset.add_data_requirement(data_requirement) + np_batch, pt_batch = get_single_batch(dataset) + return np_batch, pt_batch + + +class LossCommonTest(unittest.TestCase): + def setUp(self): + self.cur_lr = 1.2 + if not self.spin: + self.system = str(Path(__file__).parent / "water/data/data_0") + self.type_map = ["H", "O"] + else: + self.system = str(Path(__file__).parent / "NiO/data/data_0") + self.type_map = ["Ni", "O"] + energy_data_requirement.append( + DataRequirementItem( + "force_mag", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + # data + np_batch, pt_batch = get_batch( + self.system, self.type_map, energy_data_requirement + ) + natoms = np_batch["natoms"] + self.nloc = natoms[0] + nframes = np_batch["energy"].shape[0] + rng = np.random.default_rng(GLOBAL_SEED) + + if not self.spin: + l_energy, l_force, l_virial = ( + np_batch["energy"], + np_batch["force"], + np_batch["virial"], + ) + p_energy, p_force, p_virial = ( + np.ones_like(l_energy), + np.ones_like(l_force), + np.ones_like(l_virial), + ) + nloc = natoms[0] + batch_size = pt_batch["coord"].shape[0] + p_atom_energy = rng.random(size=[batch_size, nloc]) + l_atom_energy = rng.random(size=[batch_size, nloc]) + atom_pref = rng.random(size=[batch_size, nloc * 3]) + drdq = rng.random(size=[batch_size, nloc * 2 * 3]) + atom_ener_coeff = 
rng.random(size=[batch_size, nloc]) + # placeholders + l_force_real = l_force + l_force_mag = l_force + p_force_real = p_force + p_force_mag = p_force + else: + # data + np_batch, pt_batch = get_batch( + self.system, self.type_map, energy_data_requirement + ) + natoms = np_batch["natoms"] + self.nloc = natoms[0] + l_energy, l_force_real, l_force_mag, l_virial = ( + np_batch["energy"], + np_batch["force"], + np_batch["force_mag"], + np_batch["virial"], + ) + # merged force for tf old implement + l_force_merge_tf = np.concatenate( + [ + l_force_real.reshape([nframes, self.nloc, 3]), + l_force_mag.reshape([nframes, self.nloc, 3])[ + np_batch["atype"] == 0 + ].reshape([nframes, -1, 3]), + ], + axis=1, + ).reshape([nframes, -1]) + p_energy, p_force_real, p_force_mag, p_force_merge_tf, p_virial = ( + np.ones_like(l_energy), + np.ones_like(l_force_real), + np.ones_like(l_force_mag), + np.ones_like(l_force_merge_tf), + np.ones_like(l_virial), + ) + virt_nloc = (np_batch["atype"] == 0).sum(-1) + natoms_tf = np.concatenate([natoms, virt_nloc], axis=0) + natoms_tf[:2] += virt_nloc + nloc = natoms_tf[0] + batch_size = pt_batch["coord"].shape[0] + p_atom_energy = rng.random(size=[batch_size, nloc]) + l_atom_energy = rng.random(size=[batch_size, nloc]) + atom_pref = rng.random(size=[batch_size, nloc * 3]) + drdq = rng.random(size=[batch_size, nloc * 2 * 3]) + atom_ener_coeff = rng.random(size=[batch_size, nloc]) + self.nloc_tf = nloc + natoms = natoms_tf + l_force = l_force_merge_tf + p_force = p_force_merge_tf + + # tf + self.g = tf.Graph() + with self.g.as_default(): + t_cur_lr = tf.placeholder(shape=[], dtype=tf.float64) + t_natoms = tf.placeholder(shape=[None], dtype=tf.int32) + t_penergy = tf.placeholder(shape=[None, 1], dtype=tf.float64) + t_pforce = tf.placeholder(shape=[None, None], dtype=tf.float64) + t_pvirial = tf.placeholder(shape=[None, 9], dtype=tf.float64) + t_patom_energy = tf.placeholder(shape=[None, None], dtype=tf.float64) + t_lenergy = tf.placeholder(shape=[None, 1], dtype=tf.float64) + t_lforce = tf.placeholder(shape=[None, None], dtype=tf.float64) + t_lvirial = tf.placeholder(shape=[None, 9], dtype=tf.float64) + t_latom_energy = tf.placeholder(shape=[None, None], dtype=tf.float64) + t_atom_pref = tf.placeholder(shape=[None, None], dtype=tf.float64) + t_atom_ener_coeff = tf.placeholder(shape=[None, None], dtype=tf.float64) + t_drdq = tf.placeholder(shape=[None, None], dtype=tf.float64) + find_energy = tf.constant(1.0, dtype=tf.float64) + find_force = tf.constant(1.0, dtype=tf.float64) + find_virial = tf.constant(1.0 if not self.spin else 0.0, dtype=tf.float64) + find_atom_energy = tf.constant(1.0, dtype=tf.float64) + find_atom_pref = tf.constant(1.0, dtype=tf.float64) + find_drdq = tf.constant(1.0, dtype=tf.float64) + find_atom_ener_coeff = tf.constant(1.0, dtype=tf.float64) + model_dict = { + "energy": t_penergy, + "force": t_pforce, + "virial": t_pvirial, + "atom_ener": t_patom_energy, + } + label_dict = { + "energy": t_lenergy, + "force": t_lforce, + "virial": t_lvirial, + "atom_ener": t_latom_energy, + "atom_pref": t_atom_pref, + "drdq": t_drdq, + "atom_ener_coeff": t_atom_ener_coeff, + "find_energy": find_energy, + "find_force": find_force, + "find_virial": find_virial, + "find_atom_ener": find_atom_energy, + "find_atom_pref": find_atom_pref, + "find_drdq": find_drdq, + "find_atom_ener_coeff": find_atom_ener_coeff, + } + self.tf_loss_sess = self.tf_loss.build( + t_cur_lr, t_natoms, model_dict, label_dict, "" + ) + + self.feed_dict = { + t_cur_lr: self.cur_lr, + t_natoms: 
natoms, + t_penergy: p_energy, + t_pforce: p_force, + t_pvirial: p_virial.reshape([-1, 9]), + t_patom_energy: p_atom_energy, + t_lenergy: l_energy, + t_lforce: l_force, + t_lvirial: l_virial.reshape([-1, 9]), + t_latom_energy: l_atom_energy, + t_atom_pref: atom_pref, + t_drdq: drdq, + t_atom_ener_coeff: atom_ener_coeff, + } + # pt + if not self.spin: + self.model_pred = { + "energy": paddle.to_tensor(p_energy), + "force": paddle.to_tensor(p_force), + "virial": paddle.to_tensor(p_virial), + "atom_energy": paddle.to_tensor(p_atom_energy), + } + self.label = { + "energy": paddle.to_tensor(l_energy), + "find_energy": 1.0, + "force": paddle.to_tensor(l_force), + "find_force": 1.0, + "virial": paddle.to_tensor(l_virial), + "find_virial": 1.0, + "atom_ener": paddle.to_tensor(l_atom_energy), + "find_atom_ener": 1.0, + "atom_pref": paddle.to_tensor(atom_pref), + "find_atom_pref": 1.0, + "drdq": paddle.to_tensor(drdq), + "find_drdq": 1.0, + "atom_ener_coeff": paddle.to_tensor(atom_ener_coeff), + "find_atom_ener_coeff": 1.0, + } + self.label_absent = { + "energy": paddle.to_tensor(l_energy), + "force": paddle.to_tensor(l_force), + "virial": paddle.to_tensor(l_virial), + "atom_ener": paddle.to_tensor(l_atom_energy), + "atom_pref": paddle.to_tensor(atom_pref), + "drdq": paddle.to_tensor(drdq), + "atom_ener_coeff": paddle.to_tensor(atom_ener_coeff), + } + else: + self.model_pred = { + "energy": paddle.to_tensor(p_energy), + "force": paddle.to_tensor(p_force_real).reshape( + [nframes, self.nloc, 3] + ), + "force_mag": paddle.to_tensor(p_force_mag).reshape( + [nframes, self.nloc, 3] + ), + "mask_mag": paddle.to_tensor(np_batch["atype"] == 0).reshape( + [nframes, self.nloc, 1] + ), + "atom_energy": paddle.to_tensor(p_atom_energy), + } + self.label = { + "energy": paddle.to_tensor(l_energy), + "find_energy": 1.0, + "force": paddle.to_tensor(l_force_real).reshape( + [nframes, self.nloc, 3] + ), + "find_force": 1.0, + "force_mag": paddle.to_tensor(l_force_mag).reshape( + [nframes, self.nloc, 3] + ), + "find_force_mag": 1.0, + "atom_ener": paddle.to_tensor(l_atom_energy), + "find_atom_ener": 1.0, + "atom_ener_coeff": paddle.to_tensor(atom_ener_coeff), + "find_atom_ener_coeff": 1.0, + } + self.label_absent = { + "energy": paddle.to_tensor(l_energy), + "force": paddle.to_tensor(l_force_real).reshape( + [nframes, self.nloc, 3] + ), + "force_mag": paddle.to_tensor(l_force_mag).reshape( + [nframes, self.nloc, 3] + ), + "atom_ener": paddle.to_tensor(l_atom_energy), + "atom_ener_coeff": paddle.to_tensor(atom_ener_coeff), + } + self.natoms = pt_batch["natoms"] + + def tearDown(self) -> None: + tf.reset_default_graph() + return super().tearDown() + + +class TestEnerStdLoss(LossCommonTest): + def setUp(self): + self.start_lr = 1.1 + self.start_pref_e = 0.02 + self.limit_pref_e = 1.0 + self.start_pref_f = 1000.0 + self.limit_pref_f = 1.0 + self.start_pref_v = 0.02 + self.limit_pref_v = 1.0 + # tf + self.tf_loss = EnerStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + ) + # pt + self.pt_loss = EnergyStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + ) + self.spin = False + super().setUp() + + def test_consistency(self): + with tf.Session(graph=self.g) as sess: + tf_loss, tf_more_loss = sess.run( + self.tf_loss_sess, feed_dict=self.feed_dict + ) + + def fake_model(): + return self.model_pred + + _, pt_loss, pt_more_loss 
= self.pt_loss( + {}, + fake_model, + self.label, + self.nloc, + self.cur_lr, + ) + _, pt_loss_absent, pt_more_loss_absent = self.pt_loss( + {}, + fake_model, + self.label_absent, + self.nloc, + self.cur_lr, + ) + pt_loss = pt_loss.detach().cpu() + pt_loss_absent = pt_loss_absent.detach().cpu() + self.assertTrue(np.allclose(tf_loss, pt_loss.numpy())) + self.assertTrue(np.allclose(0.0, pt_loss_absent.numpy())) + for key in ["ener", "force", "virial"]: + self.assertTrue( + np.allclose( + tf_more_loss[f"l2_{key}_loss"], pt_more_loss[f"l2_{key}_loss"] + ) + ) + self.assertTrue(np.isnan(pt_more_loss_absent[f"l2_{key}_loss"].numpy())) + + +class TestEnerStdLossAePfGf(LossCommonTest): + def setUp(self): + self.start_lr = 1.1 + self.start_pref_e = 0.02 + self.limit_pref_e = 1.0 + self.start_pref_f = 1000.0 + self.limit_pref_f = 1.0 + self.start_pref_v = 0.02 + self.limit_pref_v = 1.0 + self.start_pref_ae = 0.02 + self.limit_pref_ae = 1.0 + self.start_pref_pf = 0.02 + self.limit_pref_pf = 1.0 + self.start_pref_gf = 0.02 + self.limit_pref_gf = 1.0 + self.numb_generalized_coord = 2 + # tf + self.tf_loss = EnerStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + self.start_pref_ae, + self.limit_pref_ae, + self.start_pref_pf, + self.limit_pref_pf, + start_pref_gf=self.start_pref_gf, + limit_pref_gf=self.limit_pref_gf, + numb_generalized_coord=self.numb_generalized_coord, + ) + # pt + self.pt_loss = EnergyStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + self.start_pref_ae, + self.limit_pref_ae, + self.start_pref_pf, + self.limit_pref_pf, + start_pref_gf=self.start_pref_gf, + limit_pref_gf=self.limit_pref_gf, + numb_generalized_coord=self.numb_generalized_coord, + ) + self.spin = False + super().setUp() + + def test_consistency(self): + with tf.Session(graph=self.g) as sess: + tf_loss, tf_more_loss = sess.run( + self.tf_loss_sess, feed_dict=self.feed_dict + ) + + def fake_model(): + return self.model_pred + + _, pt_loss, pt_more_loss = self.pt_loss( + {}, + fake_model, + self.label, + self.nloc, + self.cur_lr, + ) + _, pt_loss_absent, pt_more_loss_absent = self.pt_loss( + {}, + fake_model, + self.label_absent, + self.nloc, + self.cur_lr, + ) + pt_loss = pt_loss.detach().cpu() + pt_loss_absent = pt_loss_absent.detach().cpu() + self.assertTrue(np.allclose(tf_loss, pt_loss.numpy())) + self.assertTrue(np.allclose(0.0, pt_loss_absent.numpy())) + for key in ["ener", "force", "virial", "atom_ener", "pref_force", "gen_force"]: + self.assertTrue( + np.allclose( + tf_more_loss[f"l2_{key}_loss"], pt_more_loss[f"l2_{key}_loss"] + ) + ) + self.assertTrue(np.isnan(pt_more_loss_absent[f"l2_{key}_loss"].numpy())) + + +class TestEnerStdLossAecoeff(LossCommonTest): + def setUp(self): + self.start_lr = 1.1 + self.start_pref_e = 0.02 + self.limit_pref_e = 1.0 + self.start_pref_f = 1000.0 + self.limit_pref_f = 1.0 + self.start_pref_v = 0.02 + self.limit_pref_v = 1.0 + # tf + self.tf_loss = EnerStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + enable_atom_ener_coeff=True, + ) + # pt + self.pt_loss = EnergyStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + enable_atom_ener_coeff=True, + ) + self.spin = False + 
super().setUp() + + def test_consistency(self): + with tf.Session(graph=self.g) as sess: + tf_loss, tf_more_loss = sess.run( + self.tf_loss_sess, feed_dict=self.feed_dict + ) + + def fake_model(): + return self.model_pred + + _, pt_loss, pt_more_loss = self.pt_loss( + {}, + fake_model, + self.label, + self.nloc, + self.cur_lr, + ) + _, pt_loss_absent, pt_more_loss_absent = self.pt_loss( + {}, + fake_model, + self.label_absent, + self.nloc, + self.cur_lr, + ) + pt_loss = pt_loss.detach().cpu() + pt_loss_absent = pt_loss_absent.detach().cpu() + self.assertTrue(np.allclose(tf_loss, pt_loss.numpy())) + self.assertTrue(np.allclose(0.0, pt_loss_absent.numpy())) + for key in ["ener", "force", "virial"]: + self.assertTrue( + np.allclose( + tf_more_loss[f"l2_{key}_loss"], pt_more_loss[f"l2_{key}_loss"] + ) + ) + self.assertTrue(np.isnan(pt_more_loss_absent[f"l2_{key}_loss"].numpy())) + + +class TestEnerStdLossRelativeF(LossCommonTest): + def setUp(self): + self.start_lr = 1.1 + self.start_pref_e = 0.02 + self.limit_pref_e = 1.0 + self.start_pref_f = 1000.0 + self.limit_pref_f = 1.0 + self.start_pref_v = 0.02 + self.limit_pref_v = 1.0 + # tf + self.tf_loss = EnerStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + relative_f=0.1, + ) + # pt + self.pt_loss = EnergyStdLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_f, + self.limit_pref_f, + self.start_pref_v, + self.limit_pref_v, + relative_f=0.1, + ) + self.spin = False + super().setUp() + + def test_consistency(self): + with tf.Session(graph=self.g) as sess: + tf_loss, tf_more_loss = sess.run( + self.tf_loss_sess, feed_dict=self.feed_dict + ) + + def fake_model(): + return self.model_pred + + _, pt_loss, pt_more_loss = self.pt_loss( + {}, + fake_model, + self.label, + self.nloc, + self.cur_lr, + ) + _, pt_loss_absent, pt_more_loss_absent = self.pt_loss( + {}, + fake_model, + self.label_absent, + self.nloc, + self.cur_lr, + ) + pt_loss = pt_loss.detach().cpu() + pt_loss_absent = pt_loss_absent.detach().cpu() + self.assertTrue(np.allclose(tf_loss, pt_loss.numpy())) + self.assertTrue(np.allclose(0.0, pt_loss_absent.numpy())) + for key in ["ener", "force", "virial"]: + self.assertTrue( + np.allclose( + tf_more_loss[f"l2_{key}_loss"], pt_more_loss[f"l2_{key}_loss"] + ) + ) + self.assertTrue(np.isnan(pt_more_loss_absent[f"l2_{key}_loss"].numpy())) + + +class TestEnerSpinLoss(LossCommonTest): + def setUp(self): + self.start_lr = 1.1 + self.start_pref_e = 0.02 + self.limit_pref_e = 1.0 + self.start_pref_fr = 1000.0 + self.limit_pref_fr = 1.0 + self.start_pref_fm = 1000.0 + self.limit_pref_fm = 1.0 + self.cur_lr = 1.2 + self.use_spin = [1, 0] + # tf + self.tf_loss = EnerSpinLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_fr, + self.limit_pref_fr, + self.start_pref_fm, + self.limit_pref_fm, + use_spin=self.use_spin, + ) + # pt + self.pt_loss = EnergySpinLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_fr, + self.limit_pref_fr, + self.start_pref_fm, + self.limit_pref_fm, + ) + self.spin = True + super().setUp() + + def test_consistency(self): + with tf.Session(graph=self.g) as sess: + tf_loss, tf_more_loss = sess.run( + self.tf_loss_sess, feed_dict=self.feed_dict + ) + + def fake_model(): + return self.model_pred + + _, pt_loss, pt_more_loss = self.pt_loss( + {}, + fake_model, + self.label, + self.nloc_tf, # use tf natoms pref + self.cur_lr, + ) + _, 
pt_loss_absent, pt_more_loss_absent = self.pt_loss( + {}, + fake_model, + self.label_absent, + self.nloc_tf, # use tf natoms pref + self.cur_lr, + ) + pt_loss = pt_loss.detach().cpu() + pt_loss_absent = pt_loss_absent.detach().cpu() + self.assertTrue(np.allclose(tf_loss, pt_loss.numpy())) + self.assertTrue(np.allclose(0.0, pt_loss_absent.numpy())) + for key in ["ener", "force_r", "force_m"]: + self.assertTrue( + np.allclose( + tf_more_loss[f"l2_{key}_loss"], pt_more_loss[f"l2_{key}_loss"] + ) + ) + self.assertTrue(np.isnan(pt_more_loss_absent[f"l2_{key}_loss"].numpy())) + + +class TestEnerSpinLossAe(LossCommonTest): + def setUp(self): + self.start_lr = 1.1 + self.start_pref_e = 0.02 + self.limit_pref_e = 1.0 + self.start_pref_fr = 1000.0 + self.limit_pref_fr = 1.0 + self.start_pref_fm = 1000.0 + self.limit_pref_fm = 1.0 + self.start_pref_ae = 0.02 + self.limit_pref_ae = 1.0 + self.cur_lr = 1.2 + self.use_spin = [1, 0] + # tf + self.tf_loss = EnerSpinLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_fr, + self.limit_pref_fr, + self.start_pref_fm, + self.limit_pref_fm, + start_pref_ae=self.start_pref_ae, + limit_pref_ae=self.limit_pref_ae, + use_spin=self.use_spin, + ) + # pt + self.pt_loss = EnergySpinLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_fr, + self.limit_pref_fr, + self.start_pref_fm, + self.limit_pref_fm, + start_pref_ae=self.start_pref_ae, + limit_pref_ae=self.limit_pref_ae, + ) + self.spin = True + super().setUp() + + def test_consistency(self): + with tf.Session(graph=self.g) as sess: + tf_loss, tf_more_loss = sess.run( + self.tf_loss_sess, feed_dict=self.feed_dict + ) + + def fake_model(): + return self.model_pred + + _, pt_loss, pt_more_loss = self.pt_loss( + {}, + fake_model, + self.label, + self.nloc_tf, # use tf natoms pref + self.cur_lr, + ) + _, pt_loss_absent, pt_more_loss_absent = self.pt_loss( + {}, + fake_model, + self.label_absent, + self.nloc_tf, # use tf natoms pref + self.cur_lr, + ) + pt_loss = pt_loss.detach().cpu() + pt_loss_absent = pt_loss_absent.detach().cpu() + self.assertTrue(np.allclose(tf_loss, pt_loss.numpy())) + self.assertTrue(np.allclose(0.0, pt_loss_absent.numpy())) + for key in ["ener", "force_r", "force_m", "atom_ener"]: + self.assertTrue( + np.allclose( + tf_more_loss[f"l2_{key}_loss"], pt_more_loss[f"l2_{key}_loss"] + ) + ) + self.assertTrue(np.isnan(pt_more_loss_absent[f"l2_{key}_loss"].numpy())) + + +class TestEnerSpinLossAecoeff(LossCommonTest): + def setUp(self): + self.start_lr = 1.1 + self.start_pref_e = 0.02 + self.limit_pref_e = 1.0 + self.start_pref_fr = 1000.0 + self.limit_pref_fr = 1.0 + self.start_pref_fm = 1000.0 + self.limit_pref_fm = 1.0 + self.cur_lr = 1.2 + self.use_spin = [1, 0] + # tf + self.tf_loss = EnerSpinLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_fr, + self.limit_pref_fr, + self.start_pref_fm, + self.limit_pref_fm, + use_spin=self.use_spin, + enable_atom_ener_coeff=True, + ) + # pt + self.pt_loss = EnergySpinLoss( + self.start_lr, + self.start_pref_e, + self.limit_pref_e, + self.start_pref_fr, + self.limit_pref_fr, + self.start_pref_fm, + self.limit_pref_fm, + enable_atom_ener_coeff=True, + ) + self.spin = True + super().setUp() + + def test_consistency(self): + with tf.Session(graph=self.g) as sess: + tf_loss, tf_more_loss = sess.run( + self.tf_loss_sess, feed_dict=self.feed_dict + ) + + def fake_model(): + return self.model_pred + + _, pt_loss, pt_more_loss = self.pt_loss( + {}, + fake_model, + self.label, + 
self.nloc_tf, # use tf natoms pref + self.cur_lr, + ) + _, pt_loss_absent, pt_more_loss_absent = self.pt_loss( + {}, + fake_model, + self.label_absent, + self.nloc_tf, # use tf natoms pref + self.cur_lr, + ) + pt_loss = pt_loss.detach().cpu() + pt_loss_absent = pt_loss_absent.detach().cpu() + self.assertTrue(np.allclose(tf_loss, pt_loss.numpy())) + self.assertTrue(np.allclose(0.0, pt_loss_absent.numpy())) + for key in ["ener", "force_r", "force_m"]: + self.assertTrue( + np.allclose( + tf_more_loss[f"l2_{key}_loss"], pt_more_loss[f"l2_{key}_loss"] + ) + ) + self.assertTrue(np.isnan(pt_more_loss_absent[f"l2_{key}_loss"].numpy())) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_lr.py b/source/tests/pd/test_lr.py new file mode 100644 index 0000000000..f5ce911b04 --- /dev/null +++ b/source/tests/pd/test_lr.py @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import tensorflow.compat.v1 as tf + +tf.disable_eager_execution() + +from deepmd.pd.utils.learning_rate import ( + LearningRateExp, +) +from deepmd.tf.utils import ( + learning_rate, +) + + +class TestLearningRate(unittest.TestCase): + def setUp(self): + self.start_lr = 0.001 + self.stop_lr = 3.51e-8 + self.decay_steps = np.arange(400, 601, 100) + self.stop_steps = np.arange(500, 1600, 500) + + def test_consistency(self): + for decay_step in self.decay_steps: + for stop_step in self.stop_steps: + self.decay_step = decay_step + self.stop_step = stop_step + self.judge_it() + self.decay_rate_pt() + + def judge_it(self): + base_lr = learning_rate.LearningRateExp( + self.start_lr, self.stop_lr, self.decay_step + ) + g = tf.Graph() + with g.as_default(): + global_step = tf.placeholder(shape=[], dtype=tf.int32) + t_lr = base_lr.build(global_step, self.stop_step) + + my_lr = LearningRateExp( + self.start_lr, self.stop_lr, self.decay_step, self.stop_step + ) + with tf.Session(graph=g) as sess: + base_vals = [ + sess.run(t_lr, feed_dict={global_step: step_id}) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + my_vals = [ + my_lr.value(step_id) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + self.assertTrue(np.allclose(base_vals, my_vals)) + tf.reset_default_graph() + + def decay_rate_pt(self): + my_lr = LearningRateExp( + self.start_lr, self.stop_lr, self.decay_step, self.stop_step + ) + + default_ds = 100 if self.stop_step // 10 > 100 else self.stop_step // 100 + 1 + if self.decay_step >= self.stop_step: + self.decay_step = default_ds + decay_rate = np.exp( + np.log(self.stop_lr / self.start_lr) / (self.stop_step / self.decay_step) + ) + my_lr_decay = LearningRateExp( + self.start_lr, + 1e-10, + self.decay_step, + self.stop_step, + decay_rate=decay_rate, + ) + min_lr = 1e-5 + my_lr_decay_trunc = LearningRateExp( + self.start_lr, + min_lr, + self.decay_step, + self.stop_step, + decay_rate=decay_rate, + ) + my_vals = [ + my_lr.value(step_id) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + my_vals_decay = [ + my_lr_decay.value(step_id) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + my_vals_decay_trunc = [ + my_lr_decay_trunc.value(step_id) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + self.assertTrue(np.allclose(my_vals_decay, my_vals)) + self.assertTrue( + np.allclose(my_vals_decay_trunc, np.clip(my_vals, a_min=min_lr, a_max=None)) + ) + + +if __name__ == "__main__": + unittest.main() diff --git 
a/source/tests/pd/test_multitask.py b/source/tests/pd/test_multitask.py new file mode 100644 index 0000000000..8c7ceb5e90 --- /dev/null +++ b/source/tests/pd/test_multitask.py @@ -0,0 +1,305 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np + +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.utils.finetune import ( + get_finetune_rules, +) +from deepmd.pd.utils.multi_task import ( + preprocess_shared_params, +) +from deepmd.utils.argcheck import ( + normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) + +from .model.test_permutation import ( + model_dpa1, + model_dpa2, + model_se_e2_a, +) + + +def setUpModule(): + global multitask_template + multitask_template_json = str(Path(__file__).parent / "water/multitask.json") + with open(multitask_template_json) as f: + multitask_template = json.load(f) + + +class MultiTaskTrainTest: + def test_multitask_train(self): + # test multitask training + self.config = update_deepmd_input(self.config, warning=True) + self.config = normalize(self.config, multi_task=True) + trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links) + trainer.run() + # check model keys + self.assertEqual(len(trainer.wrapper.model), 2) + self.assertIn("model_1", trainer.wrapper.model) + self.assertIn("model_2", trainer.wrapper.model) + + # check shared parameters + multi_state_dict = trainer.wrapper.model.state_dict() + for state_key in multi_state_dict: + if "model_1" in state_key: + self.assertIn(state_key.replace("model_1", "model_2"), multi_state_dict) + if "model_2" in state_key: + self.assertIn(state_key.replace("model_2", "model_1"), multi_state_dict) + if "model_1.descriptor" in state_key: + np.testing.assert_allclose( + multi_state_dict[state_key].numpy(), + multi_state_dict[state_key.replace("model_1", "model_2")].numpy(), + ) + + # test multitask fine-tuning + # add model_3 + self.origin_config["model"]["model_dict"]["model_3"] = deepcopy( + self.origin_config["model"]["model_dict"]["model_2"] + ) + self.origin_config["loss_dict"]["model_3"] = deepcopy( + self.origin_config["loss_dict"]["model_2"] + ) + self.origin_config["training"]["model_prob"]["model_3"] = deepcopy( + self.origin_config["training"]["model_prob"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_3"] = deepcopy( + self.origin_config["training"]["data_dict"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_3"]["stat_file"] = ( + self.origin_config[ + "training" + ]["data_dict"]["model_3"]["stat_file"].replace("model_2", "model_3") + ) + + # add model_4 + self.origin_config["model"]["model_dict"]["model_4"] = deepcopy( + self.origin_config["model"]["model_dict"]["model_2"] + ) + self.origin_config["loss_dict"]["model_4"] = deepcopy( + self.origin_config["loss_dict"]["model_2"] + ) + self.origin_config["training"]["model_prob"]["model_4"] = deepcopy( + self.origin_config["training"]["model_prob"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_4"] = deepcopy( + self.origin_config["training"]["data_dict"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_4"]["stat_file"] = ( + self.origin_config[ + "training" + ]["data_dict"]["model_4"]["stat_file"].replace("model_2", "model_4") + ) + + # set finetune rules + # model_1 resuming from model_1 + # pass + + # model_2 fine-tuning from model_2 + 
self.origin_config["model"]["model_dict"]["model_2"]["finetune_head"] = ( + "model_2" + ) + + # new model_3 fine-tuning from model_2 + self.origin_config["model"]["model_dict"]["model_3"]["finetune_head"] = ( + "model_2" + ) + + # new model_4 fine-tuning with randomly initialized fitting net + # pass + + self.origin_config["model"], shared_links_finetune = preprocess_shared_params( + self.origin_config["model"] + ) + + finetune_model = self.config["training"].get("save_ckpt", "model.ckpt") + ".pd" + self.origin_config["model"], finetune_links = get_finetune_rules( + finetune_model, + self.origin_config["model"], + ) + self.origin_config = update_deepmd_input(self.origin_config, warning=True) + self.origin_config = normalize(self.origin_config, multi_task=True) + trainer_finetune = get_trainer( + deepcopy(self.origin_config), + finetune_model=finetune_model, + shared_links=shared_links_finetune, + finetune_links=finetune_links, + ) + + # check parameters + multi_state_dict_finetuned = trainer_finetune.wrapper.model.state_dict() + for state_key in multi_state_dict_finetuned: + if "model_1" in state_key: + np.testing.assert_allclose( + multi_state_dict[state_key].numpy(), + multi_state_dict_finetuned[state_key].numpy(), + ) + elif "model_2" in state_key and "out_bias" not in state_key: + np.testing.assert_allclose( + multi_state_dict[state_key].numpy(), + multi_state_dict_finetuned[state_key].numpy(), + ) + elif "model_3" in state_key and "out_bias" not in state_key: + np.testing.assert_allclose( + multi_state_dict[state_key.replace("model_3", "model_2")].numpy(), + multi_state_dict_finetuned[state_key].numpy(), + ) + elif ( + "model_4" in state_key + and "fitting_net" not in state_key + and "out_bias" not in state_key + ): + np.testing.assert_allclose( + multi_state_dict[state_key.replace("model_4", "model_2")].numpy(), + multi_state_dict_finetuned[state_key].numpy(), + ) + + # check running + trainer_finetune.run() + self.tearDown() + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pd"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in [self.stat_files]: + shutil.rmtree(f) + + +class TestMultiTaskSeA(unittest.TestCase, MultiTaskTrainTest): + def setUp(self): + multitask_se_e2_a = deepcopy(multitask_template) + multitask_se_e2_a["model"]["shared_dict"]["my_descriptor"] = model_se_e2_a[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "se_e2_a" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_se_e2_a + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) + self.config["model"], self.shared_links = preprocess_shared_params( + self.config["model"] + ) + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + +# @unittest.skip("Paddle do not support MultiTaskDPA1.") +class 
TestMultiTaskDPA1(unittest.TestCase, MultiTaskTrainTest): + def setUp(self): + multitask_DPA1 = deepcopy(multitask_template) + multitask_DPA1["model"]["shared_dict"]["my_descriptor"] = model_dpa1[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "DPA1" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_DPA1 + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) + self.config["model"], self.shared_links = preprocess_shared_params( + self.config["model"] + ) + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + +class TestMultiTaskDPA2(unittest.TestCase, MultiTaskTrainTest): + def setUp(self): + multitask_DPA2 = deepcopy(multitask_template) + multitask_DPA2["model"]["shared_dict"]["my_descriptor"] = model_dpa2[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "DPA2" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_DPA2 + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) + self.config["model"], self.shared_links = preprocess_shared_params( + self.config["model"] + ) + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_neighbor_stat.py b/source/tests/pd/test_neighbor_stat.py new file mode 100644 index 0000000000..613150b7fc --- /dev/null +++ b/source/tests/pd/test_neighbor_stat.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import shutil +import unittest + +import dpdata +import numpy as np + +from deepmd.entrypoints.neighbor_stat import ( + neighbor_stat, +) + +from ..seed import ( + GLOBAL_SEED, +) + + +def gen_sys(nframes): + rng = np.random.default_rng(GLOBAL_SEED) + natoms = 1000 + data = {} + X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T # + 0.1 + data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0) + data["forces"] = rng.random([nframes, natoms, 3]) + data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape( + 1, 3, 3 + ) + data["energies"] = rng.random([nframes, 1]) + 
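+    # The 27 coordinates built above form a 3x3x3 grid with 1 Angstrom spacing
+    # inside a 3 Angstrom cubic cell, so the minimum neighbor distance is
+    # exactly 1.0 and the neighbor count within any rcut can be predicted by
+    # counting integer lattice points, as done in test_neighbor_stat below.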
data["atom_names"] = ["TYPE"] + data["atom_numbs"] = [27] + data["atom_types"] = np.repeat(0, 27) + return data + + +class TestNeighborStat(unittest.TestCase): + def setUp(self): + data0 = gen_sys(1) + sys0 = dpdata.LabeledSystem() + sys0.data = data0 + sys0.to_deepmd_npy("system_0", set_size=1) + + def tearDown(self): + shutil.rmtree("system_0") + + def test_neighbor_stat(self): + for rcut in (0.0, 1.0, 2.0, 4.0): + for mixed_type in (True, False): + with self.subTest(rcut=rcut, mixed_type=mixed_type): + rcut += 1e-3 # prevent numerical errors + min_nbor_dist, max_nbor_size = neighbor_stat( + system="system_0", + rcut=rcut, + type_map=["TYPE", "NO_THIS_TYPE"], + mixed_type=mixed_type, + backend="paddle", + ) + upper = np.ceil(rcut) + 1 + X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T + # distance to (0,0,0) + distance = np.linalg.norm(positions, axis=1) + expected_neighbors = np.count_nonzero( + np.logical_and(distance > 0, distance <= rcut) + ) + self.assertAlmostEqual(min_nbor_dist, 1.0, 6) + ret = [expected_neighbors] + if not mixed_type: + ret.append(0) + np.testing.assert_array_equal(max_nbor_size, ret) diff --git a/source/tests/pd/test_sampler.py b/source/tests/pd/test_sampler.py new file mode 100644 index 0000000000..2af5a9c05c --- /dev/null +++ b/source/tests/pd/test_sampler.py @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import unittest +from pathlib import ( + Path, +) + +import numpy as np +import paddle +from paddle.io import ( + BatchSampler, + DataLoader, +) + +from deepmd.pd.utils.dataloader import ( + DpLoaderSet, + get_weighted_sampler, +) +from deepmd.tf.common import ( + expand_sys_str, +) +from deepmd.tf.utils import random as tf_random +from deepmd.tf.utils.data_system import ( + DeepmdDataSystem, +) + +CUR_DIR = os.path.dirname(__file__) + + +class TestSampler(unittest.TestCase): + def setUp(self): + with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin: + content = fin.read() + config = json.loads(content) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + config["training"]["training_data"]["systems"] = data_file + config["training"]["validation_data"]["systems"] = data_file + model_config = config["model"] + self.rcut = model_config["descriptor"]["rcut"] + self.rcut_smth = model_config["descriptor"]["rcut_smth"] + self.sel = model_config["descriptor"]["sel"] + self.batch_size = config["training"]["training_data"]["batch_size"] + self.systems = config["training"]["validation_data"]["systems"] + if isinstance(self.systems, str): + self.systems = expand_sys_str(self.systems) + self.my_dataset = DpLoaderSet( + self.systems, + self.batch_size, + model_config["type_map"], + seed=10, + shuffle=False, + ) + + tf_random.seed(10) + self.dp_dataset = DeepmdDataSystem(self.systems, self.batch_size, 1, self.rcut) + + def test_sampler_debug_info(self): + dataloader = DataLoader( + self.my_dataset, + batch_sampler=BatchSampler( + get_weighted_sampler(self.my_dataset, prob_style="prob_sys_size"), + drop_last=False, + ), + num_workers=0, # setting to 0 diverges the behavior of its iterator; should be >=1 + # pin_memory=True, + ) + device = paddle.get_device() + paddle.set_device("cpu") + batch_data = next(iter(dataloader)) + paddle.set_device(device) + sid = batch_data["sid"] + fid = batch_data["fid"][0] + coord = batch_data["coord"].squeeze(0) + frame = self.my_dataset.systems[sid].__getitem__(fid) + 
self.assertTrue(np.allclose(coord, frame["coord"])) + + def test_auto_prob_uniform(self): + auto_prob_style = "prob_uniform" + sampler = get_weighted_sampler(self.my_dataset, prob_style=auto_prob_style) + my_probs = np.array(sampler.weights) + self.dp_dataset.set_sys_probs(auto_prob_style=auto_prob_style) + dp_probs = np.array(self.dp_dataset.sys_probs) + self.assertTrue(np.allclose(my_probs, dp_probs)) + + def test_auto_prob_sys_size(self): + auto_prob_style = "prob_sys_size" + sampler = get_weighted_sampler(self.my_dataset, prob_style=auto_prob_style) + my_probs = np.array(sampler.weights) + self.dp_dataset.set_sys_probs(auto_prob_style=auto_prob_style) + dp_probs = np.array(self.dp_dataset.sys_probs) + self.assertTrue(np.allclose(my_probs, dp_probs)) + + def test_auto_prob_sys_size_ext(self): + auto_prob_style = "prob_sys_size;0:1:0.2;1:3:0.8" + sampler = get_weighted_sampler(self.my_dataset, prob_style=auto_prob_style) + my_probs = np.array(sampler.weights) + self.dp_dataset.set_sys_probs(auto_prob_style=auto_prob_style) + dp_probs = np.array(self.dp_dataset.sys_probs) + self.assertTrue(np.allclose(my_probs, dp_probs)) + + def test_sys_probs(self): + sys_probs = [0.1, 0.4, 0.5] + sampler = get_weighted_sampler( + self.my_dataset, prob_style=sys_probs, sys_prob=True + ) + my_probs = np.array(sampler.weights) + self.dp_dataset.set_sys_probs(sys_probs=sys_probs) + dp_probs = np.array(self.dp_dataset.sys_probs) + self.assertTrue(np.allclose(my_probs, dp_probs)) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_tabulate_fusion_se_a.py b/source/tests/pd/test_tabulate_fusion_se_a.py new file mode 100644 index 0000000000..11c7711105 --- /dev/null +++ b/source/tests/pd/test_tabulate_fusion_se_a.py @@ -0,0 +1,1512 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.cxx_op import ( + ENABLE_CUSTOMIZED_OP, +) +from deepmd.pd.utils import ( + env, +) + +from ..consistent.common import ( + parameterized, +) + + +@parameterized((paddle.float64, paddle.float32)) +@unittest.skipIf(not ENABLE_CUSTOMIZED_OP, "PyTorch customized OPs are not built") +class TestTabulateFusionSeAOp(unittest.TestCase): + def setUp(self): + (dtype,) = self.param + if dtype == paddle.float64: + self.prec = 1e-10 + elif dtype == paddle.float32: + self.prec = 1e-5 + self.table_tensor = ( + paddle.to_tensor( + [ + 6.348551343037398542e-01, + 4.209465843706336474e-04, + 6.390862740714405368e-03, + -1.544448595628262176e-04, + -1.891095227974180087e-04, + 2.695025951562175852e-05, + -1.317549846042939343e00, + -5.624478206903206490e-02, + 1.274284553146523905e-02, + -6.836227424141475689e-04, + -1.438066096020836407e-04, + -1.854932873974712940e-06, + -9.996964112615246423e-01, + 6.928234423723647617e-02, + -4.974719973810486084e-03, + -2.019584729176823030e-04, + 1.077254539742680247e-04, + -8.024209768588029797e-06, + 3.552689563657350780e-01, + -3.578299775339799371e-02, + -1.319946251007718743e-03, + 1.016701374495701440e-03, + -1.057336720791906388e-04, + 5.182678943855506567e-06, + 1.227750369557627286e00, + 4.100352079064395472e-02, + 3.586869164810712295e-03, + -4.304540913340443135e-04, + -1.269943482892440004e-04, + 1.459465404430219674e-05, + -1.472642501673147031e00, + -1.611354921283318364e-01, + 1.645427874390196360e-02, + 2.107392978135091402e-04, + -2.193541011180757461e-04, + 1.915392497459551146e-05, + -2.855174490181606739e-01, + 9.774337856626263976e-02, + -2.140891880666230714e-03, + -7.148328890055103638e-04, 
+ 1.965696332267534503e-05, + -4.593489654121371453e-06, + -1.468441009949382314e00, + -6.360828127262234399e-02, + 4.751283295356955282e-03, + 8.711899561753186068e-05, + -9.937008678852959884e-06, + 4.273569346584811685e-07, + 6.348599826995243722e-01, + 5.487167506364742930e-04, + 6.386116198716365253e-03, + -1.619832375568118791e-04, + -1.877328309473502049e-04, + 2.134130914519164856e-05, + -1.318111020264137512e00, + -5.599013082054477008e-02, + 1.272225054666903735e-02, + -6.893710047488201898e-04, + -1.434367581078517366e-04, + 3.329508890614227371e-05, + -9.990040854920316793e-01, + 6.918278968071900348e-02, + -4.980714172967731085e-03, + -1.976574487947816198e-04, + 1.070037204086153902e-04, + -7.859875077388093586e-06, + 3.549109954092205532e-01, + -3.580909209068139365e-02, + -1.289508598157979719e-03, + 1.012474257117017967e-03, + -1.054418924402112718e-04, + -1.245498322204730900e-05, + 1.228160763020727630e00, + 4.107512853046493134e-02, + 3.573879491390910459e-03, + -4.355190226638688713e-04, + -1.258433981470396103e-04, + 1.610862268100766631e-05, + -1.474252210958008291e00, + -1.608063442081248406e-01, + 1.646046950167207382e-02, + 2.019843636566674109e-04, + -2.185756589083626730e-04, + 1.978479879983412190e-05, + -2.845402300363228942e-01, + 9.770034635718018168e-02, + -2.162325119197382531e-03, + -7.140472215558940627e-04, + 1.956302663031799223e-05, + 1.932584474244053378e-05, + -1.469076617546759334e00, + -6.351322951074317436e-02, + 4.753890907276497185e-03, + 8.672114560243554321e-05, + -1.004574434175897967e-05, + -4.345700882560937596e-06, + 6.348661083147921769e-01, + 6.763897297752743953e-04, + 6.381144275303845745e-03, + -1.694690463885140694e-04, + -1.868179426353836598e-04, + 3.439291082765030046e-05, + -1.318669650038090335e00, + -5.573589319299507294e-02, + 1.270148368741391351e-02, + -6.950749719342792137e-04, + -1.422194703304518733e-04, + 3.454751241752252323e-05, + -9.983127558632299836e-01, + 6.908311652764687061e-02, + -4.986579772806746212e-03, + -1.933888092529071571e-04, + 1.068327546750306073e-04, + -2.976978385983384886e-05, + 3.545527765488725169e-01, + -3.583457894275744043e-02, + -1.259197760082061621e-03, + 1.008246479193084487e-03, + -1.059401869200098984e-04, + 1.721968053146218465e-06, + 1.228571871257205572e00, + 4.114647496201748883e-02, + 3.560738575723638825e-03, + -4.405332425718102457e-04, + -1.251648759618972115e-04, + 3.659080417076460655e-05, + -1.475858628153338792e00, + -1.604770750960976822e-01, + 1.646639808472218428e-02, + 1.932598402043995316e-04, + -2.175904819601363058e-04, + 1.230256868634094333e-05, + -2.835634435191126679e-01, + 9.765688571984927624e-02, + -2.183734604613508240e-03, + -7.132463811570244078e-04, + 2.021887442373574272e-05, + 1.321401495096886281e-05, + -1.469711274366155784e00, + -6.341812571665436660e-02, + 4.756486470714936521e-03, + 8.631384191910702040e-05, + -1.010516500002806932e-05, + -1.110874413279218719e-05, + 6.348735101551836735e-01, + 8.039610290153098582e-04, + 6.375948457075718626e-03, + -1.769074132993461279e-04, + -1.855677150383903214e-04, + 3.421271436711027645e-05, + -1.319225739518145257e00, + -5.548207260888919634e-02, + 1.268054645200545304e-02, + -7.007297564176242621e-04, + -1.408885818822980523e-04, + 3.124701885930576017e-05, + -9.976224235482542557e-01, + 6.898332734138989952e-02, + -4.992317635216104131e-03, + -1.891404922064061889e-04, + 1.053957535708985289e-04, + -1.089286646983666076e-06, + 3.541943058468561834e-01, + -3.585946084769019160e-02, + 
-1.229013912637771933e-03, + 1.004009466262262241e-03, + -1.059129033455631863e-04, + -4.941663399086282537e-06, + 1.228983691638902087e00, + 4.121755707472917613e-02, + 3.547447845420277635e-03, + -4.455036207721562607e-04, + -1.239172256532283074e-04, + 3.437341080261359686e-05, + -1.477461752073406132e00, + -1.601476900261984693e-01, + 1.647206544856073471e-02, + 1.845724864086241608e-04, + -2.173853638475303177e-04, + 3.620505631412716563e-05, + -2.825870937484175061e-01, + 9.761299713537928413e-02, + -2.205119732548723246e-03, + -7.124245958910824846e-04, + 2.074820558303217398e-05, + 1.209381466404663338e-05, + -1.470344979888463577e00, + -6.332297013406351649e-02, + 4.759069711794740656e-03, + 8.589935708505183382e-05, + -1.045842324058424788e-05, + -6.134254562752213537e-06, + 6.348821871815598650e-01, + 9.314261853726121809e-04, + 6.370530236175125580e-03, + -1.842978984547447257e-04, + -1.840210089691990327e-04, + 2.234897510077387526e-05, + -1.319779292891724465e00, + -5.522867246076747227e-02, + 1.265944033870337014e-02, + -7.063360380236871801e-04, + -1.393416734992873119e-04, + 1.931167378610719847e-05, + -9.969330896946905218e-01, + 6.888342466806646192e-02, + -4.997928623431705138e-03, + -1.849303524006284602e-04, + 1.053651633995249134e-04, + -2.870133904891753420e-05, + 3.538355893399378616e-01, + -3.588374034700148041e-02, + -1.198957225773849763e-03, + 9.997681359810027708e-04, + -1.060678155548662341e-04, + -4.107776618240329050e-06, + 1.229396221507694564e00, + 4.128837188660083868e-02, + 3.534008730169808672e-03, + -4.504275777948374090e-04, + -1.224778886969254976e-04, + 2.455513266683544498e-05, + -1.479061581584721008e00, + -1.598181942132129441e-01, + 1.647747255391585064e-02, + 1.759082956613747337e-04, + -2.158335508261176197e-04, + 6.406725844410341030e-06, + -2.816111850012528728e-01, + 9.756868109694678826e-02, + -2.226479900633348240e-03, + -7.115823288942964460e-04, + 2.121038517729223415e-05, + 1.358027318850170435e-05, + -1.470977733597038872e00, + -6.322776301216057049e-02, + 4.761640356162846754e-03, + 8.547576468445008296e-05, + -1.081874527005240631e-05, + -8.845528475774308509e-07, + 6.348921383103013349e-01, + 1.058780765759985421e-03, + 6.364891110105044131e-03, + -1.916363332792569681e-04, + -1.827768871456785058e-04, + 2.275707291847725182e-05, + -1.320330314380025793e00, + -5.497569611120622923e-02, + 1.263816684562326688e-02, + -7.118908987616576157e-04, + -1.380182662155302303e-04, + 1.630252530406085050e-05, + -9.962447554247517711e-01, + 6.878341103651769428e-02, + -5.003413601927745452e-03, + -1.807403991329658622e-04, + 1.040363362483998831e-04, + -4.422604643727719699e-06, + 3.534766330394523148e-01, + -3.590741998555346121e-02, + -1.169027863565602274e-03, + 9.955202772264954043e-04, + -1.060447700647724903e-04, + -1.021743279826507342e-05, + 1.229809458175783687e00, + 4.135891644424664892e-02, + 3.520422661584679015e-03, + -4.553035794622276055e-04, + -1.210679214963379874e-04, + 1.595827246550979495e-05, + -1.480658115605847147e00, + -1.594885928526604546e-01, + 1.648262036665308974e-02, + 1.672799673730459213e-04, + -2.148155690753495697e-04, + -1.867405535452657550e-06, + -2.806357215496423363e-01, + 9.752393810975558408e-02, + -2.247814508535729908e-03, + -7.107227883497464890e-04, + 2.207595560206285042e-05, + -1.137331983229785190e-06, + -1.471609534977757372e00, + -6.313250460562676303e-02, + 4.764198129054059844e-03, + 8.503999275315992160e-05, + -1.072692568096017848e-05, + -1.373273803695183988e-05, + 
6.349033624136081189e-01, + 1.186020367092407990e-03, + 6.359032581545111251e-03, + -1.989262833250400370e-04, + -1.812752661309344573e-04, + 1.302837915648187095e-05, + -1.320878808237722746e00, + -5.472314689282183064e-02, + 1.261672747063919374e-02, + -7.173917679890315846e-04, + -1.373052781380030543e-04, + 3.768455339511444900e-05, + -9.955574218354472649e-01, + 6.868328895828368363e-02, + -5.008773436308684712e-03, + -1.765844799686671349e-04, + 1.034810966435298563e-04, + -1.111176255155353207e-05, + 3.531174429312692320e-01, + -3.593050231143132822e-02, + -1.139225984250480384e-03, + 9.912704081392112714e-04, + -1.064918174657224404e-04, + 2.680738443515978403e-06, + 1.230223398925979650e00, + 4.142918782293085467e-02, + 3.506691073047987512e-03, + -4.601302388532728274e-04, + -1.198865987378785417e-04, + 1.656386182477533959e-05, + -1.482251353107205460e00, + -1.591588911206925361e-01, + 1.648750985769346228e-02, + 1.586901819247656846e-04, + -2.147074421644348298e-04, + 2.641762503224190698e-05, + -2.796607076604977760e-01, + 9.747876869099537933e-02, + -2.269122958003529523e-03, + -7.098388532529275848e-04, + 2.226701915637888804e-05, + 1.106237844209756009e-05, + -1.472240383519069384e00, + -6.303719517464229094e-02, + 4.766742755353862819e-03, + 8.459962202271287246e-05, + -1.132218730142039535e-05, + 8.958476322974335592e-07, + 6.349158583197994643e-01, + 1.313140616388666637e-03, + 6.352956158169477396e-03, + -2.061601622854974502e-04, + -1.806298821034440756e-04, + 3.770936817966389514e-05, + -1.321424778752664952e00, + -5.447102810827629538e-02, + 1.259512371128685033e-02, + -7.228490733933210606e-04, + -1.356407402355522122e-04, + 2.099832634320949299e-05, + -9.948710899987588396e-01, + 6.858306092758209571e-02, + -5.014008993202081696e-03, + -1.724573933478598642e-04, + 1.029144894329912032e-04, + -1.738522780636760158e-05, + 3.527580249757622521e-01, + -3.595298987582695727e-02, + -1.109551740263377793e-03, + 9.870126155001155040e-04, + -1.064931456292656029e-04, + -2.059910396978558087e-06, + 1.230638041011988815e00, + 4.149918312660194619e-02, + 3.492815399561766294e-03, + -4.649051157564728157e-04, + -1.192927614880224277e-04, + 4.072077917749542957e-05, + -1.483841293110880866e00, + -1.588290941739924356e-01, + 1.649214200293154520e-02, + 1.501282794678792006e-04, + -2.138853834118830831e-04, + 2.633111784219914963e-05, + -2.786861475954987011e-01, + 9.743317336979973042e-02, + -2.290404652904617314e-03, + -7.089360554728917595e-04, + 2.260180638238835256e-05, + 1.741828165826791135e-05, + -1.472870278712053782e00, + -6.294183498489253070e-02, + 4.769273959660644442e-03, + 8.414681093302789892e-05, + -1.142905205912834352e-05, + -4.014065121916994726e-06, + 6.349296248136164778e-01, + 1.440137170869312810e-03, + 6.346663352465874847e-03, + -2.133510744796659759e-04, + -1.788513201196447670e-04, + 1.721163944875696416e-05, + -1.321968230245579967e00, + -5.421934303028537461e-02, + 1.257335706466754244e-02, + -7.282542863230233527e-04, + -1.343059033644905889e-04, + 1.747822893445653714e-05, + -9.941857609618123259e-01, + 6.848272942128874607e-02, + -5.019121140152461337e-03, + -1.683596869525186377e-04, + 1.024142382012053007e-04, + -2.632719129544749384e-05, + 3.523983851077774343e-01, + -3.597488523292310947e-02, + -1.080005278271846739e-03, + 9.827512175914082399e-04, + -1.066680880078371994e-04, + 3.403258606315080555e-07, + 1.231053381658700818e00, + 4.156889948792314576e-02, + 3.478797077596604108e-03, + -4.696409807358484993e-04, + -1.173636798436718986e-04, + 
1.149931408689037458e-05, + -1.485427934690428442e00, + -1.584992071496764965e-01, + 1.649651778315383566e-02, + 1.415960091521040870e-04, + -2.125888038426753843e-04, + 7.384582528889821378e-06, + -2.777120456109742896e-01, + 9.738715268720327112e-02, + -2.311658999267464203e-03, + -7.080165982958596923e-04, + 2.340034491729013294e-05, + 5.174033942788913380e-06, + -1.473499220050474623e00, + -6.284642430757329812e-02, + 4.771791466347353149e-03, + 8.368540130389298475e-05, + -1.162498575113560591e-05, + -5.381585801785509468e-06, + 6.349446606365225509e-01, + 1.567005718051586727e-03, + 6.340155681555815353e-03, + -2.204854663573854625e-04, + -1.779502948888764897e-04, + 3.196283450610521294e-05, + -1.322509167069771951e00, + -5.396809490162747525e-02, + 1.255142902735281209e-02, + -7.336077414823606981e-04, + -1.332538502428148267e-04, + 2.525523713666122703e-05, + -9.935014357470516311e-01, + 6.838229689892011409e-02, + -5.024110745516051704e-03, + -1.642860423419652261e-04, + 1.011792892256958577e-04, + -5.902237032851650630e-06, + 3.520385292366049468e-01, + -3.599619093977864809e-02, + -1.050586739210998023e-03, + 9.784837539753422735e-04, + -1.066187407206570670e-04, + -6.052991441884039902e-06, + 1.231469418062474341e00, + 4.163833406830096812e-02, + 3.464637544942418459e-03, + -4.743218246565151001e-04, + -1.164951133813105271e-04, + 2.473911917278243621e-05, + -1.487011276970676033e00, + -1.581692351651968476e-01, + 1.650063818395723983e-02, + 1.331001312464952355e-04, + -2.118074389246019866e-04, + 9.192428068946771109e-06, + -2.767384059577842614e-01, + 9.734070719609828892e-02, + -2.332885405321092481e-03, + -7.070743922828596519e-04, + 2.373777250910882265e-05, + 1.127700884024945933e-05, + -1.474127207030835107e00, + -6.275096341939470634e-02, + 4.774294999622533293e-03, + 8.321347296773265077e-05, + -1.162225195759229858e-05, + -1.468175407624093560e-05, + 6.349609644870094494e-01, + 1.693741975839754832e-03, + 6.333434667015966531e-03, + -2.275719866012916918e-04, + -1.766077012712487378e-04, + 2.919052022666632077e-05, + -1.323047593610823247e00, + -5.371728693515605280e-02, + 1.252934109528984138e-02, + -7.389107006611626187e-04, + -1.322992615601379437e-04, + 3.689337377145077536e-05, + -9.928181153524118230e-01, + 6.828176580261838269e-02, + -5.028978678356570489e-03, + -1.602449667799085492e-04, + 1.004819833385002965e-04, + -7.012859043909368637e-06, + 3.516784632459502014e-01, + -3.601690955621394963e-02, + -1.021296258318379370e-03, + 9.742140050919662845e-04, + -1.068837890347894775e-04, + 3.261791903209577241e-07, + 1.231886147391427544e00, + 4.170748405790913882e-02, + 3.450338240560582581e-03, + -4.789562532735843967e-04, + -1.153902983973557932e-04, + 2.856018069496295048e-05, + -1.488591319127526624e00, + -1.578391833182464787e-01, + 1.650450419566778376e-02, + 1.246407552546250339e-04, + -2.115332183818513349e-04, + 3.149345367837511192e-05, + -2.757652328811996956e-01, + 9.729383746118988596e-02, + -2.354083281534554220e-03, + -7.061133365182417328e-04, + 2.418809213597686327e-05, + 1.280494807360028992e-05, + -1.474754239152433311e00, + -6.265545260258377491e-02, + 4.776784283590801948e-03, + 8.273687806363864625e-05, + -1.229952261449745124e-05, + 3.204146150058887708e-06, + 6.349785350208994039e-01, + 1.820341692612803541e-03, + 6.326501834700739083e-03, + -2.346100929840904846e-04, + -1.748840426396014729e-04, + 1.130785525935554482e-05, + -1.323583514286295282e00, + -5.346692231381247606e-02, + 1.250709476370755191e-02, + -7.441705970339035966e-04, + 
-1.303302437099287372e-04, + 7.935577538626925858e-06, + -9.921358007514943234e-01, + 6.818113855713830995e-02, + -5.033725808341922223e-03, + -1.562353718150353687e-04, + 1.001568149392305130e-04, + -2.302258383924021595e-05, + 3.513181929939074299e-01, + -3.603704364469759169e-02, + -9.921339651685744804e-04, + 9.699384566370250092e-04, + -1.069081013817698415e-04, + -2.744679484186812129e-06, + 1.232303566785723392e00, + 4.177634667571154814e-02, + 3.435900604437185177e-03, + -4.835440426346156498e-04, + -1.140781768005934266e-04, + 2.411509316948267986e-05, + -1.490168060387760951e00, + -1.575090566866652331e-01, + 1.650811681325956015e-02, + 1.162064642248029450e-04, + -2.100324946396962247e-04, + 4.868837971279583202e-06, + -2.747925306207861240e-01, + 9.724654405895133413e-02, + -2.375252040655950400e-03, + -7.051355614741510987e-04, + 2.505903781065493165e-05, + -2.569082101323676566e-06, + -1.475380315917416585e00, + -6.255989214488603956e-02, + 4.779259042312647421e-03, + 8.224491253736542200e-05, + -1.205054378062991984e-05, + -1.594987943813344381e-05, + 6.349973708516511994e-01, + 1.946800647308156995e-03, + 6.319358714566076195e-03, + -2.415904693897710526e-04, + -1.741570105122868483e-04, + 3.342152683043006766e-05, + -1.324116933545430141e00, + -5.321700419064152865e-02, + 1.248469152702344660e-02, + -7.493727578058629766e-04, + -1.295525827398787404e-04, + 2.659942231629285135e-05, + -9.914544928937398804e-01, + 6.808041756983601589e-02, + -5.038353005641925050e-03, + -1.522500103683389601e-04, + 9.911425811568465554e-05, + -1.035676665958809070e-05, + 3.509577243129330393e-01, + -3.605659577023319351e-02, + -9.630999837076988784e-04, + 9.656594578503095369e-04, + -1.070158919994286978e-04, + -2.281503112307771063e-06, + 1.232721673357858538e00, + 4.184491916948063911e-02, + 3.421326077437690516e-03, + -4.880823132679394552e-04, + -1.129872290747681817e-04, + 2.854952342195995698e-05, + -1.491741500028839651e00, + -1.571788603283475749e-01, + 1.651147703627379656e-02, + 1.078118218043548068e-04, + -2.094656285123614196e-04, + 1.573608604543182341e-05, + -2.738203034102859035e-01, + 9.719882757757769554e-02, + -2.396391097750961291e-03, + -7.041328812172977002e-04, + 2.511128111671661627e-05, + 1.472819566023977703e-05, + -1.476005436830838402e00, + -6.246428233956573262e-02, + 4.781718999863710830e-03, + 8.175246233396933941e-05, + -1.310850420537104008e-05, + 1.717274673157189222e-05, + 6.350174705506670403e-01, + 2.073114649501703322e-03, + 6.312006840494438151e-03, + -2.485262001215581039e-04, + -1.724445833892894095e-04, + 1.623821996891234705e-05, + -1.324647855868849478e00, + -5.296753568880858964e-02, + 1.246213287875118370e-02, + -7.545274547770323926e-04, + -1.284298383236558551e-04, + 3.142127009671183137e-05, + -9.907741927046019859e-01, + 6.797960523066012839e-02, + -5.042861140826992473e-03, + -1.482946605870891395e-04, + 9.821987974303589589e-05, + -3.593831829470692349e-06, + 3.505970630098214080e-01, + -3.607556850024738748e-02, + -9.341944322877257512e-04, + 9.613773761737330267e-04, + -1.072343182304808093e-04, + 2.791451096706449119e-06, + 1.233140464192951757e00, + 4.191319881581374862e-02, + 3.406616101162745613e-03, + -4.925758895926437772e-04, + -1.113902906060245713e-04, + 1.275308331152581608e-05, + -1.493311637378700762e00, + -1.568485992811522733e-01, + 1.651458586873823589e-02, + 9.944841367174414462e-05, + -2.085492230796830474e-04, + 1.276456024245067926e-05, + -2.728485554775001987e-01, + 9.715068861693920699e-02, + -2.417499870240937074e-03, 
+ -7.031148500958378164e-04, + 2.576543833825076558e-05, + 7.841889896124507091e-06, + -1.476629601400710978e00, + -6.236862348540499201e-02, + 4.784163880393361643e-03, + 8.124213252544174404e-05, + -1.286332078849730127e-05, + -1.821996546344873330e-06, + 6.350388326475970846e-01, + 2.199279539485121671e-03, + 6.304447750121061969e-03, + -2.554047701160370044e-04, + -1.716061813901302753e-04, + 3.413524324276134592e-05, + -1.325176285768258300e00, + -5.271851990161838253e-02, + 1.243942031140890699e-02, + -7.596346042592860793e-04, + -1.269803855069738714e-04, + 2.314478643438959578e-05, + -9.900949010857222898e-01, + 6.787870391214460841e-02, + -5.047251084767826433e-03, + -1.443753107913585767e-04, + 9.837034053479728221e-05, + -3.865274593462701621e-05, + 3.502362148656810170e-01, + -3.609396440447816545e-02, + -9.054174237006253068e-04, + 9.570894530963515055e-04, + -1.071221722792567601e-04, + -5.180134097885568801e-06, + 1.233559936349031494e00, + 4.198118292014653419e-02, + 3.391772117805412056e-03, + -4.970162819604460663e-04, + -1.105584293158747960e-04, + 2.757032189173095048e-05, + -1.494878471815561216e00, + -1.565182785628131401e-01, + 1.651744431908664865e-02, + 9.112268062696188113e-05, + -2.082277461664644284e-04, + 3.370820636496137736e-05, + -2.718772910441742408e-01, + 9.710212778853387350e-02, + -2.438577777940475859e-03, + -7.020756635958485484e-04, + 2.613933618298708639e-05, + 1.211520684095310762e-05, + -1.477252809138063672e00, + -6.227291588670166161e-02, + 4.786593408182711167e-03, + 8.072392747742672100e-05, + -1.281499371544444526e-05, + -1.293175202324119235e-05, + 6.350614556306495295e-01, + 2.325291188338546311e-03, + 6.296682984661446623e-03, + -2.622362895631248896e-04, + -1.701076322674243866e-04, + 2.573454296903621253e-05, + -1.325702227786145437e00, + -5.246995989253622206e-02, + 1.241655531642829255e-02, + -7.646904682589584622e-04, + -1.257704658362481128e-04, + 2.439373356208127567e-05, + -9.894166189151047952e-01, + 6.777771596940393439e-02, + -5.051523708536139086e-03, + -1.404733355821404265e-04, + 9.677082285072928253e-05, + -3.720510878458014501e-06, + 3.498751856359115786e-01, + -3.611178605486395354e-02, + -8.767690652124425499e-04, + 9.527998576480508275e-04, + -1.072771816869139909e-04, + -2.281376475091892258e-06, + 1.233980086857325631e00, + 4.204886881676297983e-02, + 3.376795570009583514e-03, + -5.014114486109571937e-04, + -1.092957353261917852e-04, + 2.516456964431257380e-05, + -1.496442002767713664e00, + -1.561879031708521548e-01, + 1.652005340007862977e-02, + 8.282284133744905071e-05, + -2.067123325224875000e-04, + 7.057486539657783089e-06, + -2.709065143258797548e-01, + 9.705314571543909030e-02, + -2.459624243094573216e-03, + -7.010187162791577066e-04, + 2.672975399789282626e-05, + 7.629793933874534523e-06, + -1.477875059556995385e00, + -6.217715985326619649e-02, + 4.789007307701962507e-03, + 8.019935829649041371e-05, + -1.318861260046749971e-05, + -7.150339348059032240e-06, + 6.350853379468965887e-01, + 2.451145498001100487e-03, + 6.288714088740080324e-03, + -2.690159202421790068e-04, + -1.686584359429067433e-04, + 1.941481480743946700e-05, + -1.326225686495484890e00, + -5.222185869521017709e-02, + 1.239353938406437261e-02, + -7.696964132049412353e-04, + -1.246012242240120604e-04, + 2.724071141974432252e-05, + -9.887393470472876089e-01, + 6.767664374012982709e-02, + -5.055679883306329545e-03, + -1.366074591188833347e-04, + 9.623033677044332457e-05, + -1.113456896173822779e-05, + 3.495139810501832756e-01, + 
-3.612903602543367232e-02, + -8.482494585971035728e-04, + 9.485064841097947883e-04, + -1.073561607316583907e-04, + -2.239996380309942211e-06, + 1.234400912722548371e00, + 4.211625386880359784e-02, + 3.361687900729734210e-03, + -5.057597926077623488e-04, + -1.078411892315765344e-04, + 1.508800592977199686e-05, + -1.498002229713325750e00, + -1.558574780824932282e-01, + 1.652241412871961052e-02, + 7.456368677257522147e-05, + -2.062001731191939454e-04, + 2.069621557469772063e-05, + -2.699362295319003291e-01, + 9.700374303226286243e-02, + -2.480638690415259105e-03, + -6.999405672986690023e-04, + 2.700789474676622474e-05, + 1.556143061449123430e-05, + -1.478496352174730522e00, + -6.208135570041733303e-02, + 4.791405303667145565e-03, + 7.966538051836852740e-05, + -1.352687841609079228e-05, + -2.789411930543395566e-06, + 6.351104780025849106e-01, + 2.576838401336829787e-03, + 6.280542610220480118e-03, + -2.757414391158645754e-04, + -1.675762649448408429e-04, + 2.787462665161048641e-05, + -1.326746666499438287e00, + -5.197421931349595348e-02, + 1.237037400330611749e-02, + -7.746541492504023475e-04, + -1.232228491818352083e-04, + 2.166599538617633252e-05, + -9.880630863135209108e-01, + 6.757548954459043078e-02, + -5.059720480258220535e-03, + -1.327693574508429343e-04, + 9.550030312894054513e-05, + -1.096549240339310371e-05, + 3.491526068124157778e-01, + -3.614571689219699124e-02, + -8.198587001702131727e-04, + 9.442100079790295610e-04, + -1.074330339280879455e-04, + -2.103241190440061311e-06, + 1.234822410923189784e00, + 4.218333546826981417e-02, + 3.346450553092000530e-03, + -5.100549148199152614e-04, + -1.071543306169886722e-04, + 3.572075491055831030e-05, + -1.499559152180234056e00, + -1.555270082545787691e-01, + 1.652452752618108200e-02, + 6.633607063542407416e-05, + -2.052990867644106118e-04, + 1.891505702101457936e-05, + -2.689664408651156746e-01, + 9.695392038509384469e-02, + -2.501620547117759490e-03, + -6.988464710389351081e-04, + 2.774961528830105395e-05, + 4.843681010028069226e-06, + -1.479116686511674494e00, + -6.198550374897651011e-02, + 4.793787121096219732e-03, + 7.912045955652986253e-05, + -1.359696279035538403e-05, + -9.132339849453571562e-06, + 6.351368741634448867e-01, + 2.702365862198193025e-03, + 6.272170100036473551e-03, + -2.824171711189519380e-04, + -1.661976899287730559e-04, + 2.457347650017094835e-05, + -1.327265172431057128e00, + -5.172704472148267896e-02, + 1.234706066178771662e-02, + -7.795630288411945592e-04, + -1.217395799935142969e-04, + 1.184741714306808905e-05, + -9.873878375219384829e-01, + 6.747425568563097942e-02, + -5.063646370480812467e-03, + -1.289626891970745083e-04, + 9.513074838211379970e-05, + -2.521433322545949321e-05, + 3.487910686007592576e-01, + -3.616183123303555458e-02, + -7.915968808226425679e-04, + 9.399119246579864433e-04, + -1.077055728285351480e-04, + 6.031191175422362627e-06, + 1.235244578411804905e00, + 4.225011103602600848e-02, + 3.331084970256580589e-03, + -5.143079026275864784e-04, + -1.055716785023949844e-04, + 2.051193936812822612e-05, + -1.501112769745742259e00, + -1.551964986234863897e-01, + 1.652639461772111712e-02, + 5.814089462644928566e-05, + -2.041249358339155683e-04, + 6.311073191969795411e-06, + -2.679971525218879380e-01, + 9.690367843145115956e-02, + -2.522569242956208650e-03, + -6.977319783847560700e-04, + 2.827424678587480721e-05, + 2.739673941330651616e-06, + -1.479736062091468574e00, + -6.188960432526132566e-02, + 4.796152485364500034e-03, + 7.856828747830194362e-05, + -1.395147193446202365e-05, + -4.087221013031299888e-06, 
+ 6.351645247550001816e-01, + 2.827723875485507743e-03, + 6.263598112024793517e-03, + -2.890409134869928735e-04, + -1.648390823803598971e-04, + 2.215887759642637032e-05, + -1.327781208952985015e00, + -5.148033786352124164e-02, + 1.232360084570068709e-02, + -7.844171563535663055e-04, + -1.210428935521009746e-04, + 3.344327592646507844e-05, + -9.867136014577331249e-01, + 6.737294444867666932e-02, + -5.067458424877044516e-03, + -1.251812701937470213e-04, + 9.419473244264059593e-05, + -1.679002076268449654e-05, + 3.484293720675762929e-01, + -3.617738162759492893e-02, + -7.634640860539731316e-04, + 9.356082122653546981e-04, + -1.075431084112703954e-04, + -3.044614041061100766e-06, + 1.235667412115300623e00, + 4.231657802179918798e-02, + 3.315592595281378029e-03, + -5.185116053649769336e-04, + -1.041674655671950871e-04, + 1.242766263135090892e-05, + -1.502663082036415076e00, + -1.548659541050484978e-01, + 1.652801643260504508e-02, + 4.998556989557471122e-05, + -2.037688261998792680e-04, + 2.657243869390409541e-05, + -2.670283686919466826e-01, + 9.685301784023310490e-02, + -2.543484210258855835e-03, + -6.965966582328896994e-04, + 2.850491087748043708e-05, + 1.232179636112698650e-05, + -1.480354478441044286e00, + -6.179365776107784841e-02, + 4.798501122259496952e-03, + 7.800586916120723585e-05, + -1.413851691566035862e-05, + -5.727587674967719880e-06, + 6.351934280628791507e-01, + 2.952908467203564646e-03, + 6.254828202758994093e-03, + -2.956111985445306826e-04, + -1.636502852942454153e-04, + 2.616921494951480123e-05, + -1.328294780757159899e00, + -5.123410165425365537e-02, + 1.229999603970671068e-02, + -7.892274520450543677e-04, + -1.195721301312790567e-04, + 2.454197033093738297e-05, + -9.860403788833298488e-01, + 6.727155810173718331e-02, + -5.071157514069617352e-03, + -1.214296539729165295e-04, + 9.340570341953608358e-05, + -1.444050153586573228e-05, + 3.480675228394242149e-01, + -3.619237065717702262e-02, + -7.354603960058733389e-04, + 9.313051737393654526e-04, + -1.076930273455606579e-04, + -7.696053039474192446e-07, + 1.236090908935226107e00, + 4.238273390417521269e-02, + 3.299974870987111650e-03, + -5.226642260988254756e-04, + -1.032474625011560351e-04, + 2.396475265799989632e-05, + -1.504210088727871764e00, + -1.545353795944727493e-01, + 1.652939400402650763e-02, + 4.186078937618800693e-05, + -2.027012231708198600e-04, + 1.761148452766873776e-05, + -2.660600935582757565e-01, + 9.680193929166537592e-02, + -2.564364883962782712e-03, + -6.954454205710857090e-04, + 2.907017700829073683e-05, + 9.120785771591908463e-06, + -1.480971935090678926e00, + -6.169766439371183325e-02, + 4.800832758035045861e-03, + 7.743502257440657043e-05, + -1.440171540732098418e-05, + -4.489324897938611976e-06, + 6.355509554770921721e-01, + 4.194364255265300989e-03, + 6.156587518227093006e-03, + -3.584539136959086518e-04, + -1.505562336471176987e-04, + 2.631189526673375584e-05, + -1.333295991901433553e00, + -4.879824528740911438e-02, + 1.205629889598585497e-02, + -8.346035033896359156e-04, + -1.072962342948566929e-04, + 2.412331753624817981e-05, + -9.793640468817854661e-01, + 6.625405011186732973e-02, + -5.102126473064734317e-03, + -8.551069374443776396e-05, + 8.618032279329005427e-05, + -1.422030758858379208e-05, + 3.444418516979214084e-01, + -3.631195473807800889e-02, + -4.625381215785304145e-04, + 8.881537622047225473e-04, + -1.080757789189670570e-04, + 5.820590714360855199e-08, + 1.240361649325028681e00, + 4.302664794411619614e-02, + 3.137220402938139478e-03, + -5.615677039256951981e-04, + 
-9.125763978623760322e-05, + 2.367398552885374808e-05, + -1.519498310980496925e00, + -1.512290469691385253e-01, + 1.652996628226939199e-02, + -3.745688059096337011e-05, + -1.938906911473592626e-04, + 1.811217640451412989e-05, + -2.564062357251438717e-01, + 9.626832379335603651e-02, + -2.771163091665611831e-03, + -6.829069315554202020e-04, + 3.363238372709415958e-05, + 8.623099725596635004e-06, + -1.487093617252511990e00, + -6.073523464295225993e-02, + 4.823154268625621383e-03, + 7.122599345182346051e-05, + -1.664931178025436733e-05, + -4.312450972708557703e-06, + ], + dtype=dtype, + ) + .to(device=env.DEVICE0) + .reshape([8, 132]) + ) + # always on cpu + self.table_info_tensor = paddle.to_tensor( + [0, 0.2, 0.4, 0.01, 0.1, -1], dtype=dtype + ).to(device="cpu") + self.em_x_tensor = ( + paddle.to_tensor( + [ + 0.0343909, + 0.11357423, + 0.0858676, + 0.19337772, + 0.1935728, + 0.0477744, + 0.05845198, + 0.19080509, + 0.16111261, + 0.07179262, + 0.10078013, + 0.04640909, + 0.10433399, + 0.15650861, + 0.17527857, + 0.04249097, + ], + dtype=dtype, + ) + .to(device=env.DEVICE) + .reshape([4, 4]) + ) + self.em_tensor = ( + paddle.to_tensor( + [ + 0.0343909, + 0.08394249, + 0.06791791, + 0.00903334, + 0.11357423, + 0.10597251, + 0.05738069, + 0.10071109, + 0.0858676, + 0.17410445, + 0.05390256, + 0.09495758, + 0.19337772, + 0.02045487, + 0.04095526, + 0.18431305, + 0.1935728, + 0.03930614, + 0.0304133, + 0.15261676, + 0.0477744, + 0.06838737, + 0.12824902, + 0.14125861, + 0.05845198, + 0.12731053, + 0.0315968, + 0.14927774, + 0.19080509, + 0.19206871, + 0.14361383, + 0.04083437, + 0.16111261, + 0.19944826, + 0.16563484, + 0.00797179, + 0.07179262, + 0.16993159, + 0.01834742, + 0.08405, + 0.10078013, + 0.0773945, + 0.09541813, + 0.0042979, + 0.04640909, + 0.07968697, + 0.18046262, + 0.11724063, + 0.10433399, + 0.16910201, + 0.10653732, + 0.07434702, + 0.15650861, + 0.0350976, + 0.04088021, + 0.15753491, + 0.17527857, + 0.03178642, + 0.01599623, + 0.08095053, + 0.04249097, + 0.17082205, + 0.18275348, + 0.02921504, + ], + dtype=dtype, + ) + .to(device=env.DEVICE) + .reshape([4, 4, 4]) + ) + self.table_info_tensor.stop_gradient = False + self.table_tensor.stop_gradient = False + self.em_x_tensor.stop_gradient = False + self.em_tensor.stop_gradient = False + self.last_layer_size = 8 + self.nloc = 4 + self.nnei = 4 + # forward test + self.expected_descriptor_tensor = ( + paddle.to_tensor( + [ + 0.2713010991854039, + -0.5660628160978955, + -0.4230503961233804, + 0.14965802865129818, + 0.5269537220240132, + -0.6384566368739288, + -0.11624505007495309, + -0.6310320354395895, + 0.24412212410338252, + -0.5084222360348541, + -0.3820314749241062, + 0.1353770997654753, + 0.4734379786819688, + -0.5715862139964242, + -0.10647548073978085, + -0.5667128671678037, + 0.13979393629121145, + -0.2912360135099118, + -0.21862062309471242, + 0.0774457356172342, + 0.2711843179288344, + -0.3276148559472541, + -0.06077287203673235, + -0.3246349003705672, + 0.24704819325244173, + -0.51555848202734, + -0.3850932628970095, + 0.1362072061097241, + 0.4799221442877444, + -0.5816835832792213, + -0.10566161861294662, + -0.5747363397856997, + 0.3115883382215896, + -0.6506883878140057, + -0.4850147890277097, + 0.1714325837291713, + 0.6056532652377606, + -0.7350680305117758, + -0.13233106208913875, + -0.7254159568199746, + 0.2712100312735679, + -0.5656668988983458, + -0.423180345296733, + 0.14974857024944524, + 0.5266242202317545, + -0.6376329980619129, + -0.11658482011618507, + -0.6305841995062695, + 0.21202134995532176, + 
-0.44212972830581004, + -0.33094666630320135, + 0.1171304680138136, + 0.4116282973058779, + -0.49821290159157117, + -0.0913087049986295, + -0.49286515716995555, + 0.3073352620583199, + -0.6405463996728692, + -0.48022213268980996, + 0.17004692071229927, + 0.5964092995598747, + -0.7211172753937779, + -0.13304346833797515, + -0.7140277774981623, + 0.24135931626467969, + -0.5031638848589246, + -0.3769990721972972, + 0.13347531041756877, + 0.4684615467688112, + -0.5666465173184999, + -0.10429212140716522, + -0.5608812343251992, + 0.33429479916822996, + -0.6966906185519567, + -0.5224684150875668, + 0.1850292768610088, + 0.6486770724987723, + -0.7841702188445613, + -0.14487244407008348, + -0.7765953436864135, + 0.2920023645166421, + -0.6084066648243805, + -0.45656556423057065, + 0.16172245683335545, + 0.5665031788673434, + -0.68453036136064, + -0.12681615685082662, + -0.6781799312012713, + 0.1355913887851541, + -0.28210651239855183, + -0.2125861375354654, + 0.07539812279917796, + 0.2627430315734105, + -0.3166083947691034, + -0.05968776068495512, + -0.314435441368215, + 0.3039443167162163, + -0.6342831095043088, + -0.4738141723760223, + 0.16759395880105882, + 0.5904373874978512, + -0.7155951337421371, + -0.1300380195196118, + -0.7070835391843331, + 0.25830471641609376, + -0.5379623982551216, + -0.40421268847222747, + 0.14323456063074608, + 0.5009475712655791, + -0.6048126036159627, + -0.11264425943960855, + -0.5996424258133577, + 0.21979686345452815, + -0.45763436166403704, + -0.34413141738362185, + 0.12197403142496444, + 0.426168976528326, + -0.5142565732600977, + -0.0960985398529971, + -0.5100982631949256, + 0.21721660807904586, + -0.45326448307918804, + -0.3386650653893315, + 0.1197985132761583, + 0.4219360127187033, + -0.5113040538633717, + -0.09300044948770746, + -0.5052854291543631, + ], + dtype=dtype, + ) + .reshape([4, 4, 8]) + .to(device=env.DEVICE) + ) + # backward test + self.expected_dy_dem_x = ( + paddle.to_tensor( + [ + -0.02067741234134639, + -0.037876115867122244, + -0.041801992795897414, + -0.04158797219225682, + -0.03938577535008901, + -0.04047080940333281, + -0.03819691803756371, + -0.05383372190821694, + -0.051795083742471035, + -0.03552707650567376, + -0.02812172878706858, + -0.044512948316127884, + -0.04586229371985228, + -0.037943692770837076, + -0.02917727398183635, + -0.04478649455427308, + ], + dtype=dtype, + ) + .reshape([4, 4]) + .to(device=env.DEVICE) + ) + self.expected_dy_dem = ( + paddle.to_tensor( + [ + -3.3296560873139764, + -3.3296560873139764, + -3.3296560873139764, + -3.3296560873139764, + -3.337818861718732, + -3.337818861718732, + -3.337818861718732, + -3.337818861718732, + -3.33501295970411, + -3.33501295970411, + -3.33501295970411, + -3.33501295970411, + -3.345599737642763, + -3.345599737642763, + -3.345599737642763, + -3.345599737642763, + -3.3456182126661695, + -3.3456182126661695, + -3.3456182126661695, + -3.3456182126661695, + -3.33106684078773, + -3.33106684078773, + -3.33106684078773, + -3.33106684078773, + -3.33218327918215, + -3.33218327918215, + -3.33218327918215, + -3.33218327918215, + -3.3453558489853616, + -3.3453558489853616, + -3.3453558489853616, + -3.3453558489853616, + -3.3425075397870057, + -3.3425075397870057, + -3.3425075397870057, + -3.3425075397870057, + -3.333566847126196, + -3.333566847126196, + -3.333566847126196, + -3.333566847126196, + -3.336529893308974, + -3.336529893308974, + -3.336529893308974, + -3.336529893308974, + -3.330923503981002, + -3.330923503981002, + -3.330923503981002, + -3.330923503981002, + -3.3368890892700986, + 
-3.3368890892700986, + -3.3368890892700986, + -3.3368890892700986, + -3.3420603756052665, + -3.3420603756052665, + -3.3420603756052665, + -3.3420603756052665, + -3.343874115987605, + -3.343874115987605, + -3.343874115987605, + -3.343874115987605, + -3.330511428849272, + -3.330511428849272, + -3.330511428849272, + -3.330511428849272, + ], + dtype=dtype, + ) + .reshape([4, 4, 4]) + .to(device=env.DEVICE) + ) + + def test_forward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_a( + self.table_tensor, + self.table_info_tensor, + self.em_x_tensor, + self.em_tensor, + self.last_layer_size, + ) + + descriptor_tensor = forward_result[0] + + # Check the shape + self.assertEqual(descriptor_tensor.shape, self.expected_descriptor_tensor.shape) + + # Check the values + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + def test_backward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_a( + self.table_tensor, + self.table_info_tensor, + self.em_x_tensor, + self.em_tensor, + self.last_layer_size, + ) + + descriptor_tensor = forward_result[0] + + # Check the forward + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + # Create a loss and perform backward + loss = descriptor_tensor.sum() + loss.backward() + + # Check gradients + self.assertIsNotNone(self.em_x_tensor.grad) + self.assertIsNotNone(self.em_tensor.grad) + + # Check the shapes of the gradients + self.assertEqual(self.em_x_tensor.grad.shape, self.expected_dy_dem_x.shape) + self.assertEqual(self.em_tensor.grad.shape, self.expected_dy_dem.shape) + + # Check the values of the gradients + np.testing.assert_allclose( + self.em_x_tensor.grad.numpy(), + self.expected_dy_dem_x.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + np.testing.assert_allclose( + self.em_tensor.grad.numpy(), + self.expected_dy_dem.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_tabulate_fusion_se_atten.py b/source/tests/pd/test_tabulate_fusion_se_atten.py new file mode 100644 index 0000000000..a7322862c8 --- /dev/null +++ b/source/tests/pd/test_tabulate_fusion_se_atten.py @@ -0,0 +1,1654 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.cxx_op import ( + ENABLE_CUSTOMIZED_OP, +) +from deepmd.pd.utils import ( + env, +) + +from ..consistent.common import ( + parameterized, +) + + +@parameterized((paddle.float64, paddle.float32)) +@unittest.skipIf(not ENABLE_CUSTOMIZED_OP, "PyTorch customized OPs are not built") +class TestTabulateFusionSeAttenOp(unittest.TestCase): + def setUp(self): + (dtype,) = self.param + if dtype == paddle.float64: + self.prec = 1e-10 + elif dtype == paddle.float32: + self.prec = 1e-5 + self.table_tensor = ( + paddle.to_tensor( + [ + 6.348551343037398542e-01, + 4.209465843706336474e-04, + 6.390862740714405368e-03, + -1.544448595628262176e-04, + -1.891095227974180087e-04, + 2.695025951562175852e-05, + -1.317549846042939343e00, + -5.624478206903206490e-02, + 1.274284553146523905e-02, + -6.836227424141475689e-04, + -1.438066096020836407e-04, + -1.854932873974712940e-06, + -9.996964112615246423e-01, + 6.928234423723647617e-02, + -4.974719973810486084e-03, + -2.019584729176823030e-04, + 1.077254539742680247e-04, + 
-8.024209768588029797e-06, + 3.552689563657350780e-01, + -3.578299775339799371e-02, + -1.319946251007718743e-03, + 1.016701374495701440e-03, + -1.057336720791906388e-04, + 5.182678943855506567e-06, + 1.227750369557627286e00, + 4.100352079064395472e-02, + 3.586869164810712295e-03, + -4.304540913340443135e-04, + -1.269943482892440004e-04, + 1.459465404430219674e-05, + -1.472642501673147031e00, + -1.611354921283318364e-01, + 1.645427874390196360e-02, + 2.107392978135091402e-04, + -2.193541011180757461e-04, + 1.915392497459551146e-05, + -2.855174490181606739e-01, + 9.774337856626263976e-02, + -2.140891880666230714e-03, + -7.148328890055103638e-04, + 1.965696332267534503e-05, + -4.593489654121371453e-06, + -1.468441009949382314e00, + -6.360828127262234399e-02, + 4.751283295356955282e-03, + 8.711899561753186068e-05, + -9.937008678852959884e-06, + 4.273569346584811685e-07, + 6.348599826995243722e-01, + 5.487167506364742930e-04, + 6.386116198716365253e-03, + -1.619832375568118791e-04, + -1.877328309473502049e-04, + 2.134130914519164856e-05, + -1.318111020264137512e00, + -5.599013082054477008e-02, + 1.272225054666903735e-02, + -6.893710047488201898e-04, + -1.434367581078517366e-04, + 3.329508890614227371e-05, + -9.990040854920316793e-01, + 6.918278968071900348e-02, + -4.980714172967731085e-03, + -1.976574487947816198e-04, + 1.070037204086153902e-04, + -7.859875077388093586e-06, + 3.549109954092205532e-01, + -3.580909209068139365e-02, + -1.289508598157979719e-03, + 1.012474257117017967e-03, + -1.054418924402112718e-04, + -1.245498322204730900e-05, + 1.228160763020727630e00, + 4.107512853046493134e-02, + 3.573879491390910459e-03, + -4.355190226638688713e-04, + -1.258433981470396103e-04, + 1.610862268100766631e-05, + -1.474252210958008291e00, + -1.608063442081248406e-01, + 1.646046950167207382e-02, + 2.019843636566674109e-04, + -2.185756589083626730e-04, + 1.978479879983412190e-05, + -2.845402300363228942e-01, + 9.770034635718018168e-02, + -2.162325119197382531e-03, + -7.140472215558940627e-04, + 1.956302663031799223e-05, + 1.932584474244053378e-05, + -1.469076617546759334e00, + -6.351322951074317436e-02, + 4.753890907276497185e-03, + 8.672114560243554321e-05, + -1.004574434175897967e-05, + -4.345700882560937596e-06, + 6.348661083147921769e-01, + 6.763897297752743953e-04, + 6.381144275303845745e-03, + -1.694690463885140694e-04, + -1.868179426353836598e-04, + 3.439291082765030046e-05, + -1.318669650038090335e00, + -5.573589319299507294e-02, + 1.270148368741391351e-02, + -6.950749719342792137e-04, + -1.422194703304518733e-04, + 3.454751241752252323e-05, + -9.983127558632299836e-01, + 6.908311652764687061e-02, + -4.986579772806746212e-03, + -1.933888092529071571e-04, + 1.068327546750306073e-04, + -2.976978385983384886e-05, + 3.545527765488725169e-01, + -3.583457894275744043e-02, + -1.259197760082061621e-03, + 1.008246479193084487e-03, + -1.059401869200098984e-04, + 1.721968053146218465e-06, + 1.228571871257205572e00, + 4.114647496201748883e-02, + 3.560738575723638825e-03, + -4.405332425718102457e-04, + -1.251648759618972115e-04, + 3.659080417076460655e-05, + -1.475858628153338792e00, + -1.604770750960976822e-01, + 1.646639808472218428e-02, + 1.932598402043995316e-04, + -2.175904819601363058e-04, + 1.230256868634094333e-05, + -2.835634435191126679e-01, + 9.765688571984927624e-02, + -2.183734604613508240e-03, + -7.132463811570244078e-04, + 2.021887442373574272e-05, + 1.321401495096886281e-05, + -1.469711274366155784e00, + -6.341812571665436660e-02, + 4.756486470714936521e-03, + 8.631384191910702040e-05, + 
-1.010516500002806932e-05, + -1.110874413279218719e-05, + 6.348735101551836735e-01, + 8.039610290153098582e-04, + 6.375948457075718626e-03, + -1.769074132993461279e-04, + -1.855677150383903214e-04, + 3.421271436711027645e-05, + -1.319225739518145257e00, + -5.548207260888919634e-02, + 1.268054645200545304e-02, + -7.007297564176242621e-04, + -1.408885818822980523e-04, + 3.124701885930576017e-05, + -9.976224235482542557e-01, + 6.898332734138989952e-02, + -4.992317635216104131e-03, + -1.891404922064061889e-04, + 1.053957535708985289e-04, + -1.089286646983666076e-06, + 3.541943058468561834e-01, + -3.585946084769019160e-02, + -1.229013912637771933e-03, + 1.004009466262262241e-03, + -1.059129033455631863e-04, + -4.941663399086282537e-06, + 1.228983691638902087e00, + 4.121755707472917613e-02, + 3.547447845420277635e-03, + -4.455036207721562607e-04, + -1.239172256532283074e-04, + 3.437341080261359686e-05, + -1.477461752073406132e00, + -1.601476900261984693e-01, + 1.647206544856073471e-02, + 1.845724864086241608e-04, + -2.173853638475303177e-04, + 3.620505631412716563e-05, + -2.825870937484175061e-01, + 9.761299713537928413e-02, + -2.205119732548723246e-03, + -7.124245958910824846e-04, + 2.074820558303217398e-05, + 1.209381466404663338e-05, + -1.470344979888463577e00, + -6.332297013406351649e-02, + 4.759069711794740656e-03, + 8.589935708505183382e-05, + -1.045842324058424788e-05, + -6.134254562752213537e-06, + 6.348821871815598650e-01, + 9.314261853726121809e-04, + 6.370530236175125580e-03, + -1.842978984547447257e-04, + -1.840210089691990327e-04, + 2.234897510077387526e-05, + -1.319779292891724465e00, + -5.522867246076747227e-02, + 1.265944033870337014e-02, + -7.063360380236871801e-04, + -1.393416734992873119e-04, + 1.931167378610719847e-05, + -9.969330896946905218e-01, + 6.888342466806646192e-02, + -4.997928623431705138e-03, + -1.849303524006284602e-04, + 1.053651633995249134e-04, + -2.870133904891753420e-05, + 3.538355893399378616e-01, + -3.588374034700148041e-02, + -1.198957225773849763e-03, + 9.997681359810027708e-04, + -1.060678155548662341e-04, + -4.107776618240329050e-06, + 1.229396221507694564e00, + 4.128837188660083868e-02, + 3.534008730169808672e-03, + -4.504275777948374090e-04, + -1.224778886969254976e-04, + 2.455513266683544498e-05, + -1.479061581584721008e00, + -1.598181942132129441e-01, + 1.647747255391585064e-02, + 1.759082956613747337e-04, + -2.158335508261176197e-04, + 6.406725844410341030e-06, + -2.816111850012528728e-01, + 9.756868109694678826e-02, + -2.226479900633348240e-03, + -7.115823288942964460e-04, + 2.121038517729223415e-05, + 1.358027318850170435e-05, + -1.470977733597038872e00, + -6.322776301216057049e-02, + 4.761640356162846754e-03, + 8.547576468445008296e-05, + -1.081874527005240631e-05, + -8.845528475774308509e-07, + 6.348921383103013349e-01, + 1.058780765759985421e-03, + 6.364891110105044131e-03, + -1.916363332792569681e-04, + -1.827768871456785058e-04, + 2.275707291847725182e-05, + -1.320330314380025793e00, + -5.497569611120622923e-02, + 1.263816684562326688e-02, + -7.118908987616576157e-04, + -1.380182662155302303e-04, + 1.630252530406085050e-05, + -9.962447554247517711e-01, + 6.878341103651769428e-02, + -5.003413601927745452e-03, + -1.807403991329658622e-04, + 1.040363362483998831e-04, + -4.422604643727719699e-06, + 3.534766330394523148e-01, + -3.590741998555346121e-02, + -1.169027863565602274e-03, + 9.955202772264954043e-04, + -1.060447700647724903e-04, + -1.021743279826507342e-05, + 1.229809458175783687e00, + 4.135891644424664892e-02, + 
3.520422661584679015e-03, + -4.553035794622276055e-04, + -1.210679214963379874e-04, + 1.595827246550979495e-05, + -1.480658115605847147e00, + -1.594885928526604546e-01, + 1.648262036665308974e-02, + 1.672799673730459213e-04, + -2.148155690753495697e-04, + -1.867405535452657550e-06, + -2.806357215496423363e-01, + 9.752393810975558408e-02, + -2.247814508535729908e-03, + -7.107227883497464890e-04, + 2.207595560206285042e-05, + -1.137331983229785190e-06, + -1.471609534977757372e00, + -6.313250460562676303e-02, + 4.764198129054059844e-03, + 8.503999275315992160e-05, + -1.072692568096017848e-05, + -1.373273803695183988e-05, + 6.349033624136081189e-01, + 1.186020367092407990e-03, + 6.359032581545111251e-03, + -1.989262833250400370e-04, + -1.812752661309344573e-04, + 1.302837915648187095e-05, + -1.320878808237722746e00, + -5.472314689282183064e-02, + 1.261672747063919374e-02, + -7.173917679890315846e-04, + -1.373052781380030543e-04, + 3.768455339511444900e-05, + -9.955574218354472649e-01, + 6.868328895828368363e-02, + -5.008773436308684712e-03, + -1.765844799686671349e-04, + 1.034810966435298563e-04, + -1.111176255155353207e-05, + 3.531174429312692320e-01, + -3.593050231143132822e-02, + -1.139225984250480384e-03, + 9.912704081392112714e-04, + -1.064918174657224404e-04, + 2.680738443515978403e-06, + 1.230223398925979650e00, + 4.142918782293085467e-02, + 3.506691073047987512e-03, + -4.601302388532728274e-04, + -1.198865987378785417e-04, + 1.656386182477533959e-05, + -1.482251353107205460e00, + -1.591588911206925361e-01, + 1.648750985769346228e-02, + 1.586901819247656846e-04, + -2.147074421644348298e-04, + 2.641762503224190698e-05, + -2.796607076604977760e-01, + 9.747876869099537933e-02, + -2.269122958003529523e-03, + -7.098388532529275848e-04, + 2.226701915637888804e-05, + 1.106237844209756009e-05, + -1.472240383519069384e00, + -6.303719517464229094e-02, + 4.766742755353862819e-03, + 8.459962202271287246e-05, + -1.132218730142039535e-05, + 8.958476322974335592e-07, + 6.349158583197994643e-01, + 1.313140616388666637e-03, + 6.352956158169477396e-03, + -2.061601622854974502e-04, + -1.806298821034440756e-04, + 3.770936817966389514e-05, + -1.321424778752664952e00, + -5.447102810827629538e-02, + 1.259512371128685033e-02, + -7.228490733933210606e-04, + -1.356407402355522122e-04, + 2.099832634320949299e-05, + -9.948710899987588396e-01, + 6.858306092758209571e-02, + -5.014008993202081696e-03, + -1.724573933478598642e-04, + 1.029144894329912032e-04, + -1.738522780636760158e-05, + 3.527580249757622521e-01, + -3.595298987582695727e-02, + -1.109551740263377793e-03, + 9.870126155001155040e-04, + -1.064931456292656029e-04, + -2.059910396978558087e-06, + 1.230638041011988815e00, + 4.149918312660194619e-02, + 3.492815399561766294e-03, + -4.649051157564728157e-04, + -1.192927614880224277e-04, + 4.072077917749542957e-05, + -1.483841293110880866e00, + -1.588290941739924356e-01, + 1.649214200293154520e-02, + 1.501282794678792006e-04, + -2.138853834118830831e-04, + 2.633111784219914963e-05, + -2.786861475954987011e-01, + 9.743317336979973042e-02, + -2.290404652904617314e-03, + -7.089360554728917595e-04, + 2.260180638238835256e-05, + 1.741828165826791135e-05, + -1.472870278712053782e00, + -6.294183498489253070e-02, + 4.769273959660644442e-03, + 8.414681093302789892e-05, + -1.142905205912834352e-05, + -4.014065121916994726e-06, + 6.349296248136164778e-01, + 1.440137170869312810e-03, + 6.346663352465874847e-03, + -2.133510744796659759e-04, + -1.788513201196447670e-04, + 1.721163944875696416e-05, + -1.321968230245579967e00, 
+ -5.421934303028537461e-02, + 1.257335706466754244e-02, + -7.282542863230233527e-04, + -1.343059033644905889e-04, + 1.747822893445653714e-05, + -9.941857609618123259e-01, + 6.848272942128874607e-02, + -5.019121140152461337e-03, + -1.683596869525186377e-04, + 1.024142382012053007e-04, + -2.632719129544749384e-05, + 3.523983851077774343e-01, + -3.597488523292310947e-02, + -1.080005278271846739e-03, + 9.827512175914082399e-04, + -1.066680880078371994e-04, + 3.403258606315080555e-07, + 1.231053381658700818e00, + 4.156889948792314576e-02, + 3.478797077596604108e-03, + -4.696409807358484993e-04, + -1.173636798436718986e-04, + 1.149931408689037458e-05, + -1.485427934690428442e00, + -1.584992071496764965e-01, + 1.649651778315383566e-02, + 1.415960091521040870e-04, + -2.125888038426753843e-04, + 7.384582528889821378e-06, + -2.777120456109742896e-01, + 9.738715268720327112e-02, + -2.311658999267464203e-03, + -7.080165982958596923e-04, + 2.340034491729013294e-05, + 5.174033942788913380e-06, + -1.473499220050474623e00, + -6.284642430757329812e-02, + 4.771791466347353149e-03, + 8.368540130389298475e-05, + -1.162498575113560591e-05, + -5.381585801785509468e-06, + 6.349446606365225509e-01, + 1.567005718051586727e-03, + 6.340155681555815353e-03, + -2.204854663573854625e-04, + -1.779502948888764897e-04, + 3.196283450610521294e-05, + -1.322509167069771951e00, + -5.396809490162747525e-02, + 1.255142902735281209e-02, + -7.336077414823606981e-04, + -1.332538502428148267e-04, + 2.525523713666122703e-05, + -9.935014357470516311e-01, + 6.838229689892011409e-02, + -5.024110745516051704e-03, + -1.642860423419652261e-04, + 1.011792892256958577e-04, + -5.902237032851650630e-06, + 3.520385292366049468e-01, + -3.599619093977864809e-02, + -1.050586739210998023e-03, + 9.784837539753422735e-04, + -1.066187407206570670e-04, + -6.052991441884039902e-06, + 1.231469418062474341e00, + 4.163833406830096812e-02, + 3.464637544942418459e-03, + -4.743218246565151001e-04, + -1.164951133813105271e-04, + 2.473911917278243621e-05, + -1.487011276970676033e00, + -1.581692351651968476e-01, + 1.650063818395723983e-02, + 1.331001312464952355e-04, + -2.118074389246019866e-04, + 9.192428068946771109e-06, + -2.767384059577842614e-01, + 9.734070719609828892e-02, + -2.332885405321092481e-03, + -7.070743922828596519e-04, + 2.373777250910882265e-05, + 1.127700884024945933e-05, + -1.474127207030835107e00, + -6.275096341939470634e-02, + 4.774294999622533293e-03, + 8.321347296773265077e-05, + -1.162225195759229858e-05, + -1.468175407624093560e-05, + 6.349609644870094494e-01, + 1.693741975839754832e-03, + 6.333434667015966531e-03, + -2.275719866012916918e-04, + -1.766077012712487378e-04, + 2.919052022666632077e-05, + -1.323047593610823247e00, + -5.371728693515605280e-02, + 1.252934109528984138e-02, + -7.389107006611626187e-04, + -1.322992615601379437e-04, + 3.689337377145077536e-05, + -9.928181153524118230e-01, + 6.828176580261838269e-02, + -5.028978678356570489e-03, + -1.602449667799085492e-04, + 1.004819833385002965e-04, + -7.012859043909368637e-06, + 3.516784632459502014e-01, + -3.601690955621394963e-02, + -1.021296258318379370e-03, + 9.742140050919662845e-04, + -1.068837890347894775e-04, + 3.261791903209577241e-07, + 1.231886147391427544e00, + 4.170748405790913882e-02, + 3.450338240560582581e-03, + -4.789562532735843967e-04, + -1.153902983973557932e-04, + 2.856018069496295048e-05, + -1.488591319127526624e00, + -1.578391833182464787e-01, + 1.650450419566778376e-02, + 1.246407552546250339e-04, + -2.115332183818513349e-04, + 
3.149345367837511192e-05, + -2.757652328811996956e-01, + 9.729383746118988596e-02, + -2.354083281534554220e-03, + -7.061133365182417328e-04, + 2.418809213597686327e-05, + 1.280494807360028992e-05, + -1.474754239152433311e00, + -6.265545260258377491e-02, + 4.776784283590801948e-03, + 8.273687806363864625e-05, + -1.229952261449745124e-05, + 3.204146150058887708e-06, + 6.349785350208994039e-01, + 1.820341692612803541e-03, + 6.326501834700739083e-03, + -2.346100929840904846e-04, + -1.748840426396014729e-04, + 1.130785525935554482e-05, + -1.323583514286295282e00, + -5.346692231381247606e-02, + 1.250709476370755191e-02, + -7.441705970339035966e-04, + -1.303302437099287372e-04, + 7.935577538626925858e-06, + -9.921358007514943234e-01, + 6.818113855713830995e-02, + -5.033725808341922223e-03, + -1.562353718150353687e-04, + 1.001568149392305130e-04, + -2.302258383924021595e-05, + 3.513181929939074299e-01, + -3.603704364469759169e-02, + -9.921339651685744804e-04, + 9.699384566370250092e-04, + -1.069081013817698415e-04, + -2.744679484186812129e-06, + 1.232303566785723392e00, + 4.177634667571154814e-02, + 3.435900604437185177e-03, + -4.835440426346156498e-04, + -1.140781768005934266e-04, + 2.411509316948267986e-05, + -1.490168060387760951e00, + -1.575090566866652331e-01, + 1.650811681325956015e-02, + 1.162064642248029450e-04, + -2.100324946396962247e-04, + 4.868837971279583202e-06, + -2.747925306207861240e-01, + 9.724654405895133413e-02, + -2.375252040655950400e-03, + -7.051355614741510987e-04, + 2.505903781065493165e-05, + -2.569082101323676566e-06, + -1.475380315917416585e00, + -6.255989214488603956e-02, + 4.779259042312647421e-03, + 8.224491253736542200e-05, + -1.205054378062991984e-05, + -1.594987943813344381e-05, + 6.349973708516511994e-01, + 1.946800647308156995e-03, + 6.319358714566076195e-03, + -2.415904693897710526e-04, + -1.741570105122868483e-04, + 3.342152683043006766e-05, + -1.324116933545430141e00, + -5.321700419064152865e-02, + 1.248469152702344660e-02, + -7.493727578058629766e-04, + -1.295525827398787404e-04, + 2.659942231629285135e-05, + -9.914544928937398804e-01, + 6.808041756983601589e-02, + -5.038353005641925050e-03, + -1.522500103683389601e-04, + 9.911425811568465554e-05, + -1.035676665958809070e-05, + 3.509577243129330393e-01, + -3.605659577023319351e-02, + -9.630999837076988784e-04, + 9.656594578503095369e-04, + -1.070158919994286978e-04, + -2.281503112307771063e-06, + 1.232721673357858538e00, + 4.184491916948063911e-02, + 3.421326077437690516e-03, + -4.880823132679394552e-04, + -1.129872290747681817e-04, + 2.854952342195995698e-05, + -1.491741500028839651e00, + -1.571788603283475749e-01, + 1.651147703627379656e-02, + 1.078118218043548068e-04, + -2.094656285123614196e-04, + 1.573608604543182341e-05, + -2.738203034102859035e-01, + 9.719882757757769554e-02, + -2.396391097750961291e-03, + -7.041328812172977002e-04, + 2.511128111671661627e-05, + 1.472819566023977703e-05, + -1.476005436830838402e00, + -6.246428233956573262e-02, + 4.781718999863710830e-03, + 8.175246233396933941e-05, + -1.310850420537104008e-05, + 1.717274673157189222e-05, + 6.350174705506670403e-01, + 2.073114649501703322e-03, + 6.312006840494438151e-03, + -2.485262001215581039e-04, + -1.724445833892894095e-04, + 1.623821996891234705e-05, + -1.324647855868849478e00, + -5.296753568880858964e-02, + 1.246213287875118370e-02, + -7.545274547770323926e-04, + -1.284298383236558551e-04, + 3.142127009671183137e-05, + -9.907741927046019859e-01, + 6.797960523066012839e-02, + -5.042861140826992473e-03, + -1.482946605870891395e-04, 
+ 9.821987974303589589e-05, + -3.593831829470692349e-06, + 3.505970630098214080e-01, + -3.607556850024738748e-02, + -9.341944322877257512e-04, + 9.613773761737330267e-04, + -1.072343182304808093e-04, + 2.791451096706449119e-06, + 1.233140464192951757e00, + 4.191319881581374862e-02, + 3.406616101162745613e-03, + -4.925758895926437772e-04, + -1.113902906060245713e-04, + 1.275308331152581608e-05, + -1.493311637378700762e00, + -1.568485992811522733e-01, + 1.651458586873823589e-02, + 9.944841367174414462e-05, + -2.085492230796830474e-04, + 1.276456024245067926e-05, + -2.728485554775001987e-01, + 9.715068861693920699e-02, + -2.417499870240937074e-03, + -7.031148500958378164e-04, + 2.576543833825076558e-05, + 7.841889896124507091e-06, + -1.476629601400710978e00, + -6.236862348540499201e-02, + 4.784163880393361643e-03, + 8.124213252544174404e-05, + -1.286332078849730127e-05, + -1.821996546344873330e-06, + 6.350388326475970846e-01, + 2.199279539485121671e-03, + 6.304447750121061969e-03, + -2.554047701160370044e-04, + -1.716061813901302753e-04, + 3.413524324276134592e-05, + -1.325176285768258300e00, + -5.271851990161838253e-02, + 1.243942031140890699e-02, + -7.596346042592860793e-04, + -1.269803855069738714e-04, + 2.314478643438959578e-05, + -9.900949010857222898e-01, + 6.787870391214460841e-02, + -5.047251084767826433e-03, + -1.443753107913585767e-04, + 9.837034053479728221e-05, + -3.865274593462701621e-05, + 3.502362148656810170e-01, + -3.609396440447816545e-02, + -9.054174237006253068e-04, + 9.570894530963515055e-04, + -1.071221722792567601e-04, + -5.180134097885568801e-06, + 1.233559936349031494e00, + 4.198118292014653419e-02, + 3.391772117805412056e-03, + -4.970162819604460663e-04, + -1.105584293158747960e-04, + 2.757032189173095048e-05, + -1.494878471815561216e00, + -1.565182785628131401e-01, + 1.651744431908664865e-02, + 9.112268062696188113e-05, + -2.082277461664644284e-04, + 3.370820636496137736e-05, + -2.718772910441742408e-01, + 9.710212778853387350e-02, + -2.438577777940475859e-03, + -7.020756635958485484e-04, + 2.613933618298708639e-05, + 1.211520684095310762e-05, + -1.477252809138063672e00, + -6.227291588670166161e-02, + 4.786593408182711167e-03, + 8.072392747742672100e-05, + -1.281499371544444526e-05, + -1.293175202324119235e-05, + 6.350614556306495295e-01, + 2.325291188338546311e-03, + 6.296682984661446623e-03, + -2.622362895631248896e-04, + -1.701076322674243866e-04, + 2.573454296903621253e-05, + -1.325702227786145437e00, + -5.246995989253622206e-02, + 1.241655531642829255e-02, + -7.646904682589584622e-04, + -1.257704658362481128e-04, + 2.439373356208127567e-05, + -9.894166189151047952e-01, + 6.777771596940393439e-02, + -5.051523708536139086e-03, + -1.404733355821404265e-04, + 9.677082285072928253e-05, + -3.720510878458014501e-06, + 3.498751856359115786e-01, + -3.611178605486395354e-02, + -8.767690652124425499e-04, + 9.527998576480508275e-04, + -1.072771816869139909e-04, + -2.281376475091892258e-06, + 1.233980086857325631e00, + 4.204886881676297983e-02, + 3.376795570009583514e-03, + -5.014114486109571937e-04, + -1.092957353261917852e-04, + 2.516456964431257380e-05, + -1.496442002767713664e00, + -1.561879031708521548e-01, + 1.652005340007862977e-02, + 8.282284133744905071e-05, + -2.067123325224875000e-04, + 7.057486539657783089e-06, + -2.709065143258797548e-01, + 9.705314571543909030e-02, + -2.459624243094573216e-03, + -7.010187162791577066e-04, + 2.672975399789282626e-05, + 7.629793933874534523e-06, + -1.477875059556995385e00, + -6.217715985326619649e-02, + 4.789007307701962507e-03, 
+ 8.019935829649041371e-05, + -1.318861260046749971e-05, + -7.150339348059032240e-06, + 6.350853379468965887e-01, + 2.451145498001100487e-03, + 6.288714088740080324e-03, + -2.690159202421790068e-04, + -1.686584359429067433e-04, + 1.941481480743946700e-05, + -1.326225686495484890e00, + -5.222185869521017709e-02, + 1.239353938406437261e-02, + -7.696964132049412353e-04, + -1.246012242240120604e-04, + 2.724071141974432252e-05, + -9.887393470472876089e-01, + 6.767664374012982709e-02, + -5.055679883306329545e-03, + -1.366074591188833347e-04, + 9.623033677044332457e-05, + -1.113456896173822779e-05, + 3.495139810501832756e-01, + -3.612903602543367232e-02, + -8.482494585971035728e-04, + 9.485064841097947883e-04, + -1.073561607316583907e-04, + -2.239996380309942211e-06, + 1.234400912722548371e00, + 4.211625386880359784e-02, + 3.361687900729734210e-03, + -5.057597926077623488e-04, + -1.078411892315765344e-04, + 1.508800592977199686e-05, + -1.498002229713325750e00, + -1.558574780824932282e-01, + 1.652241412871961052e-02, + 7.456368677257522147e-05, + -2.062001731191939454e-04, + 2.069621557469772063e-05, + -2.699362295319003291e-01, + 9.700374303226286243e-02, + -2.480638690415259105e-03, + -6.999405672986690023e-04, + 2.700789474676622474e-05, + 1.556143061449123430e-05, + -1.478496352174730522e00, + -6.208135570041733303e-02, + 4.791405303667145565e-03, + 7.966538051836852740e-05, + -1.352687841609079228e-05, + -2.789411930543395566e-06, + 6.351104780025849106e-01, + 2.576838401336829787e-03, + 6.280542610220480118e-03, + -2.757414391158645754e-04, + -1.675762649448408429e-04, + 2.787462665161048641e-05, + -1.326746666499438287e00, + -5.197421931349595348e-02, + 1.237037400330611749e-02, + -7.746541492504023475e-04, + -1.232228491818352083e-04, + 2.166599538617633252e-05, + -9.880630863135209108e-01, + 6.757548954459043078e-02, + -5.059720480258220535e-03, + -1.327693574508429343e-04, + 9.550030312894054513e-05, + -1.096549240339310371e-05, + 3.491526068124157778e-01, + -3.614571689219699124e-02, + -8.198587001702131727e-04, + 9.442100079790295610e-04, + -1.074330339280879455e-04, + -2.103241190440061311e-06, + 1.234822410923189784e00, + 4.218333546826981417e-02, + 3.346450553092000530e-03, + -5.100549148199152614e-04, + -1.071543306169886722e-04, + 3.572075491055831030e-05, + -1.499559152180234056e00, + -1.555270082545787691e-01, + 1.652452752618108200e-02, + 6.633607063542407416e-05, + -2.052990867644106118e-04, + 1.891505702101457936e-05, + -2.689664408651156746e-01, + 9.695392038509384469e-02, + -2.501620547117759490e-03, + -6.988464710389351081e-04, + 2.774961528830105395e-05, + 4.843681010028069226e-06, + -1.479116686511674494e00, + -6.198550374897651011e-02, + 4.793787121096219732e-03, + 7.912045955652986253e-05, + -1.359696279035538403e-05, + -9.132339849453571562e-06, + 6.351368741634448867e-01, + 2.702365862198193025e-03, + 6.272170100036473551e-03, + -2.824171711189519380e-04, + -1.661976899287730559e-04, + 2.457347650017094835e-05, + -1.327265172431057128e00, + -5.172704472148267896e-02, + 1.234706066178771662e-02, + -7.795630288411945592e-04, + -1.217395799935142969e-04, + 1.184741714306808905e-05, + -9.873878375219384829e-01, + 6.747425568563097942e-02, + -5.063646370480812467e-03, + -1.289626891970745083e-04, + 9.513074838211379970e-05, + -2.521433322545949321e-05, + 3.487910686007592576e-01, + -3.616183123303555458e-02, + -7.915968808226425679e-04, + 9.399119246579864433e-04, + -1.077055728285351480e-04, + 6.031191175422362627e-06, + 1.235244578411804905e00, + 
4.225011103602600848e-02, + 3.331084970256580589e-03, + -5.143079026275864784e-04, + -1.055716785023949844e-04, + 2.051193936812822612e-05, + -1.501112769745742259e00, + -1.551964986234863897e-01, + 1.652639461772111712e-02, + 5.814089462644928566e-05, + -2.041249358339155683e-04, + 6.311073191969795411e-06, + -2.679971525218879380e-01, + 9.690367843145115956e-02, + -2.522569242956208650e-03, + -6.977319783847560700e-04, + 2.827424678587480721e-05, + 2.739673941330651616e-06, + -1.479736062091468574e00, + -6.188960432526132566e-02, + 4.796152485364500034e-03, + 7.856828747830194362e-05, + -1.395147193446202365e-05, + -4.087221013031299888e-06, + 6.351645247550001816e-01, + 2.827723875485507743e-03, + 6.263598112024793517e-03, + -2.890409134869928735e-04, + -1.648390823803598971e-04, + 2.215887759642637032e-05, + -1.327781208952985015e00, + -5.148033786352124164e-02, + 1.232360084570068709e-02, + -7.844171563535663055e-04, + -1.210428935521009746e-04, + 3.344327592646507844e-05, + -9.867136014577331249e-01, + 6.737294444867666932e-02, + -5.067458424877044516e-03, + -1.251812701937470213e-04, + 9.419473244264059593e-05, + -1.679002076268449654e-05, + 3.484293720675762929e-01, + -3.617738162759492893e-02, + -7.634640860539731316e-04, + 9.356082122653546981e-04, + -1.075431084112703954e-04, + -3.044614041061100766e-06, + 1.235667412115300623e00, + 4.231657802179918798e-02, + 3.315592595281378029e-03, + -5.185116053649769336e-04, + -1.041674655671950871e-04, + 1.242766263135090892e-05, + -1.502663082036415076e00, + -1.548659541050484978e-01, + 1.652801643260504508e-02, + 4.998556989557471122e-05, + -2.037688261998792680e-04, + 2.657243869390409541e-05, + -2.670283686919466826e-01, + 9.685301784023310490e-02, + -2.543484210258855835e-03, + -6.965966582328896994e-04, + 2.850491087748043708e-05, + 1.232179636112698650e-05, + -1.480354478441044286e00, + -6.179365776107784841e-02, + 4.798501122259496952e-03, + 7.800586916120723585e-05, + -1.413851691566035862e-05, + -5.727587674967719880e-06, + 6.351934280628791507e-01, + 2.952908467203564646e-03, + 6.254828202758994093e-03, + -2.956111985445306826e-04, + -1.636502852942454153e-04, + 2.616921494951480123e-05, + -1.328294780757159899e00, + -5.123410165425365537e-02, + 1.229999603970671068e-02, + -7.892274520450543677e-04, + -1.195721301312790567e-04, + 2.454197033093738297e-05, + -9.860403788833298488e-01, + 6.727155810173718331e-02, + -5.071157514069617352e-03, + -1.214296539729165295e-04, + 9.340570341953608358e-05, + -1.444050153586573228e-05, + 3.480675228394242149e-01, + -3.619237065717702262e-02, + -7.354603960058733389e-04, + 9.313051737393654526e-04, + -1.076930273455606579e-04, + -7.696053039474192446e-07, + 1.236090908935226107e00, + 4.238273390417521269e-02, + 3.299974870987111650e-03, + -5.226642260988254756e-04, + -1.032474625011560351e-04, + 2.396475265799989632e-05, + -1.504210088727871764e00, + -1.545353795944727493e-01, + 1.652939400402650763e-02, + 4.186078937618800693e-05, + -2.027012231708198600e-04, + 1.761148452766873776e-05, + -2.660600935582757565e-01, + 9.680193929166537592e-02, + -2.564364883962782712e-03, + -6.954454205710857090e-04, + 2.907017700829073683e-05, + 9.120785771591908463e-06, + -1.480971935090678926e00, + -6.169766439371183325e-02, + 4.800832758035045861e-03, + 7.743502257440657043e-05, + -1.440171540732098418e-05, + -4.489324897938611976e-06, + 6.355509554770921721e-01, + 4.194364255265300989e-03, + 6.156587518227093006e-03, + -3.584539136959086518e-04, + -1.505562336471176987e-04, + 2.631189526673375584e-05, 
+ -1.333295991901433553e00, + -4.879824528740911438e-02, + 1.205629889598585497e-02, + -8.346035033896359156e-04, + -1.072962342948566929e-04, + 2.412331753624817981e-05, + -9.793640468817854661e-01, + 6.625405011186732973e-02, + -5.102126473064734317e-03, + -8.551069374443776396e-05, + 8.618032279329005427e-05, + -1.422030758858379208e-05, + 3.444418516979214084e-01, + -3.631195473807800889e-02, + -4.625381215785304145e-04, + 8.881537622047225473e-04, + -1.080757789189670570e-04, + 5.820590714360855199e-08, + 1.240361649325028681e00, + 4.302664794411619614e-02, + 3.137220402938139478e-03, + -5.615677039256951981e-04, + -9.125763978623760322e-05, + 2.367398552885374808e-05, + -1.519498310980496925e00, + -1.512290469691385253e-01, + 1.652996628226939199e-02, + -3.745688059096337011e-05, + -1.938906911473592626e-04, + 1.811217640451412989e-05, + -2.564062357251438717e-01, + 9.626832379335603651e-02, + -2.771163091665611831e-03, + -6.829069315554202020e-04, + 3.363238372709415958e-05, + 8.623099725596635004e-06, + -1.487093617252511990e00, + -6.073523464295225993e-02, + 4.823154268625621383e-03, + 7.122599345182346051e-05, + -1.664931178025436733e-05, + -4.312450972708557703e-06, + ], + dtype=dtype, + ) + .reshape([8, 132]) + .to(device=env.DEVICE) + ) + self.table_info_tensor = paddle.to_tensor( + [0, 0.2, 0.4, 0.01, 0.1, -1], dtype=dtype, place="cpu" + ) + self.em_x_tensor = ( + paddle.to_tensor( + [ + 0.0343909, + 0.11357423, + 0.0858676, + 0.19337772, + 0.1935728, + 0.0477744, + 0.05845198, + 0.19080509, + 0.16111261, + 0.07179262, + 0.10078013, + 0.04640909, + 0.10433399, + 0.15650861, + 0.17527857, + 0.04249097, + ], + dtype=dtype, + ) + .reshape([4, 4]) + .to(device=env.DEVICE) + ) + self.em_tensor = ( + paddle.to_tensor( + [ + 0.0343909, + 0.08394249, + 0.06791791, + 0.00903334, + 0.11357423, + 0.10597251, + 0.05738069, + 0.10071109, + 0.0858676, + 0.17410445, + 0.05390256, + 0.09495758, + 0.19337772, + 0.02045487, + 0.04095526, + 0.18431305, + 0.1935728, + 0.03930614, + 0.0304133, + 0.15261676, + 0.0477744, + 0.06838737, + 0.12824902, + 0.14125861, + 0.05845198, + 0.12731053, + 0.0315968, + 0.14927774, + 0.19080509, + 0.19206871, + 0.14361383, + 0.04083437, + 0.16111261, + 0.19944826, + 0.16563484, + 0.00797179, + 0.07179262, + 0.16993159, + 0.01834742, + 0.08405, + 0.10078013, + 0.0773945, + 0.09541813, + 0.0042979, + 0.04640909, + 0.07968697, + 0.18046262, + 0.11724063, + 0.10433399, + 0.16910201, + 0.10653732, + 0.07434702, + 0.15650861, + 0.0350976, + 0.04088021, + 0.15753491, + 0.17527857, + 0.03178642, + 0.01599623, + 0.08095053, + 0.04249097, + 0.17082205, + 0.18275348, + 0.02921504, + ], + dtype=dtype, + ) + .reshape([4, 4, 4]) + .to(device=env.DEVICE) + ) + self.two_embed_tensor = ( + paddle.to_tensor( + [ + 0.41783850884461693, + 0.06917892522383784, + 0.07309949640440838, + 0.57828038123179, + 0.30460107001129133, + 0.0641857998132136, + 0.016519028000859692, + 0.46818914782665344, + 0.7524658161955905, + 0.7366050152276675, + 0.5442923017739666, + 0.6984532784508917, + 0.8794579292532613, + 0.933333068809702, + 0.052557248156142045, + 0.3158695444821408, + 0.6104896498153188, + 0.3190616402773879, + 0.39327308944220873, + 0.9555810861515368, + 0.45845946239660273, + 0.2836952640436372, + 0.7129235830370116, + 0.21678811087765415, + 0.8589385334305147, + 0.8664288996198418, + 0.2392088190073245, + 0.44554156483185636, + 0.4554902141228184, + 0.6929437508125064, + 0.680397459717037, + 0.47499455998030615, + 0.19872841218252735, + 0.0593083660501722, + 0.20593103822290515, + 
0.8377626566866462, + 0.9006561442856688, + 0.9451358048366522, + 0.03886827486931199, + 0.9395629463676399, + 0.0018941296317954714, + 0.08140115779980839, + 0.9309153205767321, + 0.4697357303240055, + 0.9164471895052549, + 0.5957401092143415, + 0.40338864067603986, + 0.9096349710860572, + 0.027870073369474335, + 0.9614765307496669, + 0.3142035164603587, + 0.4027282032956391, + 0.05129061735552376, + 0.18018240347684844, + 0.8391548601089657, + 0.25198333808352436, + 0.07903335895654717, + 0.9831396742713334, + 0.21328884297544115, + 0.8119626188647525, + 0.1734453905861253, + 0.014174310472666818, + 0.06890574596678134, + 0.3490769935686088, + 0.34055562797730554, + 0.9834924063503578, + 0.1689164263315952, + 0.9611024936313157, + 0.6796725725159389, + 0.7902946379060674, + 0.3045916985592084, + 0.6923776720247495, + 0.5626994287153583, + 0.12132066580981216, + 0.13356198804830732, + 0.5332034125146011, + 0.6155216974624633, + 0.3080851791499254, + 0.8391387652641518, + 0.8588772315368923, + 0.9414859699900482, + 0.9852118289755771, + 0.7514252073835589, + 0.6780090883007501, + 0.1472721338720271, + 0.4785493098407567, + 0.8825343095166535, + 0.1562449821247882, + 0.5809096109347806, + 0.653605647812403, + 0.26158060329219845, + 0.28359029181509054, + 0.23111396285536823, + 0.6711415141607222, + 0.5955230293073148, + 0.14336394912405104, + 0.48478135042139503, + 0.34621668898158153, + 0.7962234329935334, + 0.40204538487553787, + 0.09600971949708359, + 0.985025266359638, + 0.4949655728846287, + 0.23503981206241742, + 0.607828476455725, + 0.21634419784756398, + 0.04166567958728129, + 0.132198384508056, + 0.4112021863641492, + 0.9441979803962212, + 0.993462871462463, + 0.4524002115880147, + 0.6322719605196645, + 0.5121196654684579, + 0.7844974619880201, + 0.6783684708633317, + 0.6402712236722511, + 0.43899788665378925, + 0.6060330628471464, + 0.7082475921988166, + 0.1614968711069913, + 0.6289247345866867, + 0.4034261331727077, + 0.7906075239905527, + 0.9325509002602962, + 0.44489583733770977, + 0.5194672674960213, + 0.04635102497306032, + ], + dtype=dtype, + ) + .reshape([8, 16]) + .to(device=env.DEVICE) + ) + self.table_info_tensor.stop_gradient = not False + self.table_tensor.stop_gradient = not False + self.em_x_tensor.stop_gradient = not True + self.em_tensor.stop_gradient = not True + self.two_embed_tensor.stop_gradient = not True + self.last_layer_size = 8 + self.nloc = 4 + self.nnei = 4 + self.is_sorted = True + # forward test + self.expected_descriptor_tensor = ( + paddle.to_tensor( + [ + 0.47347690809281584, + -0.938671106172836, + -0.566066031386074, + 0.24346508156830923, + 0.8202563571070155, + -1.0373756242429473, + -0.17010015427406364, + -0.8710788156620061, + 0.395670826145944, + -0.7164061254932106, + -0.5182353938571188, + 0.24032480431966494, + 0.7295250448255718, + -0.8217164571528093, + -0.14650667106275897, + -0.743917788428106, + 0.2284657683159583, + -0.42325060475528936, + -0.28528185044441623, + 0.1299218265387629, + 0.4122891899913208, + -0.4792844297628545, + -0.07999903336182355, + -0.44416840002965857, + 0.4349292047304616, + -0.866714677458846, + -0.5207292765686371, + 0.22332001940248375, + 0.7498173442166285, + -0.9546906311960559, + -0.15936409133917512, + -0.7878577875263373, + 0.3466925422373803, + -0.994380783370134, + -0.6270917901063118, + 0.2981402728321051, + 0.9195320780786727, + -1.0718892456307918, + -0.15698461575270795, + -1.1789262485734189, + 0.28814823365263476, + -0.9885184495221915, + -0.5748462943377031, + 0.24501712277073154, + 
0.6966426111509829, + -0.7918732495676203, + -0.15816622107875547, + -0.9232820446171233, + 0.22378298591000056, + -0.6859112389106587, + -0.49619127436326704, + 0.19242493499693383, + 0.622786588111436, + -0.666018566891193, + -0.11621443478059659, + -0.7927712244868067, + 0.33146082229500645, + -0.910623259021886, + -0.6975451800757693, + 0.2708230994848638, + 0.9436440642240583, + -1.103250728415007, + -0.18643132646601496, + -1.1918841520358467, + 0.37921786279033454, + -0.8962410980736447, + -0.500721492855562, + 0.22977304492608347, + 0.6894967253035347, + -0.9081285527067445, + -0.16659833566436824, + -0.8496545388057982, + 0.5137998029000233, + -1.1537368698160295, + -0.656860645256254, + 0.31042037927337496, + 0.9814343841081181, + -1.232310461500326, + -0.23314406631295234, + -1.2304015706558842, + 0.45797993385377606, + -1.1074919572397988, + -0.6048852416894798, + 0.2616527005842335, + 0.7879703504421955, + -1.1362711906177663, + -0.1981304325148623, + -0.9310107317132751, + 0.21315309858654777, + -0.41274344906220745, + -0.2588508380504396, + 0.1066634142045425, + 0.3672005243972004, + -0.4840002903711901, + -0.10065024885011888, + -0.4546504875519408, + 0.47121911760467616, + -0.8813734369794723, + -0.7156169154744415, + 0.242949784111888, + 0.8695344392466614, + -1.2333547296658691, + -0.22100811739419962, + -1.0344804237112, + 0.3541853556808732, + -0.7889099992546985, + -0.632615806745115, + 0.2240548602116392, + 0.7597588014275503, + -1.035877717989762, + -0.17525344544740995, + -0.7266950510645241, + 0.293533442380667, + -0.6807511051238859, + -0.5158926321437481, + 0.19381846340306683, + 0.6817081536687413, + -0.8591698632437857, + -0.15090870856670646, + -0.6035960397897837, + 0.3364163288609897, + -0.6117855193715979, + -0.4783834090534011, + 0.1649210671780133, + 0.6081937728291197, + -0.9156559135117243, + -0.16287243502858786, + -0.7232291367106685, + ], + dtype=dtype, + ) + .reshape([4, 4, 8]) + .to(device=env.DEVICE) + ) + # backward test + self.expected_dy_dem_x = ( + paddle.to_tensor( + [ + -0.02944485238565673, + -0.09481442615634611, + -0.039285023803917796, + -0.08263513336597483, + -0.1147941391226924, + -0.053028707974760975, + -0.04045111384033326, + -0.09645372744447589, + -0.148871652361389, + -0.03949164509537857, + -0.03304671059396837, + -0.07677112538315375, + -0.08936844295120971, + -0.0666033025810816, + -0.036086280282677796, + -0.053263385364202, + ], + dtype=dtype, + ) + .reshape([4, 4]) + .to(device=env.DEVICE) + ) + self.expected_dy_dem = ( + paddle.to_tensor( + [ + -3.437493391458747, + -3.437493391458747, + -3.437493391458747, + -3.437493391458747, + -4.917683334085319, + -4.917683334085319, + -4.917683334085319, + -4.917683334085319, + -3.7978352380265443, + -3.7978352380265443, + -3.7978352380265443, + -3.7978352380265443, + -5.39483968657882, + -5.39483968657882, + -5.39483968657882, + -5.39483968657882, + -4.918657812120523, + -4.918657812120523, + -4.918657812120523, + -4.918657812120523, + -5.405511948034504, + -5.405511948034504, + -5.405511948034504, + -5.405511948034504, + -5.5647056342671615, + -5.5647056342671615, + -5.5647056342671615, + -5.5647056342671615, + -4.870290792037633, + -4.870290792037633, + -4.870290792037633, + -4.870290792037633, + -5.712629223988493, + -5.712629223988493, + -5.712629223988493, + -5.712629223988493, + -4.279958255143791, + -4.279958255143791, + -4.279958255143791, + -4.279958255143791, + -5.554543471933205, + -5.554543471933205, + -5.554543471933205, + -5.554543471933205, + -5.072772403587814, + 
-5.072772403587814, + -5.072772403587814, + -5.072772403587814, + -5.967810024526445, + -5.967810024526445, + -5.967810024526445, + -5.967810024526445, + -5.08155970167425, + -5.08155970167425, + -5.08155970167425, + -5.08155970167425, + -5.238429358303623, + -5.238429358303623, + -5.238429358303623, + -5.238429358303623, + -3.906538220487487, + -3.906538220487487, + -3.906538220487487, + -3.906538220487487, + ], + dtype=dtype, + ) + .reshape([4, 4, 4]) + .to(device=env.DEVICE) + ) + + def test_forward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_atten( + self.table_tensor, + self.table_info_tensor, + self.em_x_tensor, + self.em_tensor, + self.two_embed_tensor, + self.last_layer_size, + self.is_sorted, + ) + + descriptor_tensor = forward_result[0] + + # Check the shape + self.assertEqual(descriptor_tensor.shape, self.expected_descriptor_tensor.shape) + + # Check the values + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + def test_backward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_atten( + self.table_tensor, + self.table_info_tensor, + self.em_x_tensor, + self.em_tensor, + self.two_embed_tensor, + self.last_layer_size, + self.is_sorted, + ) + + descriptor_tensor = forward_result[0] + + # Check the forward + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + # Create a loss and perform backward + loss = descriptor_tensor.sum() + loss.backward() + + # Check gradients + self.assertIsNotNone(self.em_x_tensor.grad) + self.assertIsNotNone(self.em_tensor.grad) + + # Check the shapes of the gradients + self.assertEqual(self.em_x_tensor.grad.shape, self.expected_dy_dem_x.shape) + self.assertEqual(self.em_tensor.grad.shape, self.expected_dy_dem.shape) + + # Check the values of the gradients + np.testing.assert_allclose( + self.em_x_tensor.grad.numpy(), + self.expected_dy_dem_x.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + np.testing.assert_allclose( + self.em_tensor.grad.numpy(), + self.expected_dy_dem.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_tabulate_fusion_se_r.py b/source/tests/pd/test_tabulate_fusion_se_r.py new file mode 100644 index 0000000000..acb90e6e23 --- /dev/null +++ b/source/tests/pd/test_tabulate_fusion_se_r.py @@ -0,0 +1,1353 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.cxx_op import ( + ENABLE_CUSTOMIZED_OP, +) +from deepmd.pd.utils import ( + env, +) + +from ..consistent.common import ( + parameterized, +) + + +@parameterized((paddle.float64, paddle.float32)) +@unittest.skipIf(not ENABLE_CUSTOMIZED_OP, "Paddle customized OPs are not built") +class TestTabulateFusionSeROp(unittest.TestCase): + def setUp(self): + (dtype,) = self.param + if dtype == paddle.float64: + self.prec = 1e-10 + elif dtype == paddle.float32: + self.prec = 1e-5 + self.table_tensor = ( + paddle.to_tensor( + [ + 6.348551343037398542e-01, + 4.209465843706336474e-04, + 6.390862740714405368e-03, + -1.544448595628262176e-04, + -1.891095227974180087e-04, + 2.695025951562175852e-05, + -1.317549846042939343e00, + -5.624478206903206490e-02, + 1.274284553146523905e-02, + -6.836227424141475689e-04, + -1.438066096020836407e-04, + -1.854932873974712940e-06, + 
-9.996964112615246423e-01, + 6.928234423723647617e-02, + -4.974719973810486084e-03, + -2.019584729176823030e-04, + 1.077254539742680247e-04, + -8.024209768588029797e-06, + 3.552689563657350780e-01, + -3.578299775339799371e-02, + -1.319946251007718743e-03, + 1.016701374495701440e-03, + -1.057336720791906388e-04, + 5.182678943855506567e-06, + 1.227750369557627286e00, + 4.100352079064395472e-02, + 3.586869164810712295e-03, + -4.304540913340443135e-04, + -1.269943482892440004e-04, + 1.459465404430219674e-05, + -1.472642501673147031e00, + -1.611354921283318364e-01, + 1.645427874390196360e-02, + 2.107392978135091402e-04, + -2.193541011180757461e-04, + 1.915392497459551146e-05, + -2.855174490181606739e-01, + 9.774337856626263976e-02, + -2.140891880666230714e-03, + -7.148328890055103638e-04, + 1.965696332267534503e-05, + -4.593489654121371453e-06, + -1.468441009949382314e00, + -6.360828127262234399e-02, + 4.751283295356955282e-03, + 8.711899561753186068e-05, + -9.937008678852959884e-06, + 4.273569346584811685e-07, + 6.348599826995243722e-01, + 5.487167506364742930e-04, + 6.386116198716365253e-03, + -1.619832375568118791e-04, + -1.877328309473502049e-04, + 2.134130914519164856e-05, + -1.318111020264137512e00, + -5.599013082054477008e-02, + 1.272225054666903735e-02, + -6.893710047488201898e-04, + -1.434367581078517366e-04, + 3.329508890614227371e-05, + -9.990040854920316793e-01, + 6.918278968071900348e-02, + -4.980714172967731085e-03, + -1.976574487947816198e-04, + 1.070037204086153902e-04, + -7.859875077388093586e-06, + 3.549109954092205532e-01, + -3.580909209068139365e-02, + -1.289508598157979719e-03, + 1.012474257117017967e-03, + -1.054418924402112718e-04, + -1.245498322204730900e-05, + 1.228160763020727630e00, + 4.107512853046493134e-02, + 3.573879491390910459e-03, + -4.355190226638688713e-04, + -1.258433981470396103e-04, + 1.610862268100766631e-05, + -1.474252210958008291e00, + -1.608063442081248406e-01, + 1.646046950167207382e-02, + 2.019843636566674109e-04, + -2.185756589083626730e-04, + 1.978479879983412190e-05, + -2.845402300363228942e-01, + 9.770034635718018168e-02, + -2.162325119197382531e-03, + -7.140472215558940627e-04, + 1.956302663031799223e-05, + 1.932584474244053378e-05, + -1.469076617546759334e00, + -6.351322951074317436e-02, + 4.753890907276497185e-03, + 8.672114560243554321e-05, + -1.004574434175897967e-05, + -4.345700882560937596e-06, + 6.348661083147921769e-01, + 6.763897297752743953e-04, + 6.381144275303845745e-03, + -1.694690463885140694e-04, + -1.868179426353836598e-04, + 3.439291082765030046e-05, + -1.318669650038090335e00, + -5.573589319299507294e-02, + 1.270148368741391351e-02, + -6.950749719342792137e-04, + -1.422194703304518733e-04, + 3.454751241752252323e-05, + -9.983127558632299836e-01, + 6.908311652764687061e-02, + -4.986579772806746212e-03, + -1.933888092529071571e-04, + 1.068327546750306073e-04, + -2.976978385983384886e-05, + 3.545527765488725169e-01, + -3.583457894275744043e-02, + -1.259197760082061621e-03, + 1.008246479193084487e-03, + -1.059401869200098984e-04, + 1.721968053146218465e-06, + 1.228571871257205572e00, + 4.114647496201748883e-02, + 3.560738575723638825e-03, + -4.405332425718102457e-04, + -1.251648759618972115e-04, + 3.659080417076460655e-05, + -1.475858628153338792e00, + -1.604770750960976822e-01, + 1.646639808472218428e-02, + 1.932598402043995316e-04, + -2.175904819601363058e-04, + 1.230256868634094333e-05, + -2.835634435191126679e-01, + 9.765688571984927624e-02, + -2.183734604613508240e-03, + -7.132463811570244078e-04, + 2.021887442373574272e-05, 
+ 1.321401495096886281e-05, + -1.469711274366155784e00, + -6.341812571665436660e-02, + 4.756486470714936521e-03, + 8.631384191910702040e-05, + -1.010516500002806932e-05, + -1.110874413279218719e-05, + 6.348735101551836735e-01, + 8.039610290153098582e-04, + 6.375948457075718626e-03, + -1.769074132993461279e-04, + -1.855677150383903214e-04, + 3.421271436711027645e-05, + -1.319225739518145257e00, + -5.548207260888919634e-02, + 1.268054645200545304e-02, + -7.007297564176242621e-04, + -1.408885818822980523e-04, + 3.124701885930576017e-05, + -9.976224235482542557e-01, + 6.898332734138989952e-02, + -4.992317635216104131e-03, + -1.891404922064061889e-04, + 1.053957535708985289e-04, + -1.089286646983666076e-06, + 3.541943058468561834e-01, + -3.585946084769019160e-02, + -1.229013912637771933e-03, + 1.004009466262262241e-03, + -1.059129033455631863e-04, + -4.941663399086282537e-06, + 1.228983691638902087e00, + 4.121755707472917613e-02, + 3.547447845420277635e-03, + -4.455036207721562607e-04, + -1.239172256532283074e-04, + 3.437341080261359686e-05, + -1.477461752073406132e00, + -1.601476900261984693e-01, + 1.647206544856073471e-02, + 1.845724864086241608e-04, + -2.173853638475303177e-04, + 3.620505631412716563e-05, + -2.825870937484175061e-01, + 9.761299713537928413e-02, + -2.205119732548723246e-03, + -7.124245958910824846e-04, + 2.074820558303217398e-05, + 1.209381466404663338e-05, + -1.470344979888463577e00, + -6.332297013406351649e-02, + 4.759069711794740656e-03, + 8.589935708505183382e-05, + -1.045842324058424788e-05, + -6.134254562752213537e-06, + 6.348821871815598650e-01, + 9.314261853726121809e-04, + 6.370530236175125580e-03, + -1.842978984547447257e-04, + -1.840210089691990327e-04, + 2.234897510077387526e-05, + -1.319779292891724465e00, + -5.522867246076747227e-02, + 1.265944033870337014e-02, + -7.063360380236871801e-04, + -1.393416734992873119e-04, + 1.931167378610719847e-05, + -9.969330896946905218e-01, + 6.888342466806646192e-02, + -4.997928623431705138e-03, + -1.849303524006284602e-04, + 1.053651633995249134e-04, + -2.870133904891753420e-05, + 3.538355893399378616e-01, + -3.588374034700148041e-02, + -1.198957225773849763e-03, + 9.997681359810027708e-04, + -1.060678155548662341e-04, + -4.107776618240329050e-06, + 1.229396221507694564e00, + 4.128837188660083868e-02, + 3.534008730169808672e-03, + -4.504275777948374090e-04, + -1.224778886969254976e-04, + 2.455513266683544498e-05, + -1.479061581584721008e00, + -1.598181942132129441e-01, + 1.647747255391585064e-02, + 1.759082956613747337e-04, + -2.158335508261176197e-04, + 6.406725844410341030e-06, + -2.816111850012528728e-01, + 9.756868109694678826e-02, + -2.226479900633348240e-03, + -7.115823288942964460e-04, + 2.121038517729223415e-05, + 1.358027318850170435e-05, + -1.470977733597038872e00, + -6.322776301216057049e-02, + 4.761640356162846754e-03, + 8.547576468445008296e-05, + -1.081874527005240631e-05, + -8.845528475774308509e-07, + 6.348921383103013349e-01, + 1.058780765759985421e-03, + 6.364891110105044131e-03, + -1.916363332792569681e-04, + -1.827768871456785058e-04, + 2.275707291847725182e-05, + -1.320330314380025793e00, + -5.497569611120622923e-02, + 1.263816684562326688e-02, + -7.118908987616576157e-04, + -1.380182662155302303e-04, + 1.630252530406085050e-05, + -9.962447554247517711e-01, + 6.878341103651769428e-02, + -5.003413601927745452e-03, + -1.807403991329658622e-04, + 1.040363362483998831e-04, + -4.422604643727719699e-06, + 3.534766330394523148e-01, + -3.590741998555346121e-02, + -1.169027863565602274e-03, + 
9.955202772264954043e-04, + -1.060447700647724903e-04, + -1.021743279826507342e-05, + 1.229809458175783687e00, + 4.135891644424664892e-02, + 3.520422661584679015e-03, + -4.553035794622276055e-04, + -1.210679214963379874e-04, + 1.595827246550979495e-05, + -1.480658115605847147e00, + -1.594885928526604546e-01, + 1.648262036665308974e-02, + 1.672799673730459213e-04, + -2.148155690753495697e-04, + -1.867405535452657550e-06, + -2.806357215496423363e-01, + 9.752393810975558408e-02, + -2.247814508535729908e-03, + -7.107227883497464890e-04, + 2.207595560206285042e-05, + -1.137331983229785190e-06, + -1.471609534977757372e00, + -6.313250460562676303e-02, + 4.764198129054059844e-03, + 8.503999275315992160e-05, + -1.072692568096017848e-05, + -1.373273803695183988e-05, + 6.349033624136081189e-01, + 1.186020367092407990e-03, + 6.359032581545111251e-03, + -1.989262833250400370e-04, + -1.812752661309344573e-04, + 1.302837915648187095e-05, + -1.320878808237722746e00, + -5.472314689282183064e-02, + 1.261672747063919374e-02, + -7.173917679890315846e-04, + -1.373052781380030543e-04, + 3.768455339511444900e-05, + -9.955574218354472649e-01, + 6.868328895828368363e-02, + -5.008773436308684712e-03, + -1.765844799686671349e-04, + 1.034810966435298563e-04, + -1.111176255155353207e-05, + 3.531174429312692320e-01, + -3.593050231143132822e-02, + -1.139225984250480384e-03, + 9.912704081392112714e-04, + -1.064918174657224404e-04, + 2.680738443515978403e-06, + 1.230223398925979650e00, + 4.142918782293085467e-02, + 3.506691073047987512e-03, + -4.601302388532728274e-04, + -1.198865987378785417e-04, + 1.656386182477533959e-05, + -1.482251353107205460e00, + -1.591588911206925361e-01, + 1.648750985769346228e-02, + 1.586901819247656846e-04, + -2.147074421644348298e-04, + 2.641762503224190698e-05, + -2.796607076604977760e-01, + 9.747876869099537933e-02, + -2.269122958003529523e-03, + -7.098388532529275848e-04, + 2.226701915637888804e-05, + 1.106237844209756009e-05, + -1.472240383519069384e00, + -6.303719517464229094e-02, + 4.766742755353862819e-03, + 8.459962202271287246e-05, + -1.132218730142039535e-05, + 8.958476322974335592e-07, + 6.349158583197994643e-01, + 1.313140616388666637e-03, + 6.352956158169477396e-03, + -2.061601622854974502e-04, + -1.806298821034440756e-04, + 3.770936817966389514e-05, + -1.321424778752664952e00, + -5.447102810827629538e-02, + 1.259512371128685033e-02, + -7.228490733933210606e-04, + -1.356407402355522122e-04, + 2.099832634320949299e-05, + -9.948710899987588396e-01, + 6.858306092758209571e-02, + -5.014008993202081696e-03, + -1.724573933478598642e-04, + 1.029144894329912032e-04, + -1.738522780636760158e-05, + 3.527580249757622521e-01, + -3.595298987582695727e-02, + -1.109551740263377793e-03, + 9.870126155001155040e-04, + -1.064931456292656029e-04, + -2.059910396978558087e-06, + 1.230638041011988815e00, + 4.149918312660194619e-02, + 3.492815399561766294e-03, + -4.649051157564728157e-04, + -1.192927614880224277e-04, + 4.072077917749542957e-05, + -1.483841293110880866e00, + -1.588290941739924356e-01, + 1.649214200293154520e-02, + 1.501282794678792006e-04, + -2.138853834118830831e-04, + 2.633111784219914963e-05, + -2.786861475954987011e-01, + 9.743317336979973042e-02, + -2.290404652904617314e-03, + -7.089360554728917595e-04, + 2.260180638238835256e-05, + 1.741828165826791135e-05, + -1.472870278712053782e00, + -6.294183498489253070e-02, + 4.769273959660644442e-03, + 8.414681093302789892e-05, + -1.142905205912834352e-05, + -4.014065121916994726e-06, + 6.349296248136164778e-01, + 1.440137170869312810e-03, + 
6.346663352465874847e-03, + -2.133510744796659759e-04, + -1.788513201196447670e-04, + 1.721163944875696416e-05, + -1.321968230245579967e00, + -5.421934303028537461e-02, + 1.257335706466754244e-02, + -7.282542863230233527e-04, + -1.343059033644905889e-04, + 1.747822893445653714e-05, + -9.941857609618123259e-01, + 6.848272942128874607e-02, + -5.019121140152461337e-03, + -1.683596869525186377e-04, + 1.024142382012053007e-04, + -2.632719129544749384e-05, + 3.523983851077774343e-01, + -3.597488523292310947e-02, + -1.080005278271846739e-03, + 9.827512175914082399e-04, + -1.066680880078371994e-04, + 3.403258606315080555e-07, + 1.231053381658700818e00, + 4.156889948792314576e-02, + 3.478797077596604108e-03, + -4.696409807358484993e-04, + -1.173636798436718986e-04, + 1.149931408689037458e-05, + -1.485427934690428442e00, + -1.584992071496764965e-01, + 1.649651778315383566e-02, + 1.415960091521040870e-04, + -2.125888038426753843e-04, + 7.384582528889821378e-06, + -2.777120456109742896e-01, + 9.738715268720327112e-02, + -2.311658999267464203e-03, + -7.080165982958596923e-04, + 2.340034491729013294e-05, + 5.174033942788913380e-06, + -1.473499220050474623e00, + -6.284642430757329812e-02, + 4.771791466347353149e-03, + 8.368540130389298475e-05, + -1.162498575113560591e-05, + -5.381585801785509468e-06, + 6.349446606365225509e-01, + 1.567005718051586727e-03, + 6.340155681555815353e-03, + -2.204854663573854625e-04, + -1.779502948888764897e-04, + 3.196283450610521294e-05, + -1.322509167069771951e00, + -5.396809490162747525e-02, + 1.255142902735281209e-02, + -7.336077414823606981e-04, + -1.332538502428148267e-04, + 2.525523713666122703e-05, + -9.935014357470516311e-01, + 6.838229689892011409e-02, + -5.024110745516051704e-03, + -1.642860423419652261e-04, + 1.011792892256958577e-04, + -5.902237032851650630e-06, + 3.520385292366049468e-01, + -3.599619093977864809e-02, + -1.050586739210998023e-03, + 9.784837539753422735e-04, + -1.066187407206570670e-04, + -6.052991441884039902e-06, + 1.231469418062474341e00, + 4.163833406830096812e-02, + 3.464637544942418459e-03, + -4.743218246565151001e-04, + -1.164951133813105271e-04, + 2.473911917278243621e-05, + -1.487011276970676033e00, + -1.581692351651968476e-01, + 1.650063818395723983e-02, + 1.331001312464952355e-04, + -2.118074389246019866e-04, + 9.192428068946771109e-06, + -2.767384059577842614e-01, + 9.734070719609828892e-02, + -2.332885405321092481e-03, + -7.070743922828596519e-04, + 2.373777250910882265e-05, + 1.127700884024945933e-05, + -1.474127207030835107e00, + -6.275096341939470634e-02, + 4.774294999622533293e-03, + 8.321347296773265077e-05, + -1.162225195759229858e-05, + -1.468175407624093560e-05, + 6.349609644870094494e-01, + 1.693741975839754832e-03, + 6.333434667015966531e-03, + -2.275719866012916918e-04, + -1.766077012712487378e-04, + 2.919052022666632077e-05, + -1.323047593610823247e00, + -5.371728693515605280e-02, + 1.252934109528984138e-02, + -7.389107006611626187e-04, + -1.322992615601379437e-04, + 3.689337377145077536e-05, + -9.928181153524118230e-01, + 6.828176580261838269e-02, + -5.028978678356570489e-03, + -1.602449667799085492e-04, + 1.004819833385002965e-04, + -7.012859043909368637e-06, + 3.516784632459502014e-01, + -3.601690955621394963e-02, + -1.021296258318379370e-03, + 9.742140050919662845e-04, + -1.068837890347894775e-04, + 3.261791903209577241e-07, + 1.231886147391427544e00, + 4.170748405790913882e-02, + 3.450338240560582581e-03, + -4.789562532735843967e-04, + -1.153902983973557932e-04, + 2.856018069496295048e-05, + -1.488591319127526624e00, 
+ -1.578391833182464787e-01, + 1.650450419566778376e-02, + 1.246407552546250339e-04, + -2.115332183818513349e-04, + 3.149345367837511192e-05, + -2.757652328811996956e-01, + 9.729383746118988596e-02, + -2.354083281534554220e-03, + -7.061133365182417328e-04, + 2.418809213597686327e-05, + 1.280494807360028992e-05, + -1.474754239152433311e00, + -6.265545260258377491e-02, + 4.776784283590801948e-03, + 8.273687806363864625e-05, + -1.229952261449745124e-05, + 3.204146150058887708e-06, + 6.349785350208994039e-01, + 1.820341692612803541e-03, + 6.326501834700739083e-03, + -2.346100929840904846e-04, + -1.748840426396014729e-04, + 1.130785525935554482e-05, + -1.323583514286295282e00, + -5.346692231381247606e-02, + 1.250709476370755191e-02, + -7.441705970339035966e-04, + -1.303302437099287372e-04, + 7.935577538626925858e-06, + -9.921358007514943234e-01, + 6.818113855713830995e-02, + -5.033725808341922223e-03, + -1.562353718150353687e-04, + 1.001568149392305130e-04, + -2.302258383924021595e-05, + 3.513181929939074299e-01, + -3.603704364469759169e-02, + -9.921339651685744804e-04, + 9.699384566370250092e-04, + -1.069081013817698415e-04, + -2.744679484186812129e-06, + 1.232303566785723392e00, + 4.177634667571154814e-02, + 3.435900604437185177e-03, + -4.835440426346156498e-04, + -1.140781768005934266e-04, + 2.411509316948267986e-05, + -1.490168060387760951e00, + -1.575090566866652331e-01, + 1.650811681325956015e-02, + 1.162064642248029450e-04, + -2.100324946396962247e-04, + 4.868837971279583202e-06, + -2.747925306207861240e-01, + 9.724654405895133413e-02, + -2.375252040655950400e-03, + -7.051355614741510987e-04, + 2.505903781065493165e-05, + -2.569082101323676566e-06, + -1.475380315917416585e00, + -6.255989214488603956e-02, + 4.779259042312647421e-03, + 8.224491253736542200e-05, + -1.205054378062991984e-05, + -1.594987943813344381e-05, + 6.349973708516511994e-01, + 1.946800647308156995e-03, + 6.319358714566076195e-03, + -2.415904693897710526e-04, + -1.741570105122868483e-04, + 3.342152683043006766e-05, + -1.324116933545430141e00, + -5.321700419064152865e-02, + 1.248469152702344660e-02, + -7.493727578058629766e-04, + -1.295525827398787404e-04, + 2.659942231629285135e-05, + -9.914544928937398804e-01, + 6.808041756983601589e-02, + -5.038353005641925050e-03, + -1.522500103683389601e-04, + 9.911425811568465554e-05, + -1.035676665958809070e-05, + 3.509577243129330393e-01, + -3.605659577023319351e-02, + -9.630999837076988784e-04, + 9.656594578503095369e-04, + -1.070158919994286978e-04, + -2.281503112307771063e-06, + 1.232721673357858538e00, + 4.184491916948063911e-02, + 3.421326077437690516e-03, + -4.880823132679394552e-04, + -1.129872290747681817e-04, + 2.854952342195995698e-05, + -1.491741500028839651e00, + -1.571788603283475749e-01, + 1.651147703627379656e-02, + 1.078118218043548068e-04, + -2.094656285123614196e-04, + 1.573608604543182341e-05, + -2.738203034102859035e-01, + 9.719882757757769554e-02, + -2.396391097750961291e-03, + -7.041328812172977002e-04, + 2.511128111671661627e-05, + 1.472819566023977703e-05, + -1.476005436830838402e00, + -6.246428233956573262e-02, + 4.781718999863710830e-03, + 8.175246233396933941e-05, + -1.310850420537104008e-05, + 1.717274673157189222e-05, + 6.350174705506670403e-01, + 2.073114649501703322e-03, + 6.312006840494438151e-03, + -2.485262001215581039e-04, + -1.724445833892894095e-04, + 1.623821996891234705e-05, + -1.324647855868849478e00, + -5.296753568880858964e-02, + 1.246213287875118370e-02, + -7.545274547770323926e-04, + -1.284298383236558551e-04, + 
3.142127009671183137e-05, + -9.907741927046019859e-01, + 6.797960523066012839e-02, + -5.042861140826992473e-03, + -1.482946605870891395e-04, + 9.821987974303589589e-05, + -3.593831829470692349e-06, + 3.505970630098214080e-01, + -3.607556850024738748e-02, + -9.341944322877257512e-04, + 9.613773761737330267e-04, + -1.072343182304808093e-04, + 2.791451096706449119e-06, + 1.233140464192951757e00, + 4.191319881581374862e-02, + 3.406616101162745613e-03, + -4.925758895926437772e-04, + -1.113902906060245713e-04, + 1.275308331152581608e-05, + -1.493311637378700762e00, + -1.568485992811522733e-01, + 1.651458586873823589e-02, + 9.944841367174414462e-05, + -2.085492230796830474e-04, + 1.276456024245067926e-05, + -2.728485554775001987e-01, + 9.715068861693920699e-02, + -2.417499870240937074e-03, + -7.031148500958378164e-04, + 2.576543833825076558e-05, + 7.841889896124507091e-06, + -1.476629601400710978e00, + -6.236862348540499201e-02, + 4.784163880393361643e-03, + 8.124213252544174404e-05, + -1.286332078849730127e-05, + -1.821996546344873330e-06, + 6.350388326475970846e-01, + 2.199279539485121671e-03, + 6.304447750121061969e-03, + -2.554047701160370044e-04, + -1.716061813901302753e-04, + 3.413524324276134592e-05, + -1.325176285768258300e00, + -5.271851990161838253e-02, + 1.243942031140890699e-02, + -7.596346042592860793e-04, + -1.269803855069738714e-04, + 2.314478643438959578e-05, + -9.900949010857222898e-01, + 6.787870391214460841e-02, + -5.047251084767826433e-03, + -1.443753107913585767e-04, + 9.837034053479728221e-05, + -3.865274593462701621e-05, + 3.502362148656810170e-01, + -3.609396440447816545e-02, + -9.054174237006253068e-04, + 9.570894530963515055e-04, + -1.071221722792567601e-04, + -5.180134097885568801e-06, + 1.233559936349031494e00, + 4.198118292014653419e-02, + 3.391772117805412056e-03, + -4.970162819604460663e-04, + -1.105584293158747960e-04, + 2.757032189173095048e-05, + -1.494878471815561216e00, + -1.565182785628131401e-01, + 1.651744431908664865e-02, + 9.112268062696188113e-05, + -2.082277461664644284e-04, + 3.370820636496137736e-05, + -2.718772910441742408e-01, + 9.710212778853387350e-02, + -2.438577777940475859e-03, + -7.020756635958485484e-04, + 2.613933618298708639e-05, + 1.211520684095310762e-05, + -1.477252809138063672e00, + -6.227291588670166161e-02, + 4.786593408182711167e-03, + 8.072392747742672100e-05, + -1.281499371544444526e-05, + -1.293175202324119235e-05, + 6.350614556306495295e-01, + 2.325291188338546311e-03, + 6.296682984661446623e-03, + -2.622362895631248896e-04, + -1.701076322674243866e-04, + 2.573454296903621253e-05, + -1.325702227786145437e00, + -5.246995989253622206e-02, + 1.241655531642829255e-02, + -7.646904682589584622e-04, + -1.257704658362481128e-04, + 2.439373356208127567e-05, + -9.894166189151047952e-01, + 6.777771596940393439e-02, + -5.051523708536139086e-03, + -1.404733355821404265e-04, + 9.677082285072928253e-05, + -3.720510878458014501e-06, + 3.498751856359115786e-01, + -3.611178605486395354e-02, + -8.767690652124425499e-04, + 9.527998576480508275e-04, + -1.072771816869139909e-04, + -2.281376475091892258e-06, + 1.233980086857325631e00, + 4.204886881676297983e-02, + 3.376795570009583514e-03, + -5.014114486109571937e-04, + -1.092957353261917852e-04, + 2.516456964431257380e-05, + -1.496442002767713664e00, + -1.561879031708521548e-01, + 1.652005340007862977e-02, + 8.282284133744905071e-05, + -2.067123325224875000e-04, + 7.057486539657783089e-06, + -2.709065143258797548e-01, + 9.705314571543909030e-02, + -2.459624243094573216e-03, + -7.010187162791577066e-04, 
+ 2.672975399789282626e-05, + 7.629793933874534523e-06, + -1.477875059556995385e00, + -6.217715985326619649e-02, + 4.789007307701962507e-03, + 8.019935829649041371e-05, + -1.318861260046749971e-05, + -7.150339348059032240e-06, + 6.350853379468965887e-01, + 2.451145498001100487e-03, + 6.288714088740080324e-03, + -2.690159202421790068e-04, + -1.686584359429067433e-04, + 1.941481480743946700e-05, + -1.326225686495484890e00, + -5.222185869521017709e-02, + 1.239353938406437261e-02, + -7.696964132049412353e-04, + -1.246012242240120604e-04, + 2.724071141974432252e-05, + -9.887393470472876089e-01, + 6.767664374012982709e-02, + -5.055679883306329545e-03, + -1.366074591188833347e-04, + 9.623033677044332457e-05, + -1.113456896173822779e-05, + 3.495139810501832756e-01, + -3.612903602543367232e-02, + -8.482494585971035728e-04, + 9.485064841097947883e-04, + -1.073561607316583907e-04, + -2.239996380309942211e-06, + 1.234400912722548371e00, + 4.211625386880359784e-02, + 3.361687900729734210e-03, + -5.057597926077623488e-04, + -1.078411892315765344e-04, + 1.508800592977199686e-05, + -1.498002229713325750e00, + -1.558574780824932282e-01, + 1.652241412871961052e-02, + 7.456368677257522147e-05, + -2.062001731191939454e-04, + 2.069621557469772063e-05, + -2.699362295319003291e-01, + 9.700374303226286243e-02, + -2.480638690415259105e-03, + -6.999405672986690023e-04, + 2.700789474676622474e-05, + 1.556143061449123430e-05, + -1.478496352174730522e00, + -6.208135570041733303e-02, + 4.791405303667145565e-03, + 7.966538051836852740e-05, + -1.352687841609079228e-05, + -2.789411930543395566e-06, + 6.351104780025849106e-01, + 2.576838401336829787e-03, + 6.280542610220480118e-03, + -2.757414391158645754e-04, + -1.675762649448408429e-04, + 2.787462665161048641e-05, + -1.326746666499438287e00, + -5.197421931349595348e-02, + 1.237037400330611749e-02, + -7.746541492504023475e-04, + -1.232228491818352083e-04, + 2.166599538617633252e-05, + -9.880630863135209108e-01, + 6.757548954459043078e-02, + -5.059720480258220535e-03, + -1.327693574508429343e-04, + 9.550030312894054513e-05, + -1.096549240339310371e-05, + 3.491526068124157778e-01, + -3.614571689219699124e-02, + -8.198587001702131727e-04, + 9.442100079790295610e-04, + -1.074330339280879455e-04, + -2.103241190440061311e-06, + 1.234822410923189784e00, + 4.218333546826981417e-02, + 3.346450553092000530e-03, + -5.100549148199152614e-04, + -1.071543306169886722e-04, + 3.572075491055831030e-05, + -1.499559152180234056e00, + -1.555270082545787691e-01, + 1.652452752618108200e-02, + 6.633607063542407416e-05, + -2.052990867644106118e-04, + 1.891505702101457936e-05, + -2.689664408651156746e-01, + 9.695392038509384469e-02, + -2.501620547117759490e-03, + -6.988464710389351081e-04, + 2.774961528830105395e-05, + 4.843681010028069226e-06, + -1.479116686511674494e00, + -6.198550374897651011e-02, + 4.793787121096219732e-03, + 7.912045955652986253e-05, + -1.359696279035538403e-05, + -9.132339849453571562e-06, + 6.351368741634448867e-01, + 2.702365862198193025e-03, + 6.272170100036473551e-03, + -2.824171711189519380e-04, + -1.661976899287730559e-04, + 2.457347650017094835e-05, + -1.327265172431057128e00, + -5.172704472148267896e-02, + 1.234706066178771662e-02, + -7.795630288411945592e-04, + -1.217395799935142969e-04, + 1.184741714306808905e-05, + -9.873878375219384829e-01, + 6.747425568563097942e-02, + -5.063646370480812467e-03, + -1.289626891970745083e-04, + 9.513074838211379970e-05, + -2.521433322545949321e-05, + 3.487910686007592576e-01, + -3.616183123303555458e-02, + 
-7.915968808226425679e-04, + 9.399119246579864433e-04, + -1.077055728285351480e-04, + 6.031191175422362627e-06, + 1.235244578411804905e00, + 4.225011103602600848e-02, + 3.331084970256580589e-03, + -5.143079026275864784e-04, + -1.055716785023949844e-04, + 2.051193936812822612e-05, + -1.501112769745742259e00, + -1.551964986234863897e-01, + 1.652639461772111712e-02, + 5.814089462644928566e-05, + -2.041249358339155683e-04, + 6.311073191969795411e-06, + -2.679971525218879380e-01, + 9.690367843145115956e-02, + -2.522569242956208650e-03, + -6.977319783847560700e-04, + 2.827424678587480721e-05, + 2.739673941330651616e-06, + -1.479736062091468574e00, + -6.188960432526132566e-02, + 4.796152485364500034e-03, + 7.856828747830194362e-05, + -1.395147193446202365e-05, + -4.087221013031299888e-06, + 6.351645247550001816e-01, + 2.827723875485507743e-03, + 6.263598112024793517e-03, + -2.890409134869928735e-04, + -1.648390823803598971e-04, + 2.215887759642637032e-05, + -1.327781208952985015e00, + -5.148033786352124164e-02, + 1.232360084570068709e-02, + -7.844171563535663055e-04, + -1.210428935521009746e-04, + 3.344327592646507844e-05, + -9.867136014577331249e-01, + 6.737294444867666932e-02, + -5.067458424877044516e-03, + -1.251812701937470213e-04, + 9.419473244264059593e-05, + -1.679002076268449654e-05, + 3.484293720675762929e-01, + -3.617738162759492893e-02, + -7.634640860539731316e-04, + 9.356082122653546981e-04, + -1.075431084112703954e-04, + -3.044614041061100766e-06, + 1.235667412115300623e00, + 4.231657802179918798e-02, + 3.315592595281378029e-03, + -5.185116053649769336e-04, + -1.041674655671950871e-04, + 1.242766263135090892e-05, + -1.502663082036415076e00, + -1.548659541050484978e-01, + 1.652801643260504508e-02, + 4.998556989557471122e-05, + -2.037688261998792680e-04, + 2.657243869390409541e-05, + -2.670283686919466826e-01, + 9.685301784023310490e-02, + -2.543484210258855835e-03, + -6.965966582328896994e-04, + 2.850491087748043708e-05, + 1.232179636112698650e-05, + -1.480354478441044286e00, + -6.179365776107784841e-02, + 4.798501122259496952e-03, + 7.800586916120723585e-05, + -1.413851691566035862e-05, + -5.727587674967719880e-06, + 6.351934280628791507e-01, + 2.952908467203564646e-03, + 6.254828202758994093e-03, + -2.956111985445306826e-04, + -1.636502852942454153e-04, + 2.616921494951480123e-05, + -1.328294780757159899e00, + -5.123410165425365537e-02, + 1.229999603970671068e-02, + -7.892274520450543677e-04, + -1.195721301312790567e-04, + 2.454197033093738297e-05, + -9.860403788833298488e-01, + 6.727155810173718331e-02, + -5.071157514069617352e-03, + -1.214296539729165295e-04, + 9.340570341953608358e-05, + -1.444050153586573228e-05, + 3.480675228394242149e-01, + -3.619237065717702262e-02, + -7.354603960058733389e-04, + 9.313051737393654526e-04, + -1.076930273455606579e-04, + -7.696053039474192446e-07, + 1.236090908935226107e00, + 4.238273390417521269e-02, + 3.299974870987111650e-03, + -5.226642260988254756e-04, + -1.032474625011560351e-04, + 2.396475265799989632e-05, + -1.504210088727871764e00, + -1.545353795944727493e-01, + 1.652939400402650763e-02, + 4.186078937618800693e-05, + -2.027012231708198600e-04, + 1.761148452766873776e-05, + -2.660600935582757565e-01, + 9.680193929166537592e-02, + -2.564364883962782712e-03, + -6.954454205710857090e-04, + 2.907017700829073683e-05, + 9.120785771591908463e-06, + -1.480971935090678926e00, + -6.169766439371183325e-02, + 4.800832758035045861e-03, + 7.743502257440657043e-05, + -1.440171540732098418e-05, + -4.489324897938611976e-06, + 6.355509554770921721e-01, + 
4.194364255265300989e-03, + 6.156587518227093006e-03, + -3.584539136959086518e-04, + -1.505562336471176987e-04, + 2.631189526673375584e-05, + -1.333295991901433553e00, + -4.879824528740911438e-02, + 1.205629889598585497e-02, + -8.346035033896359156e-04, + -1.072962342948566929e-04, + 2.412331753624817981e-05, + -9.793640468817854661e-01, + 6.625405011186732973e-02, + -5.102126473064734317e-03, + -8.551069374443776396e-05, + 8.618032279329005427e-05, + -1.422030758858379208e-05, + 3.444418516979214084e-01, + -3.631195473807800889e-02, + -4.625381215785304145e-04, + 8.881537622047225473e-04, + -1.080757789189670570e-04, + 5.820590714360855199e-08, + 1.240361649325028681e00, + 4.302664794411619614e-02, + 3.137220402938139478e-03, + -5.615677039256951981e-04, + -9.125763978623760322e-05, + 2.367398552885374808e-05, + -1.519498310980496925e00, + -1.512290469691385253e-01, + 1.652996628226939199e-02, + -3.745688059096337011e-05, + -1.938906911473592626e-04, + 1.811217640451412989e-05, + -2.564062357251438717e-01, + 9.626832379335603651e-02, + -2.771163091665611831e-03, + -6.829069315554202020e-04, + 3.363238372709415958e-05, + 8.623099725596635004e-06, + -1.487093617252511990e00, + -6.073523464295225993e-02, + 4.823154268625621383e-03, + 7.122599345182346051e-05, + -1.664931178025436733e-05, + -4.312450972708557703e-06, + ], + dtype=dtype, + ) + .reshape([8, 132]) + .to(device=env.DEVICE) + ) + self.table_info_tensor = paddle.to_tensor( + [0, 0.2, 0.4, 0.01, 0.1, -1], dtype=dtype, device="cpu" + ) + self.em_tensor = ( + paddle.to_tensor( + [ + 0.0343909, + 0.11357423, + 0.0858676, + 0.19337772, + 0.1935728, + 0.0477744, + 0.05845198, + 0.19080509, + 0.16111261, + 0.07179262, + 0.10078013, + 0.04640909, + 0.10433399, + 0.15650861, + 0.17527857, + 0.04249097, + ], + dtype=dtype, + ) + .reshape([4, 4]) + .to(device=env.DEVICE) + ) + self.table_info_tensor.stop_gradient = not True + self.table_tensor.stop_gradient = not True + self.em_tensor.stop_gradient = not True + self.last_layer_size = 8 + self.nloc = 4 + self.nnei = 4 + # forward test + self.expected_descriptor_tensor = ( + paddle.to_tensor( + [ + 0.6348771631809248, + -1.3194691113291661, + -0.9973196209241018, + 0.354036826929785, + 1.229164742167305, + -1.4781646269685296, + -0.2821585274143141, + -1.4706229329558798, + 0.634985122151867, + -1.3237744576184003, + -0.9918921699941985, + 0.3511893756810418, + 1.2324529289292472, + -1.4907308230849954, + -0.27444497948193763, + -1.475603858301356, + 0.6349382934267259, + -1.3222859349253087, + -0.9937841045345152, + 0.35218726188637794, + 1.2312974110088875, + -1.4863573766557947, + -0.27714069648435136, + -1.4738678134261312, + 0.635174147464672, + -1.3279549545501694, + -0.9864860923352163, + 0.34830716632779013, + 1.235810383474792, + -1.5031859872970013, + -0.26670125536196, + -1.48056314536567, + 0.6351747075878561, + -1.3279649806303675, + -0.9864729560930998, + 0.3483001078156069, + 1.2358186430848, + -1.5032161761365208, + -0.26668236472889295, + -1.4805751935655516, + 0.6348898134171228, + -1.320207897862343, + -0.996397864565745, + 0.35355654279185456, + 1.229717427214055, + -1.4803030761521567, + -0.28085278195255303, + -1.4714690036779643, + 0.6349015416696653, + -1.3207940654749135, + -0.9956637569426188, + 0.3531730613337772, + 1.2301592741196672, + -1.482004932451056, + -0.2798116121790224, + -1.472142789257649, + 0.63516680538694, + -1.3278226472708177, + -0.9866593634585263, + 0.34840024552503546, + 1.235701482917897, + -1.5027877523544166, + -0.2669503949447871, + 
-1.4804042247866864, + 0.6350880729003239, + -1.3262837737167732, + -0.9886640555953151, + 0.34947378247466837, + 1.2344477758485055, + -1.4981756178487995, + -0.26982830526911433, + -1.4785654185805013, + 0.6349182326958803, + -1.3215223841369783, + -0.9947481627455212, + 0.3526935713672062, + 1.230712444499081, + -1.4841259603235413, + -0.2785114943045205, + -1.4729830941778024, + 0.6349622896803752, + -1.3230894924528385, + -0.9927648497591829, + 0.35165036475319683, + 1.2319186867506238, + -1.4887144441648619, + -0.27568933247281807, + -1.4748031156434684, + 0.63488841840565, + -1.3201327386056192, + -0.9964918149720808, + 0.3536055582326517, + 1.2296609874447608, + -1.4800851938933008, + -0.28098595018607436, + -1.4713827704069893, + 0.6349684240930827, + -1.3232801685109328, + -0.9925222773369173, + 0.3515223472158862, + 1.232066971980551, + -1.4892750825477157, + -0.27534360663790663, + -1.4750256975261462, + 0.6350768567116333, + -1.326043208512698, + -0.9889756942263948, + 0.3496401112248936, + 1.234253909454779, + -1.4974578690711353, + -0.2702749376389924, + -1.478279543547352, + 0.6351242549804302, + -1.327020671488853, + -0.9877065253616449, + 0.34896178641041964, + 1.2350451717803512, + -1.5003796519427108, + -0.2684547326157591, + -1.4794437477498377, + 0.6348845468620932, + -1.319916787317926, + -0.9967615341650043, + 0.3537461965952468, + 1.2294990915247286, + -1.4794595816676606, + -0.2813681581698538, + -1.4711352025108961, + ], + dtype=dtype, + ) + .reshape([4, 4, 8]) + .to(device=env.DEVICE) + ) + # backward test + self.expected_dy_dem = ( + paddle.to_tensor( + [ + -0.10588345474250505, + -0.1002972786439324, + -0.10224731275660418, + -0.0947116532720767, + -0.09469805979213963, + -0.10493653217842225, + -0.10418182788008934, + -0.09489094713093613, + -0.09696412406683491, + -0.10323988208957913, + -0.10119709956091572, + -0.10503308350390633, + -0.10094704040733082, + -0.09728619911848688, + -0.09597416541247616, + -0.10531022473788323, + ], + dtype=dtype, + ) + .reshape([4, 4]) + .to(device=env.DEVICE) + ) + + def test_forward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_r( + self.table_tensor, + self.table_info_tensor, + self.em_tensor, + self.last_layer_size, + ) + + descriptor_tensor = forward_result[0] + + # Check the shape + self.assertEqual(descriptor_tensor.shape, self.expected_descriptor_tensor.shape) + + # Check the values + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + def test_backward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_r( + self.table_tensor, + self.table_info_tensor, + self.em_tensor, + self.last_layer_size, + ) + + descriptor_tensor = forward_result[0] + + # Check the forward + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + # Create a loss and perform backward + loss = descriptor_tensor.sum() + loss.backward() + + # Check gradients + self.assertIsNotNone(self.em_tensor.grad) + + # Check the shapes of the gradients + self.assertEqual(self.em_tensor.grad.shape, self.expected_dy_dem.shape) + + # Check the values of the gradients + np.testing.assert_allclose( + self.em_tensor.grad.numpy(), + self.expected_dy_dem.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + +if __name__ == "__main__": + unittest.main() diff --git 
a/source/tests/pd/test_tabulate_fusion_se_t.py b/source/tests/pd/test_tabulate_fusion_se_t.py new file mode 100644 index 0000000000..c9f182df45 --- /dev/null +++ b/source/tests/pd/test_tabulate_fusion_se_t.py @@ -0,0 +1,1769 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.cxx_op import ( + ENABLE_CUSTOMIZED_OP, +) +from deepmd.pd.utils import ( + env, +) + +from ..consistent.common import ( + parameterized, +) + + +@parameterized((paddle.float64, paddle.float32)) +@unittest.skipIf(not ENABLE_CUSTOMIZED_OP, "PyTorch customized OPs are not built") +class TestTabulateFusionSeTOp(unittest.TestCase): + def setUp(self): + (dtype,) = self.param + if dtype == paddle.float64: + self.prec = 1e-10 + elif dtype == paddle.float32: + # JZ: not sure the reason, but 1e-5 cannot pass the grad test + self.prec = 1e-3 + self.table_tensor = ( + paddle.to_tensor( + [ + -1.0600000163027882e02, + 7.7059358807135015e02, + -5.6954714749735385e03, + 1.2167808756610991e03, + -7.6199102434332218e01, + 1.0706136029373441e00, + -1.0600000164528124e02, + 7.7059358630452323e02, + -5.6954715659539552e03, + 1.2167808757436076e03, + -7.6199099707724926e01, + 1.0706134206080884e00, + -1.0600000163027882e02, + 7.7059358807135015e02, + -5.6954714749735385e03, + 1.2167808756610991e03, + -7.6199102434332218e01, + 1.0706136029373441e00, + -1.0600000164528124e02, + 7.7059358630452323e02, + -5.6954715659539552e03, + 1.2167808757436076e03, + -7.6199099707724926e01, + 1.0706134206080884e00, + -9.6000006759336443e01, + 6.2969719646863621e02, + -4.2053706363664551e03, + 9.0372155784831205e02, + -5.7600014239472898e01, + 8.6528676197113796e-01, + -9.6000006828502180e01, + 6.2969718981238339e02, + -4.2053709121998018e03, + 9.0372156236848912e02, + -5.7600006817493266e01, + 8.6528625106787871e-01, + -9.6000006759336443e01, + 6.2969719646863621e02, + -4.2053706363664551e03, + 9.0372155784831205e02, + -5.7600014239472898e01, + 8.6528676197113796e-01, + -9.6000006828502180e01, + 6.2969718981238339e02, + -4.2053709121998018e03, + 9.0372156236848912e02, + -5.7600006817493266e01, + 8.6528625106787871e-01, + -8.6000028021606425e01, + 5.0303296429845562e02, + -3.0008648248894533e03, + 6.4939597734382562e02, + -4.2250984019314707e01, + 6.8180015607155764e-01, + -8.6000028340480625e01, + 5.0303293978396903e02, + -3.0008656209622986e03, + 6.4939600529391078e02, + -4.2250965541906716e01, + 6.8179882734268982e-01, + -8.6000028021606425e01, + 5.0303296429845562e02, + -3.0008648248894533e03, + 6.4939597734382562e02, + -4.2250984019314707e01, + 6.8180015607155764e-01, + -8.6000028340480625e01, + 5.0303293978396903e02, + -3.0008656209622986e03, + 6.4939600529353049e02, + -4.2250965541830588e01, + 6.8179882733888086e-01, + -7.6000116148038558e01, + 3.9060139597613619e02, + -2.0515743554479322e03, + 4.4772754091167945e02, + -2.9848087537832814e01, + 5.2014755686537917e-01, + -7.6000117618125429e01, + 3.9060130821883052e02, + -2.0515765138621105e03, + 4.4772766653712006e02, + -2.9848047259266409e01, + 5.2014443989116910e-01, + -7.6000116148038558e01, + 3.9060139597613619e02, + -2.0515743554479322e03, + 4.4772754091167945e02, + -2.9848087537832814e01, + 5.2014755686537917e-01, + -7.6000117618125742e01, + 3.9060130821877993e02, + -2.0515765138659344e03, + 4.4772766652483722e02, + -2.9848047256692499e01, + 5.2014443976043645e-01, + -6.6000481290731443e01, + 2.9240425245900917e02, + -1.3271250821434478e03, + 2.9263955624337893e02, + -2.0087224005740719e01, + 3.8031147992206349e-01, + 
-6.6000488067863742e01, + 2.9240394960550276e02, + -1.3271304743966571e03, + 2.9264002765325057e02, + -2.0087154325946980e01, + 3.8030522013794582e-01, + -6.6000481290731443e01, + 2.9240425245900917e02, + -1.3271250821434478e03, + 2.9263955624337893e02, + -2.0087224005740719e01, + 3.8031147992206349e-01, + -6.6000488067883694e01, + 2.9240394960308691e02, + -1.3271304745319526e03, + 2.9264002727267626e02, + -2.0087154245656002e01, + 3.8030521605011575e-01, + -5.6001992867343972e01, + 2.0844745574402617e02, + -7.9715799906587699e02, + 1.7805563184427194e02, + -1.2663929104029080e01, + 2.6224978307822894e-01, + -5.6002024103130161e01, + 2.0844646075692629e02, + -7.9717003898786652e02, + 1.7805715054974732e02, + -1.2663864677938077e01, + 2.6224029170957303e-01, + -5.6001992867343972e01, + 2.0844745574402617e02, + -7.9715799906587699e02, + 1.7805563184427194e02, + -1.2663929104029080e01, + 2.6224978307822894e-01, + -5.6002024104383771e01, + 2.0844646064871867e02, + -7.9717004324410516e02, + 1.7805714044473001e02, + -1.2663862524337585e01, + 2.6224018166598279e-01, + -4.6008230210744550e01, + 1.3874976550319553e02, + -4.3134867537287749e02, + 9.7902623595157010e01, + -7.2734403121911884e00, + 1.6589123996688057e-01, + -4.6008373996710617e01, + 1.3874671965012058e02, + -4.3137141216256458e02, + 9.7906861443792735e01, + -7.2735856084076280e00, + 1.6588642735924275e-01, + -4.6008230210744550e01, + 1.3874976550319553e02, + -4.3134867537287749e02, + 9.7902623595157010e01, + -7.2734403121911884e00, + 1.6589123996688057e-01, + -4.6008374075307870e01, + 1.3874671513440606e02, + -4.3137152784492957e02, + 9.7906652364871050e01, + -7.2735401377994249e00, + 1.6588408717348646e-01, + -3.6033642533368131e01, + 8.3364086172019398e01, + -1.9942175516407502e02, + 4.6124022747838069e01, + -3.6130563858549958e00, + 9.1249773312287188e-02, + -3.6034298111245583e01, + 8.3355843868269616e01, + -1.9945266030093268e02, + 4.6135000705962462e01, + -3.6142786797647353e00, + 9.1293932043118198e-02, + -3.6033642533368131e01, + 8.3364086172019398e01, + -1.9942175516407502e02, + 4.6124022747838069e01, + -3.6130563858549958e00, + 9.1249773312287188e-02, + -3.6034302998781108e01, + 8.3355675173745269e01, + -1.9945516784358935e02, + 4.6132303200740992e01, + -3.6136582565667807e00, + 9.1261386291659793e-02, + -2.6132076703837274e01, + 4.2398929436319683e01, + -7.1037171119057973e01, + 1.3425662262407457e01, + -7.5172495708992593e-01, + 7.7522572203268742e-03, + -2.6134776894873077e01, + 4.2384732735328775e01, + -7.1030526549717337e01, + 1.3431455085299461e01, + -7.5302028721199155e-01, + 7.8186246126207160e-03, + -2.6132076703837274e01, + 4.2398929436319683e01, + -7.1037171119057973e01, + 1.3425662262405055e01, + -7.5172495708944420e-01, + 7.7522572203027138e-03, + -2.6135071381093578e01, + 4.2379566840123424e01, + -7.1067162844830236e01, + 1.3434603316099608e01, + -7.5251233833488806e-01, + 7.7734884077347950e-03, + -2.2221480705551805e01, + 3.0067218434037404e01, + -4.1779705297521097e01, + -1.9077757705724110e02, + 3.6413466026808294e02, + -1.6067397401486718e02, + -2.2225430071703467e01, + 3.0060809113889512e01, + -4.1712800191721314e01, + -1.9084786311022177e02, + 3.6410062714257685e02, + -1.6063028238785057e02, + -2.2221480705551830e01, + 3.0067218434036263e01, + -4.1779705297545611e01, + -1.9077757705723738e02, + 3.6413466026815809e02, + -1.6067397401492047e02, + -2.2226913938674084e01, + 3.0042371820589185e01, + -4.1801582285426832e01, + -1.9048619249019526e02, + 3.6373874557858261e02, + -1.6052358406417352e02, + 
-2.1250858373060836e01, + 2.7343847665267702e01, + -3.6044215009418814e01, + -1.7618484800469861e02, + 3.3120085405644409e02, + -1.4534825256321494e02, + -2.1254939505030809e01, + 2.7342716030835884e01, + -3.5955450545431681e01, + -1.7635550119316844e02, + 3.3127447930769307e02, + -1.4533876561022046e02, + -2.1250858373060954e01, + 2.7343847665262818e01, + -3.6044215009514119e01, + -1.7618484800464822e02, + 3.3120085405666612e02, + -1.4534825256338749e02, + -2.1257155379297881e01, + 2.7317691772612619e01, + -3.6063526926252166e01, + -1.7588696592837897e02, + 3.3079005662384850e02, + -1.4519086534447842e02, + -2.0283472228681301e01, + 2.4763027042036295e01, + -3.0876160316998963e01, + -1.6184864900381874e02, + 2.9976970905591691e02, + -1.3084395423768876e02, + -2.0287461515322455e01, + 2.4769400540137131e01, + -3.0762734380983186e01, + -1.6214886052089241e02, + 2.9998995088792128e02, + -1.3088331758129965e02, + -2.0283472228681809e01, + 2.4763027042017129e01, + -3.0876160317336627e01, + -1.6184864900359682e02, + 2.9976970905662938e02, + -1.3084395423826805e02, + -2.0290765181946348e01, + 2.4735639907973120e01, + -3.0892738413082597e01, + -1.6154574482310053e02, + 2.9934595420013272e02, + -1.3068028494926122e02, + -1.9319499689234629e01, + 2.2323824431805683e01, + -2.6243395369841849e01, + -1.4782286378121026e02, + 2.6985759662396487e02, + -1.1715474197881395e02, + -1.9323022570439292e01, + 2.2340565860680357e01, + -2.6102786429129356e01, + -1.4828764857305418e02, + 2.7027298759214750e02, + -1.1726163007473576e02, + -1.9319499689236839e01, + 2.2323824431730525e01, + -2.6243395371031539e01, + -1.4782286378021576e02, + 2.6985759662609979e02, + -1.1715474198068593e02, + -1.9327939259284843e01, + 2.2295320666731183e01, + -2.6257097174199931e01, + -1.4751677383623073e02, + 2.6942341041084092e02, + -1.1698575776762208e02, + -1.8359079763330211e01, + 2.0025118950280675e01, + -2.2113826757823226e01, + -1.3415932552431914e02, + 2.4147795894487624e02, + -1.0427314537549884e02, + -1.8361534194530734e01, + 2.0055847278170305e01, + -2.1944107342764479e01, + -1.3482982214648752e02, + 2.4214772485703989e02, + -1.0447085300268679e02, + -1.8359079763339750e01, + 2.0025118949989704e01, + -2.2113826761939308e01, + -1.3415932552009582e02, + 2.4147795895089951e02, + -1.0427314538136979e02, + -1.8368836959765495e01, + 1.9995657614892380e01, + -2.2124533894067383e01, + -1.3385233293246981e02, + 2.4103659293914149e02, + -1.0410011400771683e02, + -1.7402299525814517e01, + 1.7865597763687486e01, + -1.8455503416511757e01, + -1.2090765118569301e02, + 2.1464125749038132e02, + -9.2190581022134992e01, + -1.7402744551259310e01, + 1.7914800567904472e01, + -1.8255754666855470e01, + -1.2183089355280822e02, + 2.1563582256173194e02, + -9.2507405324257306e01, + -1.7402299525855486e01, + 1.7865597762572605e01, + -1.8455503430527756e01, + -1.2090765116826699e02, + 2.1464125750558804e02, + -9.2190581039770791e01, + -1.7413567239985614e01, + 1.7835392747330133e01, + -1.8463115133795956e01, + -1.2060260469703572e02, + 2.1419685510959093e02, + -9.2015134441585104e01, + -1.6449179896085464e01, + 1.5843762224435309e01, + -1.5236722252652665e01, + -1.0811515163854509e02, + 1.8935506712501905e02, + -8.0897437157402223e01, + -1.6446174965543889e01, + 1.5916874201410112e01, + -1.5007553197461570e01, + -1.0934291295595986e02, + 1.9075532567542470e02, + -8.1366596347119696e01, + -1.6449179896260411e01, + 1.5843762220214204e01, + -1.5236722299508587e01, + -1.0811515156878269e02, + 1.8935506715588940e02, + -8.0897437207525684e01, + 
-1.6462173655481337e01, + 1.5813096619069219e01, + -1.5241142983208677e01, + -1.0781563484017332e02, + 1.8891289499393798e02, + -8.0721658713418606e01, + -1.5499661595231082e01, + 1.3957945516559789e01, + -1.2426145992195885e01, + -9.5826844741964834e01, + 1.6562434781973772e02, + -7.0383233416004117e01, + -1.5491037589250178e01, + 1.4061349904707843e01, + -1.2170301483989650e01, + -9.7412966929875139e01, + 1.6751874597575440e02, + -7.1041920384880939e01, + -1.5499661595973759e01, + 1.3957945500778198e01, + -1.2426146145776961e01, + -9.5826844470313858e01, + 1.6562434784656404e02, + -7.0383233547510557e01, + -1.5514618579274794e01, + 1.3927192540790591e01, + -1.2427264674287118e01, + -9.5537423121432880e01, + 1.6519113036542510e02, + -7.0209783384625098e01, + -1.4553592409098401e01, + 1.2206343505203831e01, + -9.9929274597052196e00, + -8.4085595900823435e01, + 1.4345191724964303e02, + -6.0636862050381758e01, + -1.4536130507533649e01, + 1.2347228125716077e01, + -9.7159302678980044e00, + -8.6081002959763751e01, + 1.4592996741513730e02, + -6.1523840242331410e01, + -1.4553592412232879e01, + 1.2206343446986155e01, + -9.9929279524397305e00, + -8.4085594870780753e01, + 1.4345191706222485e02, + -6.0636862352071532e01, + -1.4570766853404239e01, + 1.2175998366492486e01, + -9.9905856922863112e00, + -8.3812185051328299e01, + 1.4303633648493073e02, + -6.0469165577726159e01, + -1.3610717065161962e01, + 1.0587059629986399e01, + -7.9068321681349163e00, + -7.2932404423885004e01, + 1.2283913327111270e02, + -5.1646910322317169e01, + -1.3579708436673444e01, + 1.0773027159520954e01, + -7.6175370796795425e00, + -7.5376833196183071e01, + 1.2597958225245242e02, + -5.2797863799745748e01, + -1.3610717078313911e01, + 1.0587059418306087e01, + -7.9068337121483454e00, + -7.2932400620636059e01, + 1.2283913169238102e02, + -5.1646910832841897e01, + -1.3630368323321786e01, + 1.0557789879027116e01, + -7.9007777139483810e00, + -7.2682825476758552e01, + 1.2245259140017740e02, + -5.1489446559796768e01, + -1.2670671078399982e01, + 9.0981634949263963e00, + -6.1383490362855788e00, + -6.2406844162279825e01, + 1.0378677653422224e02, + -4.3402055519687693e01, + -1.2619333100308433e01, + 9.3364634226935799e00, + -5.8491811509717584e00, + -6.5316414528433455e01, + 1.0763857666200300e02, + -4.4841832720191050e01, + -1.2670671133253135e01, + 9.0981627374157021e00, + -6.1383537481895356e00, + -6.2406830503476570e01, + 1.0378676818216074e02, + -4.3402055529436716e01, + -1.2693036794620980e01, + 9.0708908225804148e00, + -6.1281713411274001e00, + -6.2191660620037396e01, + 1.0344456594081470e02, + -4.3260806640248063e01, + -1.1732979767504439e01, + 7.7377614739662697e00, + -4.6587775146685351e00, + -5.2547655563671029e01, + 8.6296103981829802e01, + -3.5891515805495345e01, + -1.1651721415208119e01, + 8.0340005825064456e00, + -4.3852919661646119e00, + -5.5898160750405737e01, + 9.0851291378134590e01, + -3.7622755083739385e01, + -1.1732979994779518e01, + 7.7377588120662892e00, + -4.6587914600219875e00, + -5.2547607987974565e01, + 8.6296066930227624e01, + -3.5891510429190419e01, + -1.1758218632638741e01, + 7.7137968422318544e00, + -4.6438239588320966e00, + -5.2381405657406454e01, + 8.6019170302439520e01, + -3.5774653697918737e01, + -1.0797063195543267e01, + 6.5040766534586290e00, + -3.4402783696562169e00, + -4.3393478931462226e01, + 7.0370032342568010e01, + -2.9105535302381853e01, + -1.0672637254876815e01, + 6.8603244928014488e00, + -3.1995767859681346e00, + -4.7101348454718874e01, + 7.5530774605740319e01, + -3.1094453979913311e01, + 
-1.0797064129672576e01, + 6.5040675030570139e00, + -3.4403181344841500e00, + -4.3393319126804485e01, + 7.0369884883020177e01, + -2.9105501594155889e01, + -1.0825134802124644e01, + 6.4853446725127366e00, + -3.4195560956016346e00, + -4.3296381389022351e01, + 7.0187483762520671e01, + -2.9024415860031247e01, + -9.8622468030169337e00, + 5.3955359781222549e00, + -2.4558741324534137e00, + -3.4983728078555984e01, + 5.6014425934291204e01, + -2.3035887876475471e01, + -9.6769173769353625e00, + 5.8079540801032961e00, + -2.2635143148159220e00, + -3.8890523502249145e01, + 6.1563046720547966e01, + -2.5198820521877391e01, + -9.8622505990399034e00, + 5.3955054149765509e00, + -2.4559821583353774e00, + -3.4983216045684472e01, + 5.6013889382190079e01, + -2.3035736114340502e01, + -9.8926597117464805e00, + 5.3849440641688187e00, + -2.4279562878572039e00, + -3.4983707025980287e01, + 5.5966629574570753e01, + -2.3006306589550750e01, + -8.9277749780883457e00, + 4.4108678323349286e00, + -1.6793815271288624e00, + -2.7359655656676122e01, + 4.3239544183593061e01, + -1.7676416286664047e01, + -8.6587749152265552e00, + 4.8674392165289442e00, + -1.5450097170494306e00, + -3.1230915545542118e01, + 4.8829474992442343e01, + -1.9874755288141955e01, + -8.9277901202336185e00, + 4.4107699183102085e00, + -1.6796551456533098e00, + -2.7358123514289456e01, + 4.3237769027728554e01, + -1.7675844947587926e01, + -8.9590559763951383e00, + 4.4128957610428623e00, + -1.6423658138809611e00, + -2.7493743583145054e01, + 4.3380518846300511e01, + -1.7719639183506050e01, + -7.9928164326293913e00, + 3.5492331091008302e00, + -1.0852462622393610e00, + -2.0565792757352423e01, + 3.2061909496398073e01, + -1.3023704651715642e01, + -7.6125412569887647e00, + 4.0287966748633526e00, + -1.0084592804412351e00, + -2.4116992333062022e01, + 3.7252797603904497e01, + -1.5077495076198684e01, + -7.9928747817255603e00, + 3.5489404571097585e00, + -1.0858609980296849e00, + -2.0561701094768868e01, + 3.2056747083970720e01, + -1.3021877019728107e01, + -8.0213899495838241e00, + 3.5708128515175943e00, + -1.0368753205735253e00, + -2.0877831538201836e01, + 3.2456559535389509e01, + -1.3165540198118645e01, + -7.0564174984379102e00, + 2.8104770395789380e00, + -6.4821407306458223e-01, + -1.4652118176169953e01, + 2.2507145963021038e01, + -9.0780963613608154e00, + -6.5338936679228468e00, + 3.2846161494194233e00, + -6.1760141818709846e-01, + -1.7606122820367215e01, + 2.6855555289500277e01, + -1.0803821410528570e01, + -7.0566263531717324e00, + 2.8097184139861691e00, + -6.4925197579297411e-01, + -1.4643483271177150e01, + 2.2495243692983838e01, + -9.0734373052814821e00, + -7.0742646195707266e00, + 2.8621047467298468e00, + -5.8641470402843421e-01, + -1.5178915176777426e01, + 2.3211717123277591e01, + -9.3414295847965061e00, + -6.1172231064332783e00, + 2.1957964102200167e00, + -3.4265643705632465e-01, + -9.6769153352706798e00, + 1.4613873405033004e01, + -5.8450824172251430e00, + -5.4212678780860326e00, + 2.6341589573018260e00, + -3.4085224757280796e-01, + -1.1835854891340576e01, + 1.7794701474942944e01, + -7.1075278532253687e00, + -6.1178367984533244e00, + 2.1945528943967396e00, + -3.4261268423617658e-01, + -9.6695829134679272e00, + 1.4600877298870854e01, + -5.8381668136523013e00, + -6.1072022151656586e00, + 2.2922503774685161e00, + -2.6715334266026142e-01, + -1.0408120531614587e01, + 1.5617405440391840e01, + -6.2270636615178061e00, + -5.1722074807324017e00, + 1.7098190643016411e00, + -1.4098618492175408e-01, + -5.7061337346696464e00, + 8.4331806866534098e00, + -3.3349192888568142e00, + 
-4.2766424379800121e00, + 2.0860564217794284e00, + -1.5548660419053545e-01, + -7.0034949575065015e00, + 1.0332245608764421e01, + -4.0873492185766374e00, + -5.1727690165421372e00, + 1.7132539127425084e00, + -1.2776576793785877e-01, + -5.7565343018918274e00, + 8.4941254548170697e00, + -3.3479852132230872e00, + -5.0998839330979591e00, + 1.8678855512825561e00, + -5.7718910331047868e-02, + -6.5095346397755423e00, + 9.5462002113817768e00, + -3.7632628689263172e00, + -4.2112469382255613e00, + 1.3675717927787789e00, + -9.4961575783498800e-03, + -2.7877417589321136e00, + 3.9953503912711956e00, + -1.5499906707437840e00, + -3.1046711877098376e00, + 1.6568346830533449e00, + -4.5990009889900242e-02, + -3.3140676307068091e00, + 4.7472200808709299e00, + -1.8492173878772247e00, + -4.1976749320353317e00, + 1.4246952243441517e00, + 8.7531923058200650e-02, + -3.0996975434049761e00, + 4.4668738099197531e00, + -1.7103055321708385e00, + -4.0163145894665320e00, + 1.5923303121893606e00, + 5.8249749369824022e-02, + -3.3748048713195491e00, + 4.7925769874900315e00, + -1.8598420111853879e00, + -3.1955533414298376e00, + 1.2168024121915868e00, + 9.9474205814620603e-02, + -8.6811124876189694e-01, + 1.1994338853723501e00, + -4.4837238870567747e-01, + -1.9098914522594992e00, + 1.3654451552507061e00, + 2.9537044429980407e-03, + -9.3701125207094127e-01, + 1.2575365835116745e00, + -4.7248060681970733e-01, + -3.0285770502890443e00, + 1.6166340190704305e00, + 4.8662683065338386e-01, + -1.2308607057515726e00, + 1.6114560066217587e00, + -6.5896729332189652e-01, + -2.8078044229222514e00, + 1.4555130910035559e00, + 9.0876948497501955e-02, + -1.0566809618626720e00, + 1.3938154223720176e00, + -5.2279617091852160e-01, + -1.9963264755188566e00, + 1.3672906754961440e00, + 2.0801988470625002e-01, + 2.0083818728351077e-02, + -1.5135587406137185e-02, + -1.4175240342178652e-02, + -6.9344786794476854e-01, + 1.2280621078720415e00, + 1.2333381103148277e-02, + -1.0895386066093759e-02, + 2.1764282171790141e-02, + -1.0106900291744604e-02, + -1.2036881930169383e00, + 2.0482931230000392e00, + -1.2689218008973949e-01, + -5.0580690719339239e-01, + 3.4047786101030464e-01, + -7.0959386937004015e-02, + -1.4470760938303664e00, + 1.4285049373060201e00, + 5.5764887956399375e-02, + -2.9461990750009881e-02, + 2.3005167601875431e-02, + -1.0760396189439407e-02, + -4.3024292433642597e-01, + 1.7121633497582587e00, + 3.5705413032693957e-02, + -9.9216800479772127e-01, + 1.5115432403429119e00, + -6.3985596276149748e-01, + 5.4770961684437192e-01, + 1.2565653391084903e00, + 9.1639130181564755e-03, + -6.8547618650262643e-01, + 1.2037212931265591e00, + -5.1526772142324506e-01, + 4.8142431677326969e-01, + 1.2842025505965851e00, + -3.1103960497811806e-01, + -3.8667287940463613e-01, + 9.2663039525338942e-01, + -4.1330437951972537e-01, + 1.9976512094478704e-02, + 1.4898674304290889e00, + -2.1940405767858565e-03, + -8.0791207141984167e-01, + 1.3979310081478775e00, + -5.9845265079421794e-01, + 1.1971451112382212e00, + 1.6539633089946477e00, + -2.7009878691796618e-01, + -2.8868139196850624e00, + 4.7294193613612734e00, + -1.9578020397520424e00, + 1.8164162541717044e00, + 1.4570111710269262e00, + 2.2385898037164991e-02, + -3.1195681762439769e00, + 4.9723722392038878e00, + -2.0423972644796100e00, + 1.5812403987207633e00, + 1.1421043858413655e00, + -4.4319666868952730e-02, + -2.3144705949527720e00, + 3.7448930479898297e00, + -1.5426803544433196e00, + 1.4992161878806018e00, + 1.6612039136364238e00, + -2.2870713891204597e-02, + -3.4442115437939465e00, + 5.5057190995408973e00, + 
-2.2657208348376137e00, + 2.4658130352390710e00, + 1.5819912227884063e00, + -1.3204477532594588e-01, + -5.7752803465671017e00, + 9.0677018990478242e00, + -3.6843468204828174e00, + 3.1062201217160963e00, + 1.8205810727868250e00, + 7.3942159732456811e-02, + -7.3418038323250947e00, + 1.1309154676354810e01, + -4.5733470083866452e00, + 2.5667672162869133e00, + 1.3762236869878626e00, + 5.4823291778512563e-02, + -5.5558964069977943e00, + 8.5620133672289516e00, + -3.4575259608624478e00, + 2.9333361085351610e00, + 1.9771000784477066e00, + 2.1600903596218385e-02, + -7.7786452012965430e00, + 1.2026327126407146e01, + -4.8722408979121159e00, + 3.5238342146994350e00, + 1.8411341262124141e00, + 1.0485737443151430e-01, + -1.0316470080846322e01, + 1.5628354265192609e01, + -6.2547428286449396e00, + 4.3947471898784478e00, + 2.3129375587624681e00, + 1.6998863701958250e-01, + -1.3069120913924280e01, + 1.9764673064124775e01, + -7.9234176878170990e00, + 3.5464051944219954e00, + 1.7786047141550632e00, + 1.8395466553434961e-01, + -1.0256713338978345e01, + 1.5450540198835597e01, + -6.1709943751208902e00, + 4.3074781177775723e00, + 2.4284702978185178e00, + 1.2121907902830774e-01, + -1.3510697720561426e01, + 2.0490823414440431e01, + -8.2265504110307699e00, + 4.5269670710447079e00, + 2.3411415500822019e00, + 3.7814443659878427e-01, + -1.6533454371385766e01, + 2.4532574055181296e01, + -9.7222898630871342e00, + 5.6498078480438974e00, + 2.8871559084424092e00, + 3.1648740182441881e-01, + -1.9832336139347099e01, + 2.9630584562783888e01, + -1.1804975183138390e01, + 4.5317970588477650e00, + 2.3235629480266455e00, + 4.0711209040396701e-01, + -1.6523611973754900e01, + 2.4482080409856291e01, + -9.6968326211377835e00, + 5.6107427774726322e00, + 2.9693568967987254e00, + 2.6856229367890733e-01, + -2.0186235796983127e01, + 3.0228033555488111e01, + -1.2057362656117963e01, + 5.5230828784340904e00, + 3.0159142144119913e00, + 7.5032702265793638e-01, + -2.4452361306480910e01, + 3.5745746299744695e01, + -1.4059387633540990e01, + 6.8467243986091164e00, + 3.5205846294935204e00, + 5.5323452910250115e-01, + -2.7424447720726722e01, + 4.0542113968978946e01, + -1.6058340606199877e01, + 5.5241079122419858e00, + 3.0111097413061287e00, + 7.6043241689918206e-01, + -2.4453330947201032e01, + 3.5733842835424838e01, + -1.4052622761934279e01, + 6.8330970703372866e00, + 3.5730950345697865e00, + 5.0442967447855436e-01, + -2.7630302835415993e01, + 4.0921397061842079e01, + -1.6223699529825666e01, + 6.5233214752268127e00, + 3.8455313715589599e00, + 1.2738445662734672e00, + -3.4142511056048967e01, + 4.9288751118195229e01, + -1.9258816488331760e01, + 7.9798691992574877e00, + 4.2304633704347614e00, + 9.4916911879724064e-01, + -3.6082800915305256e01, + 5.2740474636382487e01, + -2.0757970588732530e01, + 6.5235391967368317e00, + 3.8442392655293900e00, + 1.2772689685023881e00, + -3.4144245582802192e01, + 4.9286600694030149e01, + -1.9257235266278844e01, + 7.9780164759860508e00, + 4.2581364755189171e00, + 9.0490824102641643e-01, + -3.6146890048111374e01, + 5.2902251888236343e01, + -2.0834714063750525e01, + 7.5301209868737518e00, + 4.8266093670811516e00, + 1.9906532239804082e00, + -4.5696171225139402e01, + 6.5222794336738914e01, + -2.5330008845677121e01, + 9.0592048208341964e00, + 5.0524444639807982e00, + 1.5639083038511417e00, + -4.6227354827270197e01, + 6.6742768625790532e01, + -2.6090733281390481e01, + 7.5301672757177256e00, + 4.8262668988539703e00, + 1.9917837214882572e00, + -4.5697152262800707e01, + 6.5222641787790508e01, + -2.5329699752317662e01, + 
9.0617089689058279e00, + 5.0627200474303731e00, + 1.5306087886050987e00, + -4.6201245261995687e01, + 6.6753711704174307e01, + -2.6103836713323240e01, + 8.5439978438576958e00, + 5.9605352581937785e00, + 2.9388171122244109e00, + -5.9213652478598007e01, + 8.3623964589400401e01, + -3.2288651007290504e01, + 1.0100238105795977e01, + 6.0156046860821641e00, + 2.4311227628788585e00, + -5.8189717323516248e01, + 8.2972590004142106e01, + -3.2212869674305303e01, + 8.5440076687321067e00, + 5.9604459430021439e00, + 2.9391801366526531e00, + -5.9214078468041464e01, + 8.3624068891376510e01, + -3.2288610777657510e01, + 1.0103667533796683e01, + 6.0158650887345448e00, + 2.4107760944314816e00, + -5.8125625048064265e01, + 8.2906979417176174e01, + -3.2191629006406409e01, + 9.5650113177877785e00, + 7.2498153679976820e00, + 4.1551371399277919e00, + -7.4795843598083408e01, + 1.0457037732454131e02, + -4.0151433068943419e01, + 1.1116968561077568e01, + 7.1347098863330896e00, + 3.5688140741297674e00, + -7.2151486218593305e01, + 1.0165680693075836e02, + -3.9206269356622016e01, + 9.5650133940644455e00, + 7.2497924894015711e00, + 4.1552503042122613e00, + -7.4796005009548836e01, + 1.0457044971811401e02, + -4.0151435976986221e01, + 1.1120034079668221e01, + 7.1303147700774092e00, + 3.5594873892317103e00, + -7.2082067018068685e01, + 1.0156598726189708e02, + -3.9171834664292227e01, + 1.0593064483227742e01, + 8.6969028070512202e00, + 5.6755396034912966e00, + -9.2539537763180832e01, + 1.2813560149579646e02, + -4.8933613418447223e01, + 1.2119543877083460e01, + 8.4137603187360543e00, + 4.9925034366798311e00, + -8.8194505075704640e01, + 1.2287993196505218e02, + -4.7096724506223822e01, + 1.0593064919257221e01, + 8.6968970567044934e00, + 5.6755738143875760e00, + -9.2539593640863643e01, + 1.2813563331215474e02, + -4.8933618162805772e01, + 1.2121921818513506e01, + 8.4078642204619420e00, + 4.9908632634858190e00, + -8.8134432374832016e01, + 1.2279086550380391e02, + -4.7060844505587738e01, + 1.1627957207938659e01, + 1.0303707615441018e01, + 7.5344011042552923e00, + -1.1253294830348190e02, + 1.5438372244089408e02, + -5.8647453529357783e01, + 1.3114510015623049e01, + 9.8513572940713416e00, + 6.7213349376406626e00, + -1.0635738219113546e02, + 1.4665751311861146e02, + -5.5881528760137869e01, + 1.1627957298834614e01, + 1.0303706197478814e01, + 7.5344111366673712e00, + -1.1253296638384563e02, + 1.5438373415898508e02, + -5.8647455853629580e01, + 1.3116237925845430e01, + 9.8455331102145145e00, + 6.7243141059359051e00, + -1.0631074264006560e02, + 1.4658112805680690e02, + -5.5849452095162235e01, + 1.2669386535689361e01, + 1.2071287030293307e01, + 9.7633555455962835e00, + -1.3485075345900265e02, + 1.8336444946299886e02, + -6.9300787627414508e01, + 1.4105804414673191e01, + 1.1444289269702800e01, + 8.7789794745243590e00, + -1.2666835962860844e02, + 1.7298274034188972e02, + -6.5547771558832267e01, + 1.2669386554490638e01, + 1.2071286687068984e01, + 9.7633584027450482e00, + -1.3485075900242089e02, + 1.8336445335820781e02, + -6.9300788508071975e01, + 1.4107018463574896e01, + 1.1439185153305873e01, + 8.7843335749580440e00, + -1.2663444344319166e02, + 1.7292158897636148e02, + -6.5521162694327174e01, + 1.3716937488160630e01, + 1.3999597459400730e01, + 1.2389915672436279e01, + -1.5954894249539399e02, + 2.1510813446746886e02, + -8.0895567204040049e01, + 1.5095682313349364e01, + 1.3189272906323732e01, + 1.1192627051714643e01, + -1.4915916817312757e02, + 2.0184825850919157e02, + -7.6081293415969839e01, + 1.3716937492019641e01, + 1.3999597377767842e01, + 
1.2389916464009524e01, + -1.5954894412085929e02, + 2.1510813567394996e02, + -8.0895567498068928e01, + 1.5096520030681436e01, + 1.3185064407456906e01, + 1.1198910160279951e01, + -1.4913565617175487e02, + 2.0180124290250004e02, + -7.6060129778156622e01, + 1.4770075388032444e01, + 1.6087303167766446e01, + 1.5436222950666867e01, + -1.8666021493779203e02, + 2.4962122089688103e02, + -9.3426463524457304e01, + 1.6085379191481852e01, + 1.5083589447287226e01, + 1.3991739427782750e01, + -1.7386892459375579e02, + 2.3325385095807121e02, + -8.7470099643500802e01, + 1.4770075388818769e01, + 1.6087303148664304e01, + 1.5436223164442264e01, + -1.8666021539675981e02, + 2.4962122125116741e02, + -9.3426463615076329e01, + 1.6085951551006787e01, + 1.5080238931969067e01, + 1.3998101278449143e01, + -1.7385331837944693e02, + 2.3321864790104019e02, + -8.7453697552144448e01, + 1.5828143941097450e01, + 1.8331670220961666e01, + 1.8918268274003861e01, + -2.1619095210442941e02, + 2.8688297635978756e02, + -1.0687973526499771e02, + 1.7075534787366465e01, + 1.7125200136366264e01, + 1.7207074959934751e01, + -2.0084388544719391e02, + 2.6720765911058965e02, + -9.9705133726570395e01, + 1.5828143941256627e01, + 1.8331670216557445e01, + 1.8918268330404022e01, + -2.1619095222989833e02, + 2.8688297645950814e02, + -1.0687973529137253e02, + 1.7075923730873765e01, + 1.7122590193964911e01, + 1.7213058024904747e01, + -2.0083402645820061e02, + 2.6718180837697332e02, + -9.9692640534772679e01, + 1.6890371426423382e01, + 2.0728579569842751e01, + 2.2845917469463828e01, + -2.4812083435502871e02, + 3.2684448823688496e02, + -1.2123263616047282e02, + 1.8066449820492846e01, + 1.9312661524160735e01, + 2.0870036016187061e01, + -2.3013589616073858e02, + 3.0372498377642154e02, + -1.1277999824352135e02, + 1.6890371426455424e01, + 2.0728579568840633e01, + 2.2845917484032956e01, + -2.4812083438838550e02, + 3.2684448826399682e02, + -1.2123263616782057e02, + 1.8066713333743454e01, + 1.9310657703202459e01, + 2.0875423564416035e01, + -2.3013008228413184e02, + 3.0370630494679148e02, + -1.1277060230387309e02, + 1.7955886187113396e01, + 2.3272683588860026e01, + 2.7223982220959247e01, + -2.8240595076334000e02, + 3.6943078590316281e02, + -1.3645364576977221e02, + 1.9058236733002300e01, + 2.1644988962398710e01, + 2.5012267757287322e01, + -2.6180071928343307e02, + 3.4282650121799617e02, + -1.2669036882336400e02, + 1.7955886187119816e01, + 2.3272683588634656e01, + 2.7223982224651898e01, + -2.8240595077199526e02, + 3.6943078591032139e02, + -1.3645364577174797e02, + 1.9058414960148450e01, + 2.1643466247439289e01, + 2.5016983354038196e01, + -2.6179767020610126e02, + 3.4281320617581565e02, + -1.2668337355331974e02, + 1.9023741366983238e01, + 2.5957710504548576e01, + 3.2054387652193789e01, + -3.1898571318422574e02, + 4.1454655650462962e02, + -1.5250373535684176e02, + 2.0050906563887416e01, + 2.4121527381838824e01, + 2.9665428981325245e01, + -2.9589665055055406e02, + 3.8453661583827250e02, + -1.4143340987287985e02, + 1.9023741366984520e01, + 2.5957710504498362e01, + 3.2054387653114766e01, + -3.1898571318642672e02, + 4.1454655650647550e02, + -1.5250373535735841e02, + 2.0051026978020587e01, + 2.4120379273875816e01, + 2.9669474257430963e01, + -2.9589543070583102e02, + 3.8452729731205977e02, + -1.4142824748467820e02, + 2.0092947487287756e01, + 2.8776895490568755e01, + 3.7339233558876920e01, + -9.8781982607414882e00, + 7.0916635282296292e-01, + -1.2340880155534291e-02, + 2.1044418341890132e01, + 2.6741847681518077e01, + 3.4861073630499796e01, + -9.1700568642165461e00, + 
6.5220324713443967e-01, + -1.1045071585279443e-02, + 2.0092947487288011e01, + 2.8776895490557653e01, + 3.7339233559103448e01, + -9.8781982608033179e00, + 7.0916635282857932e-01, + -1.2340880155703077e-02, + 2.1044499630877905e01, + 2.6740987496092696e01, + 3.4864491165514394e01, + -9.1707199731434574e00, + 6.5223741134844682e-01, + -1.1045188698410773e-02, + 2.1162510215379026e01, + 3.1723491960797684e01, + 4.3084295875067085e01, + -4.1033675985379521e00, + -6.6095139594000130e-01, + 6.0977735530407223e-02, + 2.2038706806958309e01, + 2.9505670300337073e01, + 4.0630600131872811e01, + -2.7905442844326718e00, + -8.3885972791335117e-01, + 6.8309956404426039e-02, + 2.1162510215379076e01, + 3.1723491960795304e01, + 4.3084295875120795e01, + -4.1033675985539224e00, + -6.6095139593840913e-01, + 6.0977735530354210e-02, + 2.2038761643178379e01, + 2.9505029336592230e01, + 4.0633451796171073e01, + -2.7913314472201640e00, + -8.3878528163749511e-01, + 6.8307595298566767e-02, + 3.1719012432820758e01, + 6.7480322661109355e01, + 1.3318978565899991e02, + -1.6791944323404795e01, + -1.0181217992701848e00, + 1.2989592638281225e-01, + 3.2009499874031789e01, + 6.5013296175889408e01, + 1.3669799889514238e02, + -1.7009031615065428e01, + -1.0689880784706638e00, + 1.3388972346122466e-01, + 3.1719012432820758e01, + 6.7480322661109355e01, + 1.3318978565899991e02, + -1.6791944323404795e01, + -1.0181217992701848e00, + 1.2989592638281225e-01, + 3.2009500887769519e01, + 6.5013269472322307e01, + 1.3669829238273672e02, + -1.7009116366540379e01, + -1.0689798256828462e00, + 1.3388945486998777e-01, + 4.1931127118492086e01, + 1.1600186087954401e02, + 3.1751764022286790e02, + -4.6438894455748802e01, + -8.7599401950869438e-01, + 2.2297105562740663e-01, + 4.2002297497564768e01, + 1.1479764873768737e02, + 3.2393143797302810e02, + -4.7847299173836262e01, + -7.8150712905299369e-01, + 2.2131248436241077e-01, + 4.1931127118492086e01, + 1.1600186087954401e02, + 3.1751764022286790e02, + -4.6438894455748802e01, + -8.7599401950869438e-01, + 2.2297105562740663e-01, + 4.2002297514594851e01, + 1.1479764793294436e02, + 3.2393145467669495e02, + -4.7847304068128608e01, + -7.8150664807362491e-01, + 2.2131246858403722e-01, + 5.1984670105634827e01, + 1.7926303194781252e02, + 6.2846495111925287e02, + -1.0034649475039414e02, + 2.4606292097951082e-01, + 3.3256752105517051e-01, + 5.2000554052128159e01, + 1.7883235795593501e02, + 6.3273302895025176e02, + -1.0138733878813618e02, + 3.2804187851642969e-01, + 3.3055293107858102e-01, + 5.1984670105634827e01, + 1.7926303194781252e02, + 6.2846495111925287e02, + -1.0034649475039414e02, + 2.4606292097951082e-01, + 3.3256752105517051e-01, + 5.2000554052402805e01, + 1.7883235793562420e02, + 6.3273302962903426e02, + -1.0138733898825184e02, + 3.2804189825766372e-01, + 3.3055293042886030e-01, + 6.1996666427075382e01, + 2.5724136589119979e02, + 1.0913830717468406e03, + -1.8317243758181812e02, + 2.5193786568880601e00, + 4.6277932792022042e-01, + 6.2000133522892554e01, + 2.5710536851489377e02, + 1.0934673032018356e03, + -1.8370056934287794e02, + 2.5630609198690104e00, + 4.6162176037505448e-01, + 6.1996666427075382e01, + 2.5724136589119979e02, + 1.0913830717468406e03, + -1.8317243758181812e02, + 2.5193786568880601e00, + 4.6277932792022042e-01, + 6.2000133522896938e01, + 2.5710536851442714e02, + 1.0934673032246803e03, + -1.8370056934963364e02, + 2.5630609205366826e00, + 4.6162176035304603e-01, + 7.1999279107664492e01, + 3.4965254984584158e02, + 1.7356304176273381e03, + -3.0063395678020430e02, + 6.2079056750108883e00, + 
6.1505333334154833e-01, + 7.2000032172982571e01, + 3.4961232791697932e02, + 1.7365043785874466e03, + -3.0086002522613632e02, + 6.2270725229979789e00, + 6.1452738833821030e-01, + 7.1999279107664492e01, + 3.4965254984584158e02, + 1.7356304176273381e03, + -3.0063395678020430e02, + 6.2079056750108883e00, + 6.1505333334154833e-01, + 7.2000032172982642e01, + 3.4961232791696904e02, + 1.7365043785881401e03, + -3.0086002522634379e02, + 6.2270725230187063e00, + 6.1452738833751985e-01, + 8.1999844359310714e01, + 4.5636323545227941e02, + 2.5918884526432239e03, + -4.5885344883307727e02, + 1.1616256691917803e01, + 7.8948404417119522e-01, + 8.2000007751936337e01, + 4.5635184072744744e02, + 2.5922210189842476e03, + -4.5894061525528980e02, + 1.1623761628208563e01, + 7.8927378661620728e-01, + 8.1999844359310714e01, + 4.5636323545227941e02, + 2.5918884526432239e03, + -4.5885344883307727e02, + 1.1616256691917803e01, + 7.8948404417119522e-01, + 8.2000007751936337e01, + 4.5635184072744744e02, + 2.5922210189842476e03, + -4.5894061525528980e02, + 1.1623761628208563e01, + 7.8927378661620728e-01, + ], + dtype=dtype, + ) + .to(device=env.DEVICE) + .reshape([8, 174]) + ) # 1392 + self.table_info_tensor = paddle.to_tensor( + [ + -2.1000000000000000e01, + 2.1000000000000000e01, + 1.0500000000000000e02, + 1.0000000000000000e00, + 1.0000000000000000e01, + -1.0000000000000000e00, + ], + dtype=dtype, + device="cpu", + ) + self.em_x_tensor = ( + paddle.to_tensor( + [ + 9.3816147034272368e-01, + -1.6703373029862567e-01, + -4.4294526064601734e-02, + -2.8798505489184573e-01, + -1.6703373029862567e-01, + 9.2489218226366088e-01, + -2.8928196536572048e-01, + -4.7833509099876154e-01, + -4.4294526064601734e-02, + -2.8928196536572048e-01, + 5.7034320185695120e-01, + 1.8771147911830000e-01, + -2.8798505489184573e-01, + -4.7833509099876154e-01, + 1.8771147911830000e-01, + 4.0174654365823070e-01, + 8.4370316144902313e-01, + -3.7813146789689916e-02, + -3.6989397568296523e-01, + -4.0554075086539937e-01, + -3.7813146789689916e-02, + 6.5766402633747112e-01, + -4.2312966361682885e-01, + 1.2685067374257861e-01, + -3.6989397568296523e-01, + -4.2312966361682885e-01, + 6.0171576901660107e-01, + 9.8283160997298613e-02, + -4.0554075086539937e-01, + 1.2685067374257861e-01, + 9.8283160997298613e-02, + 2.1324148100625978e-01, + 9.7843596341516559e-01, + -1.0492833888237871e-01, + -1.0538688914576379e-01, + -2.0453551592353389e-01, + -1.0492833888237871e-01, + 7.7943976693565231e-01, + -1.5898500035781410e-01, + 9.4834209331437741e-02, + -1.0538688914576379e-01, + -1.5898500035781410e-01, + 7.4778071691708869e-01, + -6.1895255142095873e-01, + -2.0453551592353389e-01, + 9.4834209331437741e-02, + -6.1895255142095873e-01, + 6.0844713798743799e-01, + 1.0079020879244640e00, + -2.3855984150631487e-01, + -3.4608276043004524e-02, + -4.7448768267289088e-01, + -2.3855984150631487e-01, + 4.9732018171028253e-01, + -3.1320787082485729e-01, + -1.4528004145602180e-01, + -3.4608276043004524e-02, + -3.1320787082485729e-01, + 4.7696729363954582e-01, + 1.1723268074231248e-01, + -4.7448768267289088e-01, + -1.4528004145602180e-01, + 1.1723268074231248e-01, + 4.0511515406019899e-01, + ], + dtype=dtype, + ) + .to(device=env.DEVICE) + .reshape([4, 16]) + ) # 3072 + self.em_tensor = self.em_x_tensor.reshape([4, 4, 4]) + self.table_info_tensor.stop_gradient = not False + self.table_tensor.stop_gradient = not False + self.em_x_tensor.stop_gradient = not True + self.em_tensor.stop_gradient = not True + self.last_layer_size = 4 + self.nloc = 192 + self.nnei_i = 4 + self.nnei_j = 
4 + + self.expected_descriptor_tensor = ( + paddle.to_tensor( + [ + 1.4271973325754339e00, + 2.5214997685364109e00, + 3.1394341134078902e00, + 2.2727894815158436e00, + 1.9127738317829568e00, + 2.5288382955492263e00, + 3.1401587802428659e00, + 2.5252400661016079e00, + 9.4806287131835343e-01, + 2.3778589851963829e00, + 2.8273548699126683e00, + 1.9358633427396228e00, + 2.1586806210305824e00, + 2.6256636737020518e00, + 3.3955783231847523e00, + 2.7091329174140033e00, + ], + dtype=dtype, + ) + .to(device=env.DEVICE) + .reshape([4, 4]) + ) + self.expected_dy_dem_x = ( + paddle.to_tensor( + [ + 5.648489055364202, + -0.8109841888364551, + -0.24536867097411239, + -1.1747441933374314, + -0.8109841888364551, + 5.551778760144183, + -1.177917429853053, + -1.679018415609313, + -0.24536867097411239, + -1.177917429853053, + 3.066855971667982, + 1.0527786223200397, + -1.1747441933374314, + -1.679018415609313, + 1.0527786223200397, + 2.154128070312613, + 4.941344648306369, + -0.2103366776681705, + -1.367487115395829, + -1.4564034402591373, + -0.2103366776681705, + 3.608285604419272, + -1.5040667548923485, + 0.7193959503618953, + -1.367487115395829, + -1.5040667548923485, + 3.254731998053089, + 0.5591533452715003, + -1.4564034402591373, + 0.7193959503618953, + 0.5591533452715003, + 1.1890751500286143, + 5.930646543070966, + -0.5504936332829675, + -0.5526209441647617, + -0.9416633243620358, + -0.5504936332829675, + 4.459184963787069, + -0.7803876483286257, + 0.5396747709549512, + -0.5526209441647617, + -0.7803876483286257, + 4.228294605542883, + -2.4024371569601737, + -0.9416633243620358, + 0.5396747709549512, + -2.4024371569601737, + 3.2959904931493385, + 6.1224119486112745, + -1.0448644883385283, + -0.1928789305436163, + -1.665305701289531, + -1.0448644883385283, + 2.654867798405467, + -1.2350917789985936, + -0.7261316753808301, + -0.1928789305436163, + -1.2350917789985936, + 2.5453318630183253, + 0.6656811038445796, + -1.665305701289531, + -0.7261316753808301, + 0.6656811038445796, + 2.1713171576639834, + ], + dtype=dtype, + ) + .to(device=env.DEVICE) + .reshape([4, 16]) + ) + self.expected_dy_dem = ( + paddle.to_tensor( + [ + -5.714759600210596, + -11.831713987629353, + -11.190122813510595, + -12.370780785045307, + -11.831713987629353, + -5.794533094540567, + -12.376066372858583, + -13.075171860091254, + -11.190122813510595, + -12.376066372858583, + -7.803917285588026, + -9.877251464178656, + -12.370780785045307, + -13.075171860091254, + -9.877251464178656, + -8.705541422983027, + -6.27628449412496, + -11.154143613489751, + -12.688057210800388, + -12.817836346604595, + -11.154143613489751, + -7.330064381543641, + -12.8806650535401, + -10.220630213188823, + -12.688057210800388, + -12.8806650535401, + -7.634743369735131, + -10.382921713303563, + -12.817836346604595, + -10.220630213188823, + -10.382921713303563, + -9.734474315546018, + -5.471377616034635, + -11.517744760556846, + -11.520149882632618, + -12.009085059484455, + -11.517744760556846, + -6.648325517174191, + -11.792420704526007, + -10.402546165885312, + -11.520149882632618, + -11.792420704526007, + -6.828388581370712, + -13.58699904400806, + -12.009085059484455, + -10.402546165885312, + -13.58699904400806, + -7.598306221049991, + -5.292546900481144, + -12.161894405917199, + -11.136299195212807, + -13.061667970148363, + -12.161894405917199, + -8.194852173884833, + -12.47192675731089, + -11.724533065186144, + -11.136299195212807, + -12.47192675731089, + -8.30347749201922, + -10.275210657956574, + -13.061667970148363, + -11.724533065186144, + 
-10.275210657956574, + -8.687482898190318, + ], + dtype=dtype, + ) + .to(device=env.DEVICE) + .reshape([4, 4, 4]) + ) + + def test_forward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_t( + self.table_tensor, + self.table_info_tensor, + self.em_x_tensor, + self.em_tensor, + self.last_layer_size, + ) + + descriptor_tensor = forward_result[0] + + # Check the shape + self.assertEqual(descriptor_tensor.shape, self.expected_descriptor_tensor.shape) + + # Check the values + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + def test_backward(self): + # Call the forward function + forward_result = paddle.ops.deepmd.tabulate_fusion_se_t( + self.table_tensor, + self.table_info_tensor, + self.em_x_tensor, + self.em_tensor, + self.last_layer_size, + ) + + descriptor_tensor = forward_result[0] + + # Check the forward + np.testing.assert_allclose( + descriptor_tensor.numpy(), + self.expected_descriptor_tensor.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + # Create a loss and perform backward + loss = descriptor_tensor.sum() + loss.backward() + + # Check gradients + self.assertIsNotNone(self.em_x_tensor.grad) + self.assertIsNotNone(self.em_tensor.grad) + + # Check the shapes of the gradients + self.assertEqual(self.em_x_tensor.grad.shape, self.expected_dy_dem_x.shape) + self.assertEqual(self.em_tensor.grad.shape, self.expected_dy_dem.shape) + + # Check the values of the gradients + np.testing.assert_allclose( + self.em_x_tensor.grad.numpy(), + self.expected_dy_dem_x.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + np.testing.assert_allclose( + self.em_tensor.grad.numpy(), + self.expected_dy_dem.numpy(), + atol=self.prec, + rtol=self.prec, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_training.py b/source/tests/pd/test_training.py new file mode 100644 index 0000000000..7306302cf7 --- /dev/null +++ b/source/tests/pd/test_training.py @@ -0,0 +1,453 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import numpy as np + +from deepmd.pd.entrypoints.main import ( + get_trainer, +) +from deepmd.pd.utils.finetune import ( + get_finetune_rules, +) + +from .model.test_permutation import ( + model_dos, + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, + model_zbl, +) + + +class DPTrainTest: + def test_dp_train(self): + # test training from scratch + trainer = get_trainer(deepcopy(self.config)) + trainer.run() + state_dict_trained = trainer.wrapper.model.state_dict() + + # test fine-tuning using same input + finetune_model = self.config["training"].get("save_ckpt", "model.ckpt") + ".pd" + self.config["model"], finetune_links = get_finetune_rules( + finetune_model, + self.config["model"], + ) + trainer_finetune = get_trainer( + deepcopy(self.config), + finetune_model=finetune_model, + finetune_links=finetune_links, + ) + + # test fine-tuning using empty input + self.config_empty = deepcopy(self.config) + if "descriptor" in self.config_empty["model"]: + self.config_empty["model"]["descriptor"] = {} + if "fitting_net" in self.config_empty["model"]: + self.config_empty["model"]["fitting_net"] = {} + self.config_empty["model"], finetune_links = get_finetune_rules( + finetune_model, + self.config_empty["model"], + change_model_params=True, + ) + trainer_finetune_empty = get_trainer(
+ deepcopy(self.config_empty), + finetune_model=finetune_model, + finetune_links=finetune_links, + ) + + # test fine-tuning using random fitting + self.config["model"], finetune_links = get_finetune_rules( + finetune_model, self.config["model"], model_branch="RANDOM" + ) + trainer_finetune_random = get_trainer( + deepcopy(self.config_empty), + finetune_model=finetune_model, + finetune_links=finetune_links, + ) + + # check parameters + state_dict_finetuned = trainer_finetune.wrapper.model.state_dict() + state_dict_finetuned_empty = trainer_finetune_empty.wrapper.model.state_dict() + state_dict_finetuned_random = trainer_finetune_random.wrapper.model.state_dict() + for state_key in state_dict_finetuned: + if "out_bias" not in state_key and "out_std" not in state_key: + np.testing.assert_allclose( + state_dict_trained[state_key].numpy(), + state_dict_finetuned[state_key].numpy(), + ) + np.testing.assert_allclose( + state_dict_trained[state_key].numpy(), + state_dict_finetuned_empty[state_key].numpy(), + ) + if "fitting_net" not in state_key: + np.testing.assert_allclose( + state_dict_trained[state_key].numpy(), + state_dict_finetuned_random[state_key].numpy(), + ) + + # check running + trainer_finetune.run() + trainer_finetune_empty.run() + trainer_finetune_random.run() + + def test_trainable(self): + fix_params = deepcopy(self.config) + fix_params["model"]["descriptor"]["trainable"] = False + fix_params["model"]["fitting_net"]["trainable"] = False + free_descriptor = hasattr(self, "not_all_grad") and self.not_all_grad + if free_descriptor: + # can not set requires_grad false for all parameters, + # because the input coord has no grad, thus the loss if all set to false + # we only check trainable for fitting net + fix_params["model"]["descriptor"]["trainable"] = True + trainer_fix = get_trainer(fix_params) + model_dict_before_training = deepcopy( + trainer_fix.model.get_fitting_net().state_dict() + ) + trainer_fix.run() + model_dict_after_training = deepcopy( + trainer_fix.model.get_fitting_net().state_dict() + ) + else: + trainer_fix = get_trainer(fix_params) + model_dict_before_training = deepcopy(trainer_fix.model.state_dict()) + trainer_fix.run() + model_dict_after_training = deepcopy(trainer_fix.model.state_dict()) + for key in model_dict_before_training: + np.testing.assert_allclose( + model_dict_before_training[key].numpy(), + model_dict_after_training[key].numpy(), + ) + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pd"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + +class TestEnergyModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestDOSModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "dos/input.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "dos/data/atomic_system")] + self.config["training"]["training_data"]["systems"] = 
data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dos) + self.config["model"]["type_map"] = ["H"] + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.not_all_grad = True + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestEnergyZBLModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/zbl.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_zbl) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestFparam(unittest.TestCase, DPTrainTest): + """Test if `fparam` can be loaded correctly.""" + + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["model"]["fitting_net"]["numb_fparam"] = 1 + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000" + shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy") + + def tearDown(self) -> None: + (self.set_path / "fparam.npy").unlink(missing_ok=True) + DPTrainTest.tearDown(self) + + +class TestEnergyModelDPA1(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa1) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestEnergyModelDPA2(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa2) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestEnergyModelHybrid(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_hybrid) + self.config["training"]["numb_steps"] = 1 
+ self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestDipoleModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/dipole/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/dipole/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_se_e2_a) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "dipole" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestDipoleModelDPA1(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/dipole/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/dipole/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa1) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "dipole" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +@unittest.skip("Unable to fill empty grad inputs") +class TestDipoleModelDPA2(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/dipole/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/dipole/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa2) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "dipole" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestPolarModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/polar/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/polar/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_se_e2_a) + 
self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "polar" + self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + # can not set requires_grad false for all parameters, + # because the input coord has no grad, thus the loss if all set to false + self.not_all_grad = True + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestPolarModelDPA1(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/polar/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/polar/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa1) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "polar" + self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + # can not set requires_grad false for all parameters, + # because the input coord has no grad, thus the loss if all set to false + self.not_all_grad = True + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestPolarModelDPA2(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/polar/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/polar/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa2) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "polar" + self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + # can not set requires_grad false for all parameters, + # because the input coord has no grad, thus the loss if all set to false + self.not_all_grad = True + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/test_update_sel.py b/source/tests/pd/test_update_sel.py new file mode 100644 index 0000000000..5ee36e75ff --- /dev/null +++ b/source/tests/pd/test_update_sel.py @@ -0,0 +1,190 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import random +import unittest +from unittest.mock import ( + patch, +) + +from deepmd.pd.model.model.model import ( + BaseModel, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) + +from ..seed import ( + GLOBAL_SEED, +) + + +def update_sel(jdata): + type_map = jdata["model"].get("type_map") + train_data = None + jdata["model"], _ = 
BaseModel.update_sel(train_data, type_map, jdata["model"]) + return jdata + + +class TestTrain(unittest.TestCase): + def setUp(self) -> None: + self.update_sel = UpdateSel() + self.mock_min_nbor_dist = random.Random(GLOBAL_SEED).random() + return super().setUp() + + @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") + def test_update_one_sel(self, sel_mock): + sel_mock.return_value = self.mock_min_nbor_dist, [10, 20] + + min_nbor_dist, sel = self.update_sel.update_one_sel(None, None, 6, "auto") + # self.assertEqual(descriptor['sel'], [11,22]) + self.assertEqual(sel, [12, 24]) + self.assertAlmostEqual(min_nbor_dist, self.mock_min_nbor_dist) + min_nbor_dist, sel = self.update_sel.update_one_sel(None, None, 6, "auto:1.5") + # self.assertEqual(descriptor['sel'], [15,30]) + self.assertEqual(sel, [16, 32]) + self.assertAlmostEqual(min_nbor_dist, self.mock_min_nbor_dist) + + @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") + def test_update_sel_hybrid(self, sel_mock): + sel_mock.return_value = self.mock_min_nbor_dist, [10, 20] + + jdata = { + "model": { + "descriptor": { + "type": "hybrid", + "list": [ + {"type": "se_e2_a", "rcut": 6, "sel": "auto"}, + {"type": "se_e2_a", "rcut": 6, "sel": "auto:1.5"}, + ], + } + }, + "training": {"training_data": {}}, + } + expected_out = { + "model": { + "descriptor": { + "type": "hybrid", + "list": [ + {"type": "se_e2_a", "rcut": 6, "sel": [12, 24]}, + {"type": "se_e2_a", "rcut": 6, "sel": [16, 32]}, + ], + } + }, + "training": {"training_data": {}}, + } + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") + def test_update_sel(self, sel_mock): + sel_mock.return_value = self.mock_min_nbor_dist, [10, 20] + + jdata = { + "model": {"descriptor": {"type": "se_e2_a", "rcut": 6, "sel": "auto"}}, + "training": {"training_data": {}}, + } + expected_out = { + "model": {"descriptor": {"type": "se_e2_a", "rcut": 6, "sel": [12, 24]}}, + "training": {"training_data": {}}, + } + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") + def test_update_sel_atten_auto(self, sel_mock): + sel_mock.return_value = self.mock_min_nbor_dist, [25] + + jdata = { + "model": { + "descriptor": { + "type": "se_atten", + "sel": "auto", + "rcut": 6, + } + }, + "training": {"training_data": {}}, + } + expected_out = { + "model": { + "descriptor": { + "type": "se_atten", + "sel": 28, + "rcut": 6, + } + }, + "training": {"training_data": {}}, + } + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") + def test_update_sel_atten_int(self, sel_mock): + sel_mock.return_value = self.mock_min_nbor_dist, [25] + + jdata = { + "model": { + "descriptor": { + "type": "se_atten", + "sel": 30, + "rcut": 6, + } + }, + "training": {"training_data": {}}, + } + expected_out = { + "model": { + "descriptor": { + "type": "se_atten", + "sel": 30, + "rcut": 6, + } + }, + "training": {"training_data": {}}, + } + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") + def test_update_sel_atten_list(self, sel_mock): + sel_mock.return_value = self.mock_min_nbor_dist, [25] + + jdata = { + "model": { + "descriptor": { + "type": "se_atten", + "sel": 30, + "rcut": 6, + } + }, + "training": {"training_data": {}}, + } + expected_out = { + "model": { + "descriptor": { + "type": "se_atten", + "sel": 
30, + "rcut": 6, + } + }, + "training": {"training_data": {}}, + } + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + def test_skip_frozen(self): + jdata = { + "model": { + "type": "frozen", + }, + "training": {"training_data": {}}, + } + expected_out = jdata.copy() + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + def test_wrap_up_4(self): + self.assertEqual(self.update_sel.wrap_up_4(12), 3 * 4) + self.assertEqual(self.update_sel.wrap_up_4(13), 4 * 4) + self.assertEqual(self.update_sel.wrap_up_4(14), 4 * 4) + self.assertEqual(self.update_sel.wrap_up_4(15), 4 * 4) + self.assertEqual(self.update_sel.wrap_up_4(16), 4 * 4) + self.assertEqual(self.update_sel.wrap_up_4(17), 5 * 4) diff --git a/source/tests/pd/test_utils.py b/source/tests/pd/test_utils.py new file mode 100644 index 0000000000..8d25cff964 --- /dev/null +++ b/source/tests/pd/test_utils.py @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import paddle + +from deepmd.pd.utils.utils import ( + to_numpy_array, + to_paddle_tensor, +) + +from ..seed import ( + GLOBAL_SEED, +) + + +class TestCvt(unittest.TestCase): + def test_to_numpy(self): + rng = np.random.default_rng(GLOBAL_SEED) + foo = rng.normal([3, 4]) + for ptp, npp in zip( + [paddle.float16, paddle.float32, paddle.float64], + [np.float16, np.float32, np.float64], + ): + foo = foo.astype(npp) + bar = to_paddle_tensor(foo) + self.assertEqual(bar.dtype, ptp) + onk = to_numpy_array(bar) + self.assertEqual(onk.dtype, npp) + with self.assertRaises(ValueError) as ee: + foo = foo.astype(np.int8) + bar = to_paddle_tensor(foo) + with self.assertRaises(ValueError) as ee: + bar = to_paddle_tensor(foo) + bar = to_numpy_array(bar.int()) diff --git a/source/tests/pd/water b/source/tests/pd/water new file mode 120000 index 0000000000..7e5219651f --- /dev/null +++ b/source/tests/pd/water @@ -0,0 +1 @@ +model/water \ No newline at end of file diff --git a/source/tests/pd/water_tensor/dipole/atomic_system/nopbc b/source/tests/pd/water_tensor/dipole/atomic_system/nopbc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/source/tests/pd/water_tensor/dipole/atomic_system/set.000/atomic_dipole.npy b/source/tests/pd/water_tensor/dipole/atomic_system/set.000/atomic_dipole.npy new file mode 100644 index 0000000000..2cabc71e21 Binary files /dev/null and b/source/tests/pd/water_tensor/dipole/atomic_system/set.000/atomic_dipole.npy differ diff --git a/source/tests/pd/water_tensor/dipole/atomic_system/set.000/box.npy b/source/tests/pd/water_tensor/dipole/atomic_system/set.000/box.npy new file mode 100644 index 0000000000..ed03e9c85b Binary files /dev/null and b/source/tests/pd/water_tensor/dipole/atomic_system/set.000/box.npy differ diff --git a/source/tests/pd/water_tensor/dipole/atomic_system/set.000/coord.npy b/source/tests/pd/water_tensor/dipole/atomic_system/set.000/coord.npy new file mode 100644 index 0000000000..ebee6a5611 Binary files /dev/null and b/source/tests/pd/water_tensor/dipole/atomic_system/set.000/coord.npy differ diff --git a/source/tests/pd/water_tensor/dipole/atomic_system/type.raw b/source/tests/pd/water_tensor/dipole/atomic_system/type.raw new file mode 100644 index 0000000000..6c71c85e58 --- /dev/null +++ b/source/tests/pd/water_tensor/dipole/atomic_system/type.raw @@ -0,0 +1 @@ +0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 
1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 diff --git a/source/tests/pd/water_tensor/dipole/atomic_system/type_map.raw b/source/tests/pd/water_tensor/dipole/atomic_system/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/source/tests/pd/water_tensor/dipole/atomic_system/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/source/tests/pd/water_tensor/dipole/global_system/nopbc b/source/tests/pd/water_tensor/dipole/global_system/nopbc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/source/tests/pd/water_tensor/dipole/global_system/set.000/box.npy b/source/tests/pd/water_tensor/dipole/global_system/set.000/box.npy new file mode 100644 index 0000000000..652530cfe8 Binary files /dev/null and b/source/tests/pd/water_tensor/dipole/global_system/set.000/box.npy differ diff --git a/source/tests/pd/water_tensor/dipole/global_system/set.000/coord.npy b/source/tests/pd/water_tensor/dipole/global_system/set.000/coord.npy new file mode 100644 index 0000000000..4f6c37e77a Binary files /dev/null and b/source/tests/pd/water_tensor/dipole/global_system/set.000/coord.npy differ diff --git a/source/tests/pd/water_tensor/dipole/global_system/set.000/dipole.npy b/source/tests/pd/water_tensor/dipole/global_system/set.000/dipole.npy new file mode 100644 index 0000000000..c16efad029 Binary files /dev/null and b/source/tests/pd/water_tensor/dipole/global_system/set.000/dipole.npy differ diff --git a/source/tests/pd/water_tensor/dipole/global_system/type.raw b/source/tests/pd/water_tensor/dipole/global_system/type.raw new file mode 100644 index 0000000000..6c71c85e58 --- /dev/null +++ b/source/tests/pd/water_tensor/dipole/global_system/type.raw @@ -0,0 +1 @@ +0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 diff --git a/source/tests/pd/water_tensor/dipole/global_system/type_map.raw b/source/tests/pd/water_tensor/dipole/global_system/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/source/tests/pd/water_tensor/dipole/global_system/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/source/tests/pd/water_tensor/polar/atomic_system/set.000/atomic_polarizability.npy b/source/tests/pd/water_tensor/polar/atomic_system/set.000/atomic_polarizability.npy new file mode 100644 index 0000000000..2aa2cdd4f2 Binary files /dev/null and b/source/tests/pd/water_tensor/polar/atomic_system/set.000/atomic_polarizability.npy differ diff --git a/source/tests/pd/water_tensor/polar/atomic_system/set.000/box.npy b/source/tests/pd/water_tensor/polar/atomic_system/set.000/box.npy new file mode 100644 index 0000000000..a0ce7ef9a7 Binary files /dev/null and b/source/tests/pd/water_tensor/polar/atomic_system/set.000/box.npy differ diff --git a/source/tests/pd/water_tensor/polar/atomic_system/set.000/coord.npy b/source/tests/pd/water_tensor/polar/atomic_system/set.000/coord.npy new file mode 100644 index 0000000000..baa2c0a7c3 Binary files /dev/null and b/source/tests/pd/water_tensor/polar/atomic_system/set.000/coord.npy differ diff --git a/source/tests/pd/water_tensor/polar/atomic_system/type.raw 
b/source/tests/pd/water_tensor/polar/atomic_system/type.raw new file mode 100644 index 0000000000..6c71c85e58 --- /dev/null +++ b/source/tests/pd/water_tensor/polar/atomic_system/type.raw @@ -0,0 +1 @@ +0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 diff --git a/source/tests/pd/water_tensor/polar/atomic_system/type_map.raw b/source/tests/pd/water_tensor/polar/atomic_system/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/source/tests/pd/water_tensor/polar/atomic_system/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/source/tests/pd/water_tensor/polar/global_system/set.000/box.npy b/source/tests/pd/water_tensor/polar/global_system/set.000/box.npy new file mode 100644 index 0000000000..652530cfe8 Binary files /dev/null and b/source/tests/pd/water_tensor/polar/global_system/set.000/box.npy differ diff --git a/source/tests/pd/water_tensor/polar/global_system/set.000/coord.npy b/source/tests/pd/water_tensor/polar/global_system/set.000/coord.npy new file mode 100644 index 0000000000..4f6c37e77a Binary files /dev/null and b/source/tests/pd/water_tensor/polar/global_system/set.000/coord.npy differ diff --git a/source/tests/pd/water_tensor/polar/global_system/set.000/polarizability.npy b/source/tests/pd/water_tensor/polar/global_system/set.000/polarizability.npy new file mode 100644 index 0000000000..893767e565 Binary files /dev/null and b/source/tests/pd/water_tensor/polar/global_system/set.000/polarizability.npy differ diff --git a/source/tests/pd/water_tensor/polar/global_system/type.raw b/source/tests/pd/water_tensor/polar/global_system/type.raw new file mode 100644 index 0000000000..6c71c85e58 --- /dev/null +++ b/source/tests/pd/water_tensor/polar/global_system/type.raw @@ -0,0 +1 @@ +0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 diff --git a/source/tests/pd/water_tensor/polar/global_system/type_map.raw b/source/tests/pd/water_tensor/polar/global_system/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/source/tests/pd/water_tensor/polar/global_system/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/source/tests/pd/water_tensor/se_e2_a.json b/source/tests/pd/water_tensor/se_e2_a.json new file mode 100644 index 0000000000..946c81aa63 --- /dev/null +++ b/source/tests/pd/water_tensor/se_e2_a.json @@ -0,0 +1,85 @@ +{ + "_comment1": " model parameters", + "model": { + "type_map": [ + "O", + "H" + ], + "atom_exclude_types": [ + 1 + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 3.80, + "rcut": 4.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 6, + "type_one_side": true, + "precision": "float64", + "seed": 1, + "_comment2": " that's all" + }, + "fitting_net": { + "type": "dipole", + "neuron": [ + 100, + 100, + 100 + ], + "resnet_dt": true, + "precision": "float64", + "seed": 1, + "_comment3": " that's all" 
+ }, + "_comment4": " that's all" + }, + "learning_rate": { + "type": "exp", + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "decay_steps": 5000, + "_comment5": "that's all" + }, + "loss": { + "type": "tensor", + "pref": 1.0, + "pref_atomic": 1.0, + "_comment6": " that's all" + }, + "_comment7": " training controls", + "training": { + "training_data": { + "systems": [ + "pd/water_tensor/dipole/atomic_system", + "pd/water_tensor/dipole/global_system" + ], + "batch_size": "auto", + "_comment8": "that's all" + }, + "validation_data": { + "systems": [ + "pd/water_tensor/dipole/atomic_system", + "pd/water_tensor/dipole/global_system" + ], + "batch_size": 1, + "numb_btch": 3, + "_comment9": "that's all" + }, + "numb_steps": 2000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "_comment10": "that's all" + }, + "_comment11": "that's all" +}
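The new source/tests/pd/test_update_sel.py pins down the neighbor-selection update rule only through its expected values. As a reading aid, a minimal Python sketch that reproduces those expectations follows; it assumes "auto" defaults to an expansion ratio of 1.1 and is only an illustration consistent with the test assertions, not the actual deepmd.pd.utils.update_sel.UpdateSel implementation.

def wrap_up_4(xx) -> int:
    # Round up to the next multiple of 4: 12 -> 12, 13 -> 16, 17 -> 20,
    # matching the expectations in test_wrap_up_4.
    return 4 * ((int(xx) + 3) // 4)


def expand_sel(observed_sel, sel: str) -> list:
    # "auto" or "auto:<ratio>" scales the observed neighbor counts by the
    # ratio (assumed to default to 1.1) and rounds each entry up to a
    # multiple of 4, yielding the values asserted in test_update_one_sel.
    ratio = float(sel.split(":")[1]) if ":" in sel else 1.1
    return [wrap_up_4(s * ratio) for s in observed_sel]


if __name__ == "__main__":
    # Mirror the mocked neighbor statistics [10, 20] used in the tests.
    assert expand_sel([10, 20], "auto") == [12, 24]
    assert expand_sel([10, 20], "auto:1.5") == [16, 32]
    assert [wrap_up_4(i) for i in (12, 13, 17)] == [12, 16, 20]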