diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 91bcae3702..18fd7a1ac1 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -90,7 +90,7 @@ jobs: rm -rf .git if: matrix.dp_pkg_name == 'deepmd-kit-cu11' - name: Build wheels - uses: pypa/cibuildwheel@v2.16 + uses: pypa/cibuildwheel@v2.17 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: all diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index 72977bd339..e11f773b3a 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -42,7 +42,7 @@ jobs: - name: Test C library run: ./source/install/docker_test_package_c.sh - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: ${{ matrix.filename }} diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index d98f8ca58d..0e2243b75e 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -31,6 +31,10 @@ jobs: run: | wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip unzip libtorch.zip + # https://github.com/actions/runner-images/issues/9491 + - name: Fix kernel mmap rnd bits + run: sudo sysctl vm.mmap_rnd_bits=28 + if: ${{ matrix.check_memleak }} - run: | export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch source/install/test_cc_local.sh diff --git a/.github/workflows/todo.yml b/.github/workflows/todo.yml new file mode 100644 index 0000000000..2608bb1071 --- /dev/null +++ b/.github/workflows/todo.yml @@ -0,0 +1,20 @@ +name: TODO workflow +on: + push: + branches: + - devel +jobs: + build: + if: github.repository_owner == 'deepmodeling' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run tdg-github-action + uses: ribtoks/tdg-github-action@master + with: + TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + SHA: ${{ github.sha }} + REF: ${{ github.ref }} + EXCLUDE_PATTERN: "(source/3rdparty|.git)/.*" + COMMENT_ON_ISSUES: 1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 041d47f0da..1860f87fea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,6 @@ repos: - id: check-json - id: check-added-large-files args: ['--maxkb=1024', '--enforce-all'] - # TODO: remove the following after resolved exclude: | (?x)^( source/tests/infer/dipolecharge_e.pbtxt| @@ -30,7 +29,7 @@ repos: exclude: ^source/3rdparty - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.2.2 + rev: v0.3.3 hooks: - id: ruff args: ["--fix"] @@ -53,7 +52,7 @@ repos: - id: blacken-docs # C++ - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v17.0.6 + rev: v18.1.1 hooks: - id: clang-format exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc diff --git a/backend/dp_backend.py b/backend/dp_backend.py index d28afdb239..2ca0ff2f93 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """A PEP-517 backend to find TensorFlow.""" + from typing import ( List, ) diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index fb9e719600..4d63f3118d 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -83,6 +83,7 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # TypeError if submodule_search_locations are None # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): + tf_version = "" if os.environ.get("CIBUILDWHEEL", "0") == "1": cuda_version = os.environ.get("CUDA_VERSION", "12.2") if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): @@ -99,9 +100,10 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'", ] ) + tf_version = "2.14.1" else: raise RuntimeError("Unsupported CUDA version") - requires.extend(get_tf_requirement()["cpu"]) + requires.extend(get_tf_requirement(tf_version)["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None return tf_install_dir, requires diff --git a/deepmd/__init__.py b/deepmd/__init__.py index 5664c3edc6..1ce4beb723 100644 --- a/deepmd/__init__.py +++ b/deepmd/__init__.py @@ -7,6 +7,7 @@ The top module (deepmd.__init__) should not import any third-party modules for performance. """ + try: from deepmd._version import version as __version__ except ImportError: diff --git a/deepmd/backend/__init__.py b/deepmd/backend/__init__.py index 8969edd480..2b3f24c5ed 100644 --- a/deepmd/backend/__init__.py +++ b/deepmd/backend/__init__.py @@ -3,6 +3,7 @@ Avoid directly importing third-party libraries in this module for performance. """ + # copy from dpdata from importlib import ( import_module, diff --git a/deepmd/common.py b/deepmd/common.py index c776975591..84f98c6318 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -71,8 +71,9 @@ ) -# TODO this is not a good way to do things. This is some global variable to which -# TODO anyone can write and there is no good way to keep track of the changes +# TODO: refactor data_requirement to make it not a global variable +# this is not a good way to do things. 
This is some global variable to which +# anyone can write and there is no good way to keep track of the changes data_requirement = {} @@ -180,9 +181,10 @@ def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray: return default_mesh -# TODO maybe rename this to j_deprecated and only warn about deprecated keys, -# TODO if the deprecated_key argument is left empty function puppose is only custom -# TODO error since dict[key] already raises KeyError when the key is missing +# TODO: rename j_must_have to j_deprecated and only warn about deprecated keys +# maybe rename this to j_deprecated and only warn about deprecated keys, +# if the deprecated_key argument is left empty function puppose is only custom +# error since dict[key] already raises KeyError when the key is missing def j_must_have( jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = [] ) -> "_DICT_VAL": @@ -238,7 +240,7 @@ def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: raise TypeError("config file must be json, or yaml/yml") -# TODO port completely to pathlib when all callers are ported +# TODO port expand_sys_str completely to pathlib when all callers are ported def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: """Recursively iterate over directories taking those that contain `type.raw` file. diff --git a/deepmd/dpmodel/atomic_model/__init__.py b/deepmd/dpmodel/atomic_model/__init__.py index e51ca0a65e..37f6b8bf28 100644 --- a/deepmd/dpmodel/atomic_model/__init__.py +++ b/deepmd/dpmodel/atomic_model/__init__.py @@ -14,7 +14,6 @@ """ - from .base_atomic_model import ( BaseAtomicModel, ) diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py index 990847c1de..42d1e67138 100644 --- a/deepmd/dpmodel/atomic_model/base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py @@ -56,22 +56,19 @@ def reinit_pair_exclude( def atomic_output_def(self) -> FittingOutputDef: old_def = self.fitting_output_def() - if self.atom_excl is None: - return old_def - else: - old_list = list(old_def.get_data().values()) - return FittingOutputDef( - old_list # noqa:RUF005 - + [ - OutputVariableDef( - name="mask", - shape=[1], - reduciable=False, - r_differentiable=False, - c_differentiable=False, - ) - ] - ) + old_list = list(old_def.get_data().values()) + return FittingOutputDef( + old_list # noqa:RUF005 + + [ + OutputVariableDef( + name="mask", + shape=[1], + reduciable=False, + r_differentiable=False, + c_differentiable=False, + ) + ] + ) def forward_common_atomic( self, @@ -82,6 +79,37 @@ def forward_common_atomic( fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, ) -> Dict[str, np.ndarray]: + """Common interface for atomic inference. + + This method accept extended coordinates, extended atom typs, neighbor list, + and predict the atomic contribution of the fit property. + + Parameters + ---------- + extended_coord + extended coodinates, shape: nf x (nall x 3) + extended_atype + extended atom typs, shape: nf x nall + for a type < 0 indicating the atomic is virtual. + nlist + neighbor list, shape: nf x nloc x nsel + mapping + extended to local index mapping, shape: nf x nall + fparam + frame parameters, shape: nf x dim_fparam + aparam + atomic parameter, shape: nf x nloc x dim_aparam + + Returns + ------- + ret_dict + dict of output atomic properties. + should implement the definition of `fitting_output_def`. + ret_dict["mask"] of shape nf x nloc will be provided. 
+ ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real. + ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual. + + """ _, nloc, _ = nlist.shape atype = extended_atype[:, :nloc] if self.pair_excl is not None: @@ -89,24 +117,28 @@ def forward_common_atomic( # exclude neighbors in the nlist nlist = np.where(pair_mask == 1, nlist, -1) + ext_atom_mask = self.make_atom_mask(extended_atype) ret_dict = self.forward_atomic( extended_coord, - extended_atype, + np.where(ext_atom_mask, extended_atype, 0), nlist, mapping=mapping, fparam=fparam, aparam=aparam, ) + # nf x nloc + atom_mask = ext_atom_mask[:, :nloc].astype(np.int32) if self.atom_excl is not None: - atom_mask = self.atom_excl.build_type_exclude_mask(atype) - for kk in ret_dict.keys(): - out_shape = ret_dict[kk].shape - ret_dict[kk] = ( - ret_dict[kk].reshape([out_shape[0], out_shape[1], -1]) - * atom_mask[:, :, None] - ).reshape(out_shape) - ret_dict["mask"] = atom_mask + atom_mask *= self.atom_excl.build_type_exclude_mask(atype) + + for kk in ret_dict.keys(): + out_shape = ret_dict[kk].shape + ret_dict[kk] = ( + ret_dict[kk].reshape([out_shape[0], out_shape[1], -1]) + * atom_mask[:, :, None] + ).reshape(out_shape) + ret_dict["mask"] = atom_mask return ret_dict diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py index 110aa26162..4907483d1d 100644 --- a/deepmd/dpmodel/atomic_model/dp_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py @@ -26,6 +26,7 @@ ) +@BaseAtomicModel.register("standard") class DPAtomicModel(BaseAtomicModel): """Model give atomic prediction of some physical property. diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py index ac2a73a381..088cf34900 100644 --- a/deepmd/dpmodel/atomic_model/linear_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/linear_atomic_model.py @@ -1,9 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy -import sys -from abc import ( - abstractmethod, -) from typing import ( Dict, List, @@ -225,40 +221,38 @@ def fitting_output_def(self) -> FittingOutputDef: ] ) - @staticmethod - def serialize(models, type_map) -> dict: + def serialize(self) -> dict: return { "@class": "Model", "type": "linear", "@version": 1, - "models": [model.serialize() for model in models], - "model_name": [model.__class__.__name__ for model in models], - "type_map": type_map, + "models": [model.serialize() for model in self.models], + "type_map": self.type_map, } - @staticmethod - def deserialize(data) -> Tuple[List[BaseAtomicModel], List[str]]: + @classmethod + def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 1, 1) data.pop("@class") data.pop("type") - model_names = data["model_name"] - type_map = data["type_map"] + type_map = data.pop("type_map") models = [ - getattr(sys.modules[__name__], name).deserialize(model) - for name, model in zip(model_names, data["models"]) + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] ] - return models, type_map + data.pop("models") + return cls(models, type_map, **data) - @abstractmethod def _compute_weight( self, extended_coord: np.ndarray, extended_atype: np.ndarray, nlists_: List[np.ndarray], - ) -> np.ndarray: + ) -> List[np.ndarray]: """This should be a list of user defined weights that matches the number of models to be combined.""" - 
raise NotImplementedError + nmodels = len(self.models) + return [np.ones(1) / nmodels for _ in range(nmodels)] def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" @@ -335,10 +329,10 @@ def serialize(self) -> dict: { "@class": "Model", "type": "zbl", - "@version": 1, - "models": LinearEnergyAtomicModel.serialize( - [self.dp_model, self.zbl_model], self.type_map - ), + "@version": 2, + "models": LinearEnergyAtomicModel( + models=[self.models[0], self.models[1]], type_map=self.type_map + ).serialize(), "sw_rmin": self.sw_rmin, "sw_rmax": self.sw_rmax, "smin_alpha": self.smin_alpha, @@ -349,16 +343,15 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 1, 1) + check_version_compatibility(data.pop("@version", 1), 2, 1) data.pop("@class") data.pop("type") sw_rmin = data.pop("sw_rmin") sw_rmax = data.pop("sw_rmax") smin_alpha = data.pop("smin_alpha") - - ([dp_model, zbl_model], type_map) = LinearEnergyAtomicModel.deserialize( - data.pop("models") - ) + linear_model = LinearEnergyAtomicModel.deserialize(data.pop("models")) + dp_model, zbl_model = linear_model.models + type_map = linear_model.type_map return cls( dp_model=dp_model, diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py index ce1a6708e6..936c2b0943 100644 --- a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from abc import ( ABC, - abstractclassmethod, abstractmethod, ) from typing import ( @@ -13,6 +12,10 @@ from deepmd.dpmodel.output_def import ( FittingOutputDef, ) +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) def make_base_atomic_model( @@ -31,7 +34,7 @@ def make_base_atomic_model( """ - class BAM(ABC): + class BAM(ABC, PluginVariant, make_plugin_registry("atomic model")): """Base Atomic Model provides the interfaces of an atomic model.""" @abstractmethod @@ -128,10 +131,33 @@ def fwd( def serialize(self) -> dict: pass - @abstractclassmethod - def deserialize(cls): + @classmethod + @abstractmethod + def deserialize(cls, data: dict): pass + def make_atom_mask( + self, + atype: t_tensor, + ) -> t_tensor: + """The atoms with type < 0 are treated as virutal atoms, + which serves as place-holders for multi-frame calculations + with different number of atoms in different frames. + + Parameters + ---------- + atype + Atom types. >= 0 for real atoms <0 for virtual atoms. + + Returns + ------- + mask + True for real atoms and False for virutal atoms. 
+ + """ + # supposed to be supported by all backends + return atype >= 0 + def do_grad_r( self, var_name: Optional[str] = None, @@ -174,10 +200,6 @@ def do_grad_(self, var_name: str, base: str) -> bool: return self.fitting_output_def()[var_name].c_differentiable return self.fitting_output_def()[var_name].r_differentiable - def get_model_def_script(self) -> str: - # TODO: implement this method; saved to model - raise NotImplementedError - setattr(BAM, fwd_method_name, BAM.fwd) delattr(BAM, "fwd") diff --git a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py index 46ec808ad4..99b8ec1eff 100644 --- a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py @@ -25,6 +25,7 @@ ) +@BaseAtomicModel.register("pairtab") class PairTabAtomicModel(BaseAtomicModel): """Pairwise tabulation energy model. diff --git a/deepmd/dpmodel/fitting/__init__.py b/deepmd/dpmodel/fitting/__init__.py index 929a63fda7..866a710a3b 100644 --- a/deepmd/dpmodel/fitting/__init__.py +++ b/deepmd/dpmodel/fitting/__init__.py @@ -2,6 +2,9 @@ from .dipole_fitting import ( DipoleFitting, ) +from .dos_fitting import ( + DOSFittingNet, +) from .ener_fitting import ( EnergyFittingNet, ) @@ -21,4 +24,5 @@ "DipoleFitting", "EnergyFittingNet", "PolarFitting", + "DOSFittingNet", ] diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index e00f031549..6d6324770c 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy from typing import ( Any, Dict, @@ -19,6 +20,9 @@ OutputVariableDef, fitting_check_output, ) +from deepmd.utils.version import ( + check_version_compatibility, +) from .general_fitting import ( GeneralFitting, @@ -153,6 +157,12 @@ def serialize(self) -> dict: data["c_differentiable"] = self.c_differentiable return data + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + def output_def(self): return FittingOutputDef( [ diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py new file mode 100644 index 0000000000..7c86d392b0 --- /dev/null +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + TYPE_CHECKING, + List, + Optional, + Union, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + DEFAULT_PRECISION, +) +from deepmd.dpmodel.fitting.invar_fitting import ( + InvarFitting, +) + +if TYPE_CHECKING: + from deepmd.dpmodel.fitting.general_fitting import ( + GeneralFitting, + ) + +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@InvarFitting.register("dos") +class DOSFittingNet(InvarFitting): + def __init__( + self, + ntypes: int, + dim_descrpt: int, + numb_dos: int = 300, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + bias_dos: Optional[np.ndarray] = None, + rcond: Optional[float] = None, + trainable: Union[bool, List[bool]] = True, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = False, + exclude_types: List[int] = [], + # not used + seed: Optional[int] = None, + ): + if bias_dos is not None: + self.bias_dos = bias_dos + else: + self.bias_dos = 
np.zeros((ntypes, numb_dos), dtype=DEFAULT_PRECISION) + super().__init__( + var_name="dos", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + dim_out=numb_dos, + neuron=neuron, + resnet_dt=resnet_dt, + bias_atom=bias_dos, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + rcond=rcond, + trainable=trainable, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + exclude_types=exclude_types, + ) + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data["numb_dos"] = data.pop("dim_out") + data.pop("tot_ener_zero", None) + data.pop("var_name", None) + data.pop("layer_name", None) + data.pop("use_aparam_as_mask", None) + data.pop("spin", None) + data.pop("atom_ener", None) + return super().deserialize(data) + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + dd = { + **super().serialize(), + "type": "dos", + } + dd["@variables"]["bias_atom_e"] = self.bias_atom_e + + return dd diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py index 3a0e9909b9..7f83f1e886 100644 --- a/deepmd/dpmodel/fitting/ener_fitting.py +++ b/deepmd/dpmodel/fitting/ener_fitting.py @@ -18,6 +18,9 @@ from deepmd.dpmodel.fitting.general_fitting import ( GeneralFitting, ) +from deepmd.utils.version import ( + check_version_compatibility, +) @InvarFitting.register("ener") @@ -69,6 +72,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 01bf107c63..5681f5bf0c 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -21,9 +21,6 @@ FittingNet, NetworkCollection, ) -from deepmd.utils.version import ( - check_version_compatibility, -) from .base_fitting import ( BaseFitting, @@ -43,6 +40,8 @@ class GeneralFitting(NativeOP, BaseFitting): The dimension of the input descriptor. neuron Number of neurons :math:`N` in each hidden layer of the fitting net + bias_atom_e + Average enery per atom for each element. 
resnet_dt Time-step `dt` in the resnet construction: :math:`y = x + dt * \phi (Wx + b)` @@ -88,6 +87,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + bias_atom_e: Optional[np.ndarray] = None, rcond: Optional[float] = None, tot_ener_zero: bool = False, trainable: Optional[List[bool]] = None, @@ -128,7 +128,11 @@ def __init__( net_dim_out = self._net_out_dim() # init constants - self.bias_atom_e = np.zeros([self.ntypes, net_dim_out]) + if bias_atom_e is None: + self.bias_atom_e = np.zeros([self.ntypes, net_dim_out]) + else: + assert bias_atom_e.shape == (self.ntypes, net_dim_out) + self.bias_atom_e = bias_atom_e if self.numb_fparam > 0: self.fparam_avg = np.zeros(self.numb_fparam) self.fparam_inv_std = np.ones(self.numb_fparam) @@ -256,7 +260,6 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 1, 1) data.pop("@class") data.pop("type") variables = data.pop("@variables") @@ -310,7 +313,8 @@ def _call_common( ) xx = descriptor if self.remove_vaccum_contribution is not None: - # TODO: Idealy, the input for vaccum should be computed; + # TODO: comput the input for vaccum when setting remove_vaccum_contribution + # Idealy, the input for vaccum should be computed; # we consider it as always zero for convenience. # Needs a compute_input_stats for vaccum passed from the # descriptor. diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py index fd556ff074..9bf1731830 100644 --- a/deepmd/dpmodel/fitting/invar_fitting.py +++ b/deepmd/dpmodel/fitting/invar_fitting.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy from typing import ( Any, Dict, @@ -16,6 +17,9 @@ OutputVariableDef, fitting_check_output, ) +from deepmd.utils.version import ( + check_version_compatibility, +) from .general_fitting import ( GeneralFitting, @@ -78,6 +82,8 @@ class InvarFitting(GeneralFitting): Number of atomic parameter rcond The condition number for the regression of atomic energy. + bias_atom + Bias for each element. tot_ener_zero Force the total energy to zero. Useful for the charge fitting. 
trainable @@ -113,10 +119,11 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + bias_atom: Optional[np.ndarray] = None, rcond: Optional[float] = None, tot_ener_zero: bool = False, trainable: Optional[List[bool]] = None, - atom_ener: Optional[List[float]] = [], + atom_ener: Optional[List[float]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, layer_name: Optional[List[Optional[str]]] = None, @@ -148,6 +155,7 @@ def __init__( numb_fparam=numb_fparam, numb_aparam=numb_aparam, rcond=rcond, + bias_atom_e=bias_atom, tot_ener_zero=tot_ener_zero, trainable=trainable, activation_function=activation_function, @@ -169,6 +177,12 @@ def serialize(self) -> dict: data["atom_ener"] = self.atom_ener return data + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + def _net_out_dim(self): """Set the FittingNet output dim.""" return self.dim_out @@ -177,10 +191,6 @@ def compute_output_stats(self, merged): """Update the output bias for fitting net.""" raise NotImplementedError - def init_fitting_stat(self, result_dict): - """Initialize the model bias by the statistics.""" - raise NotImplementedError - def output_def(self): return FittingOutputDef( [ diff --git a/deepmd/dpmodel/fitting/make_base_fitting.py b/deepmd/dpmodel/fitting/make_base_fitting.py index 041076ba89..c7341798c3 100644 --- a/deepmd/dpmodel/fitting/make_base_fitting.py +++ b/deepmd/dpmodel/fitting/make_base_fitting.py @@ -67,10 +67,6 @@ def compute_output_stats(self, merged): """Update the output bias for fitting net.""" raise NotImplementedError - def init_fitting_stat(self, **kwargs): - """Initialize the model bias by the statistics.""" - raise NotImplementedError - @abstractmethod def serialize(self) -> dict: """Serialize the obj to dict.""" diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 4f7c33b9a8..5d75037137 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy from typing import ( Any, Dict, @@ -22,6 +23,9 @@ OutputVariableDef, fitting_check_output, ) +from deepmd.utils.version import ( + check_version_compatibility, +) from .general_fitting import ( GeneralFitting, @@ -139,6 +143,7 @@ def __init__( ntypes, 1 ) self.shift_diag = shift_diag + self.constant_matrix = np.zeros(ntypes, dtype=GLOBAL_NP_FLOAT_PRECISION) super().__init__( var_name=var_name, ntypes=ntypes, @@ -168,15 +173,36 @@ def _net_out_dim(self): else self.embedding_width * self.embedding_width ) + def __setitem__(self, key, value): + if key in ["constant_matrix"]: + self.constant_matrix = value + else: + super().__setitem__(key, value) + + def __getitem__(self, key): + if key in ["constant_matrix"]: + return self.constant_matrix + else: + return super().__getitem__(key) + def serialize(self) -> dict: data = super().serialize() data["type"] = "polar" + data["@version"] = 2 data["embedding_width"] = self.embedding_width data["old_impl"] = self.old_impl data["fit_diag"] = self.fit_diag + data["shift_diag"] = self.shift_diag data["@variables"]["scale"] = self.scale + data["@variables"]["constant_matrix"] = self.constant_matrix return data + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + 
check_version_compatibility(data.pop("@version", 1), 2, 1) + return super().deserialize(data) + def output_def(self): return FittingOutputDef( [ @@ -246,4 +272,13 @@ def call( "bim,bmj->bij", np.transpose(gr, axes=(0, 2, 1)), out ) # (nframes * nloc, 3, 3) out = out.reshape(nframes, nloc, 3, 3) + if self.shift_diag: + bias = self.constant_matrix[atype] + # (nframes, nloc, 1) + bias = np.expand_dims(bias, axis=-1) * self.scale[atype] + eye = np.eye(3) + eye = np.tile(eye, (nframes, nloc, 1, 1)) + # (nframes, nloc, 3, 3) + bias = np.expand_dims(bias, axis=-1) * eye + out = out + bias return {self.var_name: out} diff --git a/deepmd/dpmodel/model/__init__.py b/deepmd/dpmodel/model/__init__.py index cb796e6d35..c1ff15ab0d 100644 --- a/deepmd/dpmodel/model/__init__.py +++ b/deepmd/dpmodel/model/__init__.py @@ -8,6 +8,8 @@ according to output variable definition `deepmd.dpmodel.OutputVariableDef`. +All models should be inherited from :class:`deepmd.dpmodel.model.base_model.BaseModel`. +Models generated by `make_model` have already done it. """ from .dp_model import ( diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py index 95c448442e..5169d1b5fe 100644 --- a/deepmd/dpmodel/model/base_model.py +++ b/deepmd/dpmodel/model/base_model.py @@ -172,3 +172,10 @@ class BaseModel(make_base_model()): deepmd.dpmodel.model.base_model.BaseBaseModel Backend-independent BaseModel class. """ + + def __init__(self) -> None: + self.model_def_script = "" + + def get_model_def_script(self) -> str: + """Get the model definition script.""" + return self.model_def_script diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py index 15f9027d4c..8d84c435b4 100644 --- a/deepmd/dpmodel/model/dp_model.py +++ b/deepmd/dpmodel/model/dp_model.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later + from deepmd.dpmodel.atomic_model import ( DPAtomicModel, ) @@ -17,7 +18,7 @@ # use "class" to resolve "Variable not allowed in type expression" @BaseModel.register("standard") -class DPModel(make_model(DPAtomicModel), BaseModel): +class DPModel(make_model(DPAtomicModel)): @classmethod def update_sel(cls, global_jdata: dict, local_jdata: dict): """Update the selection and perform neighbor statistics. diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index d1f671c8de..68889ad331 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -4,10 +4,14 @@ List, Optional, Tuple, + Type, ) import numpy as np +from deepmd.dpmodel.atomic_model.base_atomic_model import ( + BaseAtomicModel, +) from deepmd.dpmodel.common import ( GLOBAL_ENER_FLOAT_PRECISION, GLOBAL_NP_FLOAT_PRECISION, @@ -15,7 +19,11 @@ RESERVED_PRECISON_DICT, NativeOP, ) +from deepmd.dpmodel.model.base_model import ( + BaseModel, +) from deepmd.dpmodel.output_def import ( + FittingOutputDef, ModelOutputDef, OutputVariableCategory, OutputVariableOperation, @@ -34,7 +42,7 @@ ) -def make_model(T_AtomicModel): +def make_model(T_AtomicModel: Type[BaseAtomicModel]): """Make a model as a derived class of an atomic model. The model provide two interfaces. 
@@ -57,16 +65,19 @@ def make_model(T_AtomicModel): """ - class CM(T_AtomicModel, NativeOP): + class CM(NativeOP, BaseModel): def __init__( self, *args, + # underscore to prevent conflict with normal inputs + atomic_model_: Optional[T_AtomicModel] = None, **kwargs, ): - super().__init__( - *args, - **kwargs, - ) + BaseModel.__init__(self) + if atomic_model_ is not None: + self.atomic_model: T_AtomicModel = atomic_model_ + else: + self.atomic_model: T_AtomicModel = T_AtomicModel(*args, **kwargs) self.precision_dict = PRECISION_DICT self.reverse_precision_dict = RESERVED_PRECISON_DICT self.global_np_float_precision = GLOBAL_NP_FLOAT_PRECISION @@ -208,7 +219,7 @@ def call_lower( extended_coord, fparam=fparam, aparam=aparam ) del extended_coord, fparam, aparam - atomic_ret = self.forward_common_atomic( + atomic_ret = self.atomic_model.forward_common_atomic( cc_ext, extended_atype, nlist, @@ -377,4 +388,89 @@ def _format_nlist( assert ret.shape[-1] == nnei return ret + def do_grad_r( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is r_differentiable. + if var_name is None, returns if any of the variable is r_differentiable. + """ + return self.atomic_model.do_grad_r(var_name) + + def do_grad_c( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is c_differentiable. + if var_name is None, returns if any of the variable is c_differentiable. + """ + return self.atomic_model.do_grad_c(var_name) + + def serialize(self) -> dict: + return self.atomic_model.serialize() + + @classmethod + def deserialize(cls, data) -> "CM": + return cls(atomic_model_=T_AtomicModel.deserialize(data)) + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.atomic_model.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.atomic_model.get_dim_aparam() + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.atomic_model.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return self.atomic_model.is_aparam_nall() + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.atomic_model.get_rcut() + + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.atomic_model.get_type_map() + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nsel() + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nnei() + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.atomic_model.get_sel() + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. 
+ + """ + return self.atomic_model.mixed_types() + + def atomic_output_def(self) -> FittingOutputDef: + """Get the output def of the atomic model.""" + return self.atomic_model.atomic_output_def() + return CM diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 6206367b1b..661358ed70 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -3,6 +3,7 @@ See issue #2982 for more information. """ + import copy import itertools import json @@ -89,7 +90,8 @@ def __call__(self): return self.count -# TODO: should be moved to otherwhere... +# TODO: move save_dp_model and load_dp_model to a seperated module +# should be moved to otherwhere... def save_dp_model(filename: str, model_dict: dict) -> None: """Save a DP model to a file in the native format. @@ -230,6 +232,10 @@ def deserialize(cls, data: dict) -> "NativeLayer": variables.get("b", None), variables.get("idt", None), ) + if obj.b is not None: + obj.b = obj.b.ravel() + if obj.idt is not None: + obj.idt = obj.idt.ravel() obj.check_shape_consistency() return obj @@ -604,7 +610,8 @@ def __init__( resnet_dt=resnet_dt, precision=precision, ) - i_in, i_ot = neuron[-1], out_dim + i_in = neuron[-1] if len(neuron) > 0 else in_dim + i_ot = out_dim self.layers.append( T_NetworkLayer( i_in, diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py index 1aa1820495..ca8b18023b 100644 --- a/deepmd/dpmodel/utils/nlist.py +++ b/deepmd/dpmodel/utils/nlist.py @@ -15,7 +15,7 @@ ## translated from torch implemantation by chatgpt def build_neighbor_list( - coord1: np.ndarray, + coord: np.ndarray, atype: np.ndarray, nloc: int, rcut: float, @@ -26,10 +26,11 @@ def build_neighbor_list( Parameters ---------- - coord1 : np.ndarray + coord : np.ndarray exptended coordinates of shape [batch_size, nall x 3] atype : np.ndarray extended atomic types of shape [batch_size, nall] + type < 0 the atom is treat as virtual atoms. nloc : int number of local atoms. rcut : float @@ -54,11 +55,20 @@ def build_neighbor_list( if distinguish_types==True and we have two types |---- nsel[0] -----| |---- nsel[1] -----| xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1 + For virtual atoms all neighboring positions are filled with -1. """ - batch_size = coord1.shape[0] - coord1 = coord1.reshape(batch_size, -1) - nall = coord1.shape[1] // 3 + batch_size = coord.shape[0] + coord = coord.reshape(batch_size, -1) + nall = coord.shape[1] // 3 + # fill virtual atoms with large coords so they are not neighbors of any + # real atom. 
+ xmax = np.max(coord) + 2.0 * rcut + # nf x nall + is_vir = atype < 0 + coord1 = np.where(is_vir[:, :, None], xmax, coord.reshape(-1, nall, 3)).reshape( + -1, nall * 3 + ) if isinstance(sel, int): sel = [sel] nsel = sum(sel) @@ -88,7 +98,7 @@ def build_neighbor_list( axis=-1, ) assert list(nlist.shape) == [batch_size, nloc, nsel] - nlist = np.where((rr > rcut), -1, nlist) + nlist = np.where(np.logical_or((rr > rcut), is_vir[:, :nloc, None]), -1, nlist) if distinguish_types: return nlist_distinguish_types(nlist, atype, sel) @@ -182,7 +192,7 @@ def build_multiple_neighbor_list( ret = {} for rc, ns in zip(rcuts[::-1], nsels[::-1]): tnlist_1 = np.copy(nlist0[:, :, :ns]) - tnlist_1[rr[:, :, :ns] > rc] = int(-1) + tnlist_1[rr[:, :, :ns] > rc] = -1 ret[get_multiple_nlist_key(rc, ns)] = tnlist_1 return ret diff --git a/deepmd/dpmodel/utils/update_sel.py b/deepmd/dpmodel/utils/update_sel.py index f36e63651d..48463b5743 100644 --- a/deepmd/dpmodel/utils/update_sel.py +++ b/deepmd/dpmodel/utils/update_sel.py @@ -17,5 +17,5 @@ def neighbor_stat(self) -> Type[NeighborStat]: return NeighborStat def hook(self, min_nbor_dist, max_nbor_size): - # TODO: save to the model + # TODO: save to the model in UpdateSel.hook pass diff --git a/deepmd/driver.py b/deepmd/driver.py index 1e5e36c652..0b48f2ac84 100644 --- a/deepmd/driver.py +++ b/deepmd/driver.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """dpdata driver.""" + # Derived from https://github.com/deepmodeling/dpdata/blob/18a0ed5ebced8b1f6887038883d46f31ae9990a4/dpdata/plugins/deepmd.py#L361-L443 # under LGPL-3.0-or-later license. # The original deepmd driver maintained in the dpdata package will be overriden. diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index ccf8b1da1e..cad6e12d2b 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Test trained DeePMD model.""" + import logging from pathlib import ( Path, diff --git a/deepmd/infer/deep_dos.py b/deepmd/infer/deep_dos.py index d95d2a119f..7823f02999 100644 --- a/deepmd/infer/deep_dos.py +++ b/deepmd/infer/deep_dos.py @@ -56,6 +56,11 @@ def output_def(self) -> ModelOutputDef: ) ) + @property + def numb_dos(self) -> int: + """Get the number of DOS.""" + return self.get_numb_dos() + def eval( self, coords: np.ndarray, diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 065982a870..aae2082e13 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -242,6 +242,10 @@ def _check_mixed_types(self, atom_types: np.ndarray) -> bool: atom_types : np.ndarray The atom types of all frames, in shape nframes * natoms. """ + if np.count_nonzero(atom_types[0] == -1) > 0: + # assume mixed_types if there are virtual types, even when + # the atom types of all frames are the same + return False return np.all(np.equal(atom_types, atom_types[0])) @property diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index cb5d79797b..c214e39e92 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -29,8 +29,7 @@ def calc_model_devi_f( real_f: Optional[np.ndarray] = None, relative: Optional[float] = None, atomic: Literal[False] = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - ... +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: ... @overload @@ -40,8 +39,7 @@ def calc_model_devi_f( relative: Optional[float] = None, *, atomic: Literal[True], -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - ... 
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: ... def calc_model_devi_f( diff --git a/deepmd/main.py b/deepmd/main.py index 09457419e8..b503107c73 100644 --- a/deepmd/main.py +++ b/deepmd/main.py @@ -4,6 +4,7 @@ If only printing the help message, this module does not call the main DeePMD-kit module to avoid the slow import of TensorFlow. """ + import argparse import logging import os diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index 3ef91c76d6..824a5b4e2e 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -286,9 +286,7 @@ def freeze(FLAGS): torch.jit.save( model, FLAGS.output, - { - # TODO: _extra_files - }, + {}, ) diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index b8031993c0..f46d5fce49 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -388,7 +388,7 @@ def _eval_model( else: aparam_input = None do_atomic_virial = any( - x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs + x.category == OutputVariableCategory.DERV_C for x in request_defs ) batch_output = model( coord_input, diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 1d70528e88..edae53a771 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -77,7 +77,7 @@ def __init__( self.has_f = (start_pref_f != 0.0 and limit_pref_f != 0.0) or inference self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference - # TODO need support for atomic energy and atomic pref + # TODO EnergyStdLoss need support for atomic energy and atomic pref self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference @@ -90,20 +90,30 @@ def __init__( self.use_l1_all = use_l1_all self.inference = inference - def forward(self, model_pred, label, natoms, learning_rate, mae=False): - """Return loss on loss and force. + def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): + """Return loss on energy and force. - Args: - - natoms: Tell atom count. - - p_energy: Predicted energy of all atoms. - - p_force: Predicted force per atom. - - l_energy: Actual energy of all atoms. - - l_force: Actual force per atom. + Parameters + ---------- + input_dict : dict[str, torch.Tensor] + Model inputs. + model : torch.nn.Module + Model to be used to output the predictions. + label : dict[str, torch.Tensor] + Labels. + natoms : int + The local atom number. Returns ------- - - loss: Loss to minimize. + model_pred: dict[str, torch.Tensor] + Model predictions. + loss: torch.Tensor + Loss for model to minimize. + more_loss: dict[str, torch.Tensor] + Other losses for display. 
""" + model_pred = model(**input_dict) coef = learning_rate / self.starter_learning_rate pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef pref_f = self.limit_pref_f + (self.start_pref_f - self.limit_pref_f) * coef @@ -200,7 +210,7 @@ def forward(self, model_pred, label, natoms, learning_rate, mae=False): more_loss["mae_v"] = mae_v.detach() if not self.inference: more_loss["rmse"] = torch.sqrt(loss.detach()) - return loss, more_loss + return model_pred, loss, more_loss @property def label_requirement(self) -> List[DataRequirementItem]: diff --git a/deepmd/pt/loss/ener_spin.py b/deepmd/pt/loss/ener_spin.py index b94acf26ea..1f10e3cf5f 100644 --- a/deepmd/pt/loss/ener_spin.py +++ b/deepmd/pt/loss/ener_spin.py @@ -47,7 +47,7 @@ def __init__( self.has_fr = (start_pref_fr != 0.0 and limit_pref_fr != 0.0) or inference self.has_fm = (start_pref_fm != 0.0 and limit_pref_fm != 0.0) or inference - # TODO need support for virial, atomic energy and atomic pref + # TODO EnergySpinLoss needs support for virial, atomic energy and atomic pref self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference @@ -63,13 +63,15 @@ def __init__( self.use_l1_all = use_l1_all self.inference = inference - def forward(self, model_pred, label, natoms, learning_rate, mae=False): + def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): """Return energy loss with magnetic labels. Parameters ---------- - model_pred : dict[str, torch.Tensor] - Model predictions. + input_dict : dict[str, torch.Tensor] + Model inputs. + model : torch.nn.Module + Model to be used to output the predictions. label : dict[str, torch.Tensor] Labels. natoms : int @@ -77,11 +79,14 @@ def forward(self, model_pred, label, natoms, learning_rate, mae=False): Returns ------- + model_pred: dict[str, torch.Tensor] + Model predictions. loss: torch.Tensor Loss for model to minimize. more_loss: dict[str, torch.Tensor] Other losses for display. """ + model_pred = model(**input_dict) coef = learning_rate / self.starter_learning_rate pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef pref_fr = self.limit_pref_fr + (self.start_pref_fr - self.limit_pref_fr) * coef @@ -175,7 +180,7 @@ def forward(self, model_pred, label, natoms, learning_rate, mae=False): if not self.inference: more_loss["rmse"] = torch.sqrt(loss.detach()) - return loss, more_loss + return model_pred, loss, more_loss @property def label_requirement(self) -> List[DataRequirementItem]: diff --git a/deepmd/pt/loss/loss.py b/deepmd/pt/loss/loss.py index 925ff8f4ef..cc253424ca 100644 --- a/deepmd/pt/loss/loss.py +++ b/deepmd/pt/loss/loss.py @@ -19,7 +19,7 @@ def __init__(self, **kwargs): """Construct loss.""" super().__init__() - def forward(self, model_pred, label, natoms, learning_rate): + def forward(self, input_dict, model, label, natoms, learning_rate): """Return loss .""" raise NotImplementedError diff --git a/deepmd/pt/loss/tensor.py b/deepmd/pt/loss/tensor.py index 5ac0a6e37b..238e6a7796 100644 --- a/deepmd/pt/loss/tensor.py +++ b/deepmd/pt/loss/tensor.py @@ -63,13 +63,15 @@ def __init__( "Can not assian zero weight both to `pref` and `pref_atomic`" ) - def forward(self, model_pred, label, natoms, learning_rate=0.0, mae=False): + def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False): """Return loss on local and global tensors. 
Parameters ---------- - model_pred : dict[str, torch.Tensor] - Model predictions. + input_dict : dict[str, torch.Tensor] + Model inputs. + model : torch.nn.Module + Model to be used to output the predictions. label : dict[str, torch.Tensor] Labels. natoms : int @@ -77,11 +79,14 @@ def forward(self, model_pred, label, natoms, learning_rate=0.0, mae=False): Returns ------- + model_pred: dict[str, torch.Tensor] + Model predictions. loss: torch.Tensor Loss for model to minimize. more_loss: dict[str, torch.Tensor] Other losses for display. """ + model_pred = model(**input_dict) del learning_rate, mae loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0] more_loss = {} @@ -127,13 +132,13 @@ def forward(self, model_pred, label, natoms, learning_rate=0.0, mae=False): atom_num = natoms l2_global_loss = torch.mean(torch.square(diff)) if not self.inference: - more_loss[ - f"l2_global_{self.tensor_name}_loss" - ] = l2_global_loss.detach() + more_loss[f"l2_global_{self.tensor_name}_loss"] = ( + l2_global_loss.detach() + ) loss += self.global_weight * l2_global_loss rmse_global = l2_global_loss.sqrt() / atom_num more_loss[f"rmse_global_{self.tensor_name}"] = rmse_global.detach() - return loss, more_loss + return model_pred, loss, more_loss @property def label_requirement(self) -> List[DataRequirementItem]: diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py index 877f0c5d6b..877c9c2d4f 100644 --- a/deepmd/pt/model/atomic_model/base_atomic_model.py +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -21,6 +21,9 @@ AtomExcludeMask, PairExcludeMask, ) +from deepmd.utils.path import ( + DPPath, +) BaseAtomicModel_ = make_base_atomic_model(torch.Tensor) @@ -55,33 +58,44 @@ def reinit_pair_exclude( else: self.pair_excl = PairExcludeMask(self.get_ntypes(), self.pair_exclude_types) - # export public methods that are not abstract - get_nsel = torch.jit.export(BaseAtomicModel_.get_nsel) - get_nnei = torch.jit.export(BaseAtomicModel_.get_nnei) - get_ntypes = torch.jit.export(BaseAtomicModel_.get_ntypes) - - @torch.jit.export - def get_model_def_script(self) -> str: - return self.model_def_script + # to make jit happy... + def make_atom_mask( + self, + atype: torch.Tensor, + ) -> torch.Tensor: + """The atoms with type < 0 are treated as virutal atoms, + which serves as place-holders for multi-frame calculations + with different number of atoms in different frames. + + Parameters + ---------- + atype + Atom types. >= 0 for real atoms <0 for virtual atoms. + + Returns + ------- + mask + True for real atoms and False for virutal atoms. 
+ + """ + # supposed to be supported by all backends + return atype >= 0 def atomic_output_def(self) -> FittingOutputDef: old_def = self.fitting_output_def() - if self.atom_excl is None: - return old_def - else: - old_list = list(old_def.get_data().values()) - return FittingOutputDef( - old_list # noqa:RUF005 - + [ - OutputVariableDef( - name="mask", - shape=[1], - reduciable=False, - r_differentiable=False, - c_differentiable=False, - ) - ] - ) + old_list = list(old_def.get_data().values()) + return FittingOutputDef( + old_list # noqa:RUF005 + + [ + OutputVariableDef( + name="mask", + shape=[1], + reduciable=False, + r_differentiable=False, + c_differentiable=False, + ) + ] + ) def forward_common_atomic( self, @@ -93,6 +107,37 @@ def forward_common_atomic( aparam: Optional[torch.Tensor] = None, comm_dict: Optional[Dict[str, torch.Tensor]] = None ) -> Dict[str, torch.Tensor]: + """Common interface for atomic inference. + + This method accept extended coordinates, extended atom typs, neighbor list, + and predict the atomic contribution of the fit property. + + Parameters + ---------- + extended_coord + extended coodinates, shape: nf x (nall x 3) + extended_atype + extended atom typs, shape: nf x nall + for a type < 0 indicating the atomic is virtual. + nlist + neighbor list, shape: nf x nloc x nsel + mapping + extended to local index mapping, shape: nf x nall + fparam + frame parameters, shape: nf x dim_fparam + aparam + atomic parameter, shape: nf x nloc x dim_aparam + + Returns + ------- + ret_dict + dict of output atomic properties. + should implement the definition of `fitting_output_def`. + ret_dict["mask"] of shape nf x nloc will be provided. + ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real. + ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual. + + """ _, nloc, _ = nlist.shape atype = extended_atype[:, :nloc] @@ -101,9 +146,10 @@ def forward_common_atomic( # exclude neighbors in the nlist nlist = torch.where(pair_mask == 1, nlist, -1) + ext_atom_mask = self.make_atom_mask(extended_atype) ret_dict = self.forward_atomic( extended_coord, - extended_atype, + torch.where(ext_atom_mask, extended_atype, 0), nlist, mapping=mapping, fparam=fparam, @@ -111,15 +157,18 @@ def forward_common_atomic( comm_dict=comm_dict ) + # nf x nloc + atom_mask = ext_atom_mask[:, :nloc].to(torch.int32) if self.atom_excl is not None: - atom_mask = self.atom_excl(atype) - for kk in ret_dict.keys(): - out_shape = ret_dict[kk].shape - ret_dict[kk] = ( - ret_dict[kk].reshape([out_shape[0], out_shape[1], -1]) - * atom_mask[:, :, None] - ).reshape(out_shape) - ret_dict["mask"] = atom_mask + atom_mask *= self.atom_excl(atype) + + for kk in ret_dict.keys(): + out_shape = ret_dict[kk].shape + ret_dict[kk] = ( + ret_dict[kk].reshape([out_shape[0], out_shape[1], -1]) + * atom_mask[:, :, None] + ).view(out_shape) + ret_dict["mask"] = atom_mask return ret_dict @@ -128,3 +177,25 @@ def serialize(self) -> dict: "atom_exclude_types": self.atom_exclude_types, "pair_exclude_types": self.pair_exclude_types, } + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. + When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). 
+ When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. + + Parameters + ---------- + sampled_func + The sampled data frames from different data systems. + stat_file_path + The path to the statistics files. + """ + raise NotImplementedError diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 7e369b7334..6b023217cb 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -33,6 +33,7 @@ log = logging.getLogger(__name__) +@BaseAtomicModel.register("standard") class DPAtomicModel(torch.nn.Module, BaseAtomicModel): """Model give atomic prediction of some physical property. @@ -55,7 +56,6 @@ def __init__( **kwargs, ): torch.nn.Module.__init__(self) - self.model_def_script = "" ntypes = len(type_map) self.type_map = type_map self.ntypes = ntypes @@ -225,17 +225,14 @@ def wrapped_sampler(): if self.fitting_net is not None: self.fitting_net.compute_output_stats(wrapped_sampler, stat_file_path) - @torch.jit.export def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" return self.fitting_net.get_dim_fparam() - @torch.jit.export def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.fitting_net.get_dim_aparam() - @torch.jit.export def get_sel_type(self) -> List[int]: """Get the selected atom types of this model. @@ -245,7 +242,6 @@ def get_sel_type(self) -> List[int]: """ return self.fitting_net.get_sel_type() - @torch.jit.export def is_aparam_nall(self) -> bool: """Check whether the shape of atomic parameters is (nframes, nall, ndim). diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index 68705049ae..f7216f46ef 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -1,9 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy -import sys -from abc import ( - abstractmethod, -) from typing import ( Dict, List, @@ -96,12 +92,10 @@ def mixed_types(self) -> bool: """ return True - @torch.jit.export def get_rcut(self) -> float: """Get the cut-off radius.""" return max(self.get_model_rcuts()) - @torch.jit.export def get_type_map(self) -> List[str]: """Get the type map.""" return self.type_map @@ -262,48 +256,48 @@ def fitting_output_def(self) -> FittingOutputDef: ] ) - @staticmethod - def serialize(models, type_map) -> dict: + def serialize(self) -> dict: return { "@class": "Model", "@version": 1, "type": "linear", - "models": [model.serialize() for model in models], - "model_name": [model.__class__.__name__ for model in models], - "type_map": type_map, + "models": [model.serialize() for model in self.models], + "type_map": self.type_map, } - @staticmethod - def deserialize(data) -> Tuple[List[BaseAtomicModel], List[str]]: + @classmethod + def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 1, 1) - model_names = data["model_name"] - type_map = data["type_map"] + data.pop("@class") + data.pop("type") + type_map = data.pop("type_map") models = [ - getattr(sys.modules[__name__], name).deserialize(model) - for name, model in zip(model_names, data["models"]) + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] ] 
- return models, type_map + data.pop("models") + return cls(models, type_map, **data) - @abstractmethod def _compute_weight( self, extended_coord, extended_atype, nlists_ ) -> List[torch.Tensor]: """This should be a list of user defined weights that matches the number of models to be combined.""" - raise NotImplementedError + nmodels = len(self.models) + return [ + torch.ones(1, dtype=torch.float64, device=env.DEVICE) / nmodels + for _ in range(nmodels) + ] - @torch.jit.export def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" # tricky... return max([model.get_dim_fparam() for model in self.models]) - @torch.jit.export def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return max([model.get_dim_aparam() for model in self.models]) - @torch.jit.export def get_sel_type(self) -> List[int]: """Get the selected atom types of this model. @@ -324,7 +318,6 @@ def get_sel_type(self) -> List[int]: ) ).tolist() - @torch.jit.export def is_aparam_nall(self) -> bool: """Check whether the shape of atomic parameters is (nframes, nall, ndim). @@ -366,9 +359,6 @@ def __init__( ): models = [dp_model, zbl_model] super().__init__(models, type_map, **kwargs) - self.model_def_script = "" - self.dp_model = dp_model - self.zbl_model = zbl_model self.sw_rmin = sw_rmin self.sw_rmax = sw_rmax @@ -397,8 +387,8 @@ def compute_or_load_stat( stat_file_path The dictionary of paths to the statistics files. """ - self.dp_model.compute_or_load_stat(sampled_func, stat_file_path) - self.zbl_model.compute_or_load_stat(sampled_func, stat_file_path) + self.models[0].compute_or_load_stat(sampled_func, stat_file_path) + self.models[1].compute_or_load_stat(sampled_func, stat_file_path) def change_energy_bias(self): # need to implement @@ -409,11 +399,11 @@ def serialize(self) -> dict: dd.update( { "@class": "Model", - "@version": 1, + "@version": 2, "type": "zbl", - "models": LinearEnergyAtomicModel.serialize( - [self.dp_model, self.zbl_model], self.type_map - ), + "models": LinearEnergyAtomicModel( + models=[self.models[0], self.models[1]], type_map=self.type_map + ).serialize(), "sw_rmin": self.sw_rmin, "sw_rmax": self.sw_rmax, "smin_alpha": self.smin_alpha, @@ -424,14 +414,13 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 1, 1) + check_version_compatibility(data.pop("@version", 1), 2, 1) sw_rmin = data.pop("sw_rmin") sw_rmax = data.pop("sw_rmax") smin_alpha = data.pop("smin_alpha") - - [dp_model, zbl_model], type_map = LinearEnergyAtomicModel.deserialize( - data.pop("models") - ) + linear_model = LinearEnergyAtomicModel.deserialize(data.pop("models")) + dp_model, zbl_model = linear_model.models + type_map = linear_model.type_map data.pop("@class", None) data.pop("type", None) @@ -486,7 +475,7 @@ def _compute_weight( dim=-1, ) # handle masked nnei. 
- sigma = numerator / denominator # nfrmes, nloc + sigma = numerator / torch.clamp(denominator, 1e-20) # nfrmes, nloc u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin) coef = torch.zeros_like(u) left_mask = sigma < self.sw_rmin diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 19a67fc8ff..7c7c8a2969 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -35,6 +35,7 @@ ) +@BaseAtomicModel.register("pairtab") class PairTabAtomicModel(torch.nn.Module, BaseAtomicModel): """Pairwise tabulation energy model. @@ -78,7 +79,6 @@ def __init__( **kwargs, ): torch.nn.Module.__init__(self) - self.model_def_script = "" self.tab_file = tab_file self.rcut = rcut self.tab = self._set_pairtab(tab_file, rcut) @@ -139,11 +139,9 @@ def fitting_output_def(self) -> FittingOutputDef: ] ) - @torch.jit.export def get_rcut(self) -> float: return self.rcut - @torch.jit.export def get_type_map(self) -> List[str]: return self.type_map @@ -417,20 +415,28 @@ def _extract_spline_coefficient( # (nframes, nloc, nnei) expanded_i_type = i_type.unsqueeze(-1).expand(-1, -1, j_type.shape[-1]) - # (nframes, nloc, nnei, nspline, 4) - expanded_tab_data = tab_data[expanded_i_type, j_type] - - # (nframes, nloc, nnei, 1, 4) - expanded_idx = idx.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, -1, -1, 4) - # handle the case where idx is beyond the number of splines - clipped_indices = torch.clamp(expanded_idx, 0, nspline - 1).to(torch.int64) - + clipped_indices = torch.clamp(idx, 0, nspline - 1).to(torch.int64) + + nframes = i_type.shape[0] + nloc = i_type.shape[1] + nnei = j_type.shape[2] + ntypes = tab_data.shape[0] + # tab_data_idx: (nframes, nloc, nnei) + tab_data_idx = ( + expanded_i_type * ntypes * nspline + j_type * nspline + clipped_indices + ) + # tab_data: (ntype, ntype, nspline, 4) + tab_data = tab_data.view(ntypes * ntypes * nspline, 4) + # tab_data_idx: (nframes * nloc * nnei, 4) + tab_data_idx = tab_data_idx.view(nframes * nloc * nnei, 1).expand(-1, 4) # (nframes, nloc, nnei, 4) - final_coef = torch.gather(expanded_tab_data, 3, clipped_indices).squeeze() + final_coef = torch.gather(tab_data, 0, tab_data_idx).view( + nframes, nloc, nnei, 4 + ) # when the spline idx is beyond the table, all spline coefficients are set to `0`, and the resulting ener corresponding to the idx is also `0`. - final_coef[expanded_idx.squeeze() > nspline] = 0 + final_coef[idx > nspline] = 0 return final_coef @staticmethod @@ -454,17 +460,14 @@ def _calculate_ener(coef: torch.Tensor, uu: torch.Tensor) -> torch.Tensor: ener = etmp * uu + a0 # this energy has the extrapolated value when rcut > rmax return ener - @torch.jit.export def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" return 0 - @torch.jit.export def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return 0 - @torch.jit.export def get_sel_type(self) -> List[int]: """Get the selected atom types of this model. @@ -474,7 +477,6 @@ def get_sel_type(self) -> List[int]: """ return [] - @torch.jit.export def is_aparam_nall(self) -> bool: """Check whether the shape of atomic parameters is (nframes, nall, ndim). 
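Two changes above are worth a closer look: the `torch.clamp` guard on the ZBL sigma denominator, and the `_extract_spline_coefficient` rewrite in the pair-table model, which replaces nested advanced indexing with a single `torch.gather` over a `(ntypes * ntypes * nspline, 4)` view and avoids materializing the large intermediate tensor. A small self-contained check (toy sizes, mirroring the index arithmetic in the hunk) that the flat index recovers the same coefficients as plain fancy indexing:

```python
import torch

ntypes, nspline = 3, 4
nframes, nloc, nnei = 2, 5, 6
tab_data = torch.arange(ntypes * ntypes * nspline * 4, dtype=torch.float64).view(
    ntypes, ntypes, nspline, 4
)
i_type = torch.randint(0, ntypes, (nframes, nloc))
j_type = torch.randint(0, ntypes, (nframes, nloc, nnei))
idx = torch.randint(0, nspline, (nframes, nloc, nnei))

expanded_i_type = i_type.unsqueeze(-1).expand(-1, -1, nnei)
# flat row index into the (ntypes * ntypes * nspline, 4) view, as in the diff
flat_idx = expanded_i_type * ntypes * nspline + j_type * nspline + idx
flat_tab = tab_data.view(ntypes * ntypes * nspline, 4)
gathered = torch.gather(
    flat_tab, 0, flat_idx.view(-1, 1).expand(-1, 4)
).view(nframes, nloc, nnei, 4)

# reference: plain advanced indexing over the 4-D table
reference = tab_data[expanded_i_type, j_type, idx]
assert torch.equal(gathered, reference)
```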
diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py index 8e4352e60c..1675215d7b 100644 --- a/deepmd/pt/model/model/__init__.py +++ b/deepmd/pt/model/model/__init__.py @@ -7,11 +7,15 @@ communication of the atomic properties according to output variable definition `deepmd.dpmodel.OutputVariableDef`. +All models should be inherited from :class:`deepmd.pt.model.model.model.BaseModel`. +Models generated by `make_model` have already done it. """ import copy import json +import numpy as np + from deepmd.pt.model.atomic_model import ( DPAtomicModel, PairTabAtomicModel, @@ -35,6 +39,9 @@ from .ener_model import ( EnergyModel, ) +from .frozen import ( + FrozenModel, +) from .make_hessian_model import ( make_hessian_model, ) @@ -52,6 +59,12 @@ def get_spin_model(model_params): model_params = copy.deepcopy(model_params) + if not model_params["spin"]["use_spin"] or isinstance( + model_params["spin"]["use_spin"][0], int + ): + use_spin = np.full(len(model_params["type_map"]), False) + use_spin[model_params["spin"]["use_spin"]] = True + model_params["spin"]["use_spin"] = use_spin.tolist() # include virtual spin and placeholder types model_params["type_map"] += [item + "_spin" for item in model_params["type_map"]] spin = Spin( @@ -135,7 +148,8 @@ def get_standard_model(model_params): fitting_net["type"] = fitting_net.get("type", "ener") fitting_net["ntypes"] = descriptor.get_ntypes() fitting_net["mixed_types"] = descriptor.mixed_types() - fitting_net["embedding_width"] = descriptor.get_dim_emb() + if fitting_net["type"] in ["dipole", "polar"]: + fitting_net["embedding_width"] = descriptor.get_dim_emb() fitting_net["dim_descrpt"] = descriptor.get_dim_out() grad_force = "direct" not in fitting_net["type"] if not grad_force: @@ -147,8 +161,8 @@ def get_standard_model(model_params): pair_exclude_types = model_params.get("pair_exclude_types", []) model = DPModel( - descriptor, - fitting, + descriptor=descriptor, + fitting=fitting, type_map=model_params["type_map"], atom_exclude_types=atom_exclude_types, pair_exclude_types=pair_exclude_types, @@ -171,6 +185,7 @@ def get_model(model_params): "get_model", "DPModel", "EnergyModel", + "FrozenModel", "SpinModel", "SpinEnergyModel", "DPZBLModel", diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py index 8b6f2c47c1..45b120771b 100644 --- a/deepmd/pt/model/model/dipole_model.py +++ b/deepmd/pt/model/model/dipole_model.py @@ -38,7 +38,7 @@ def forward( aparam=aparam, do_atomic_virial=do_atomic_virial, ) - if self.fitting_net is not None: + if self.get_fitting_net() is not None: model_predict = {} model_predict["dipole"] = model_ret["dipole"] model_predict["global_dipole"] = model_ret["dipole_redu"] @@ -77,7 +77,7 @@ def forward_lower( aparam=aparam, do_atomic_virial=do_atomic_virial, ) - if self.fitting_net is not None: + if self.get_fitting_net() is not None: model_predict = {} model_predict["dipole"] = model_ret["dipole"] model_predict["global_dipole"] = model_ret["dipole_redu"] diff --git a/deepmd/pt/model/model/dos_model.py b/deepmd/pt/model/model/dos_model.py new file mode 100644 index 0000000000..680eac41f5 --- /dev/null +++ b/deepmd/pt/model/model/dos_model.py @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + +import torch + +from .dp_model import ( + DPModel, +) + + +class DOSModel(DPModel): + model_type = "dos" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def forward( + self, + coord, 
+ atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_dos"] = model_ret["dos"] + model_predict["dos"] = model_ret["dos_redu"] + + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + @torch.jit.export + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_dos"] = model_ret["dos"] + model_predict["dos"] = model_ret["dos_redu"] + + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pt/model/model/dp_model.py b/deepmd/pt/model/model/dp_model.py index 0df45d4f84..d7b3c4f4e2 100644 --- a/deepmd/pt/model/model/dp_model.py +++ b/deepmd/pt/model/model/dp_model.py @@ -1,4 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + +import torch + from deepmd.pt.model.atomic_model import ( DPAtomicModel, ) @@ -11,6 +18,9 @@ from deepmd.pt.model.task.dipole import ( DipoleFittingNet, ) +from deepmd.pt.model.task.dos import ( + DOSFittingNet, +) from deepmd.pt.model.task.ener import ( EnergyFittingNet, EnergyFittingNetDirect, @@ -25,11 +35,22 @@ @BaseModel.register("standard") -class DPModel(make_model(DPAtomicModel), BaseModel): - def __new__(cls, descriptor, fitting, *args, **kwargs): +class DPModel(make_model(DPAtomicModel)): + def __new__( + cls, + descriptor=None, + fitting=None, + *args, + # disallow positional atomic_model_ + atomic_model_: Optional[DPAtomicModel] = None, + **kwargs, + ): from deepmd.pt.model.model.dipole_model import ( DipoleModel, ) + from deepmd.pt.model.model.dos_model import ( + DOSModel, + ) from deepmd.pt.model.model.ener_model import ( EnergyModel, ) @@ -37,6 +58,11 @@ def __new__(cls, descriptor, fitting, *args, **kwargs): PolarModel, ) + if atomic_model_ is not None: + fitting = atomic_model_.fitting_net + else: + assert fitting is not None, "fitting network is not provided" + # according to the fitting network to decide the type of the model if cls is DPModel: # map fitting to model @@ -48,6 +74,8 @@ def __new__(cls, descriptor, fitting, *args, **kwargs): cls = DipoleModel elif isinstance(fitting, PolarFittingNet): cls = PolarModel + elif isinstance(fitting, DOSFittingNet): + cls = DOSModel # else: unknown fitting type, fall back to DPModel return super().__new__(cls) @@ -67,3 +95,30 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): global_jdata, local_jdata["descriptor"] ) return local_jdata_cpy + + def get_fitting_net(self): + """Get the fitting network.""" + return self.atomic_model.fitting_net + + def get_descriptor(self): + """Get the descriptor.""" + return self.atomic_model.descriptor + + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: 
Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + # directly call the forward_common method when no specific transform rule + return self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py index fdf9334119..bbc82b8d77 100644 --- a/deepmd/pt/model/model/dp_zbl_model.py +++ b/deepmd/pt/model/model/dp_zbl_model.py @@ -24,7 +24,7 @@ @BaseModel.register("zbl") -class DPZBLModel(DPZBLModel_, BaseModel): +class DPZBLModel(DPZBLModel_): model_type = "ener" def __init__( diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index c702f01bf3..db5fff57f6 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -40,7 +40,7 @@ def forward( aparam=aparam, do_atomic_virial=do_atomic_virial, ) - if self.fitting_net is not None: + if self.get_fitting_net() is not None: model_predict = {} model_predict["atom_energy"] = model_ret["energy"] model_predict["energy"] = model_ret["energy_redu"] @@ -83,7 +83,7 @@ def forward_lower( do_atomic_virial=do_atomic_virial, comm_dict=comm_dict ) - if self.fitting_net is not None: + if self.get_fitting_net() is not None: model_predict = {} model_predict["atom_energy"] = model_ret["energy"] model_predict["energy"] = model_ret["energy_redu"] diff --git a/deepmd/pt/model/model/frozen.py b/deepmd/pt/model/model/frozen.py new file mode 100644 index 0000000000..e3dcd389bb --- /dev/null +++ b/deepmd/pt/model/model/frozen.py @@ -0,0 +1,174 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import tempfile +from typing import ( + Dict, + List, + Optional, +) + +import torch + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, +) +from deepmd.entrypoints.convert_backend import ( + convert_backend, +) +from deepmd.pt.model.model.model import ( + BaseModel, +) + + +@BaseModel.register("frozen") +class FrozenModel(BaseModel): + """Load model from a frozen model, which cannot be trained. 
+ + Parameters + ---------- + model_file : str + The path to the frozen model + """ + + def __init__(self, model_file: str, **kwargs): + super().__init__(**kwargs) + self.model_file = model_file + if model_file.endswith(".pth"): + self.model = torch.jit.load(model_file) + else: + # try to convert from other formats + with tempfile.NamedTemporaryFile(suffix=".pth") as f: + convert_backend(INPUT=model_file, OUTPUT=f.name) + self.model = torch.jit.load(f.name) + + @torch.jit.export + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of developer implemented atomic models.""" + return self.model.fitting_output_def() + + @torch.jit.export + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.model.get_rcut() + + @torch.jit.export + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.model.get_type_map() + + @torch.jit.export + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.model.get_sel() + + @torch.jit.export + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.model.get_dim_fparam() + + @torch.jit.export + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.model.get_dim_aparam() + + @torch.jit.export + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.model.get_sel_type() + + @torch.jit.export + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return self.model.is_aparam_nall() + + @torch.jit.export + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. 
+ + """ + return self.model.mixed_types() + + @torch.jit.export + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + return self.model.forward( + coord, + atype, + box=box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + @torch.jit.export + def get_model_def_script(self) -> str: + """Get the model definition script.""" + # try to use the original script instead of "frozen model" + # Note: this cannot change the script of the parent model + # it may still try to load hard-coded filename, which might + # be a problem + return self.model.get_model_def_script() + + def serialize(self) -> dict: + from deepmd.pt.model.model import ( + get_model, + ) + + # try to recover the original model + model_def_script = json.loads(self.get_model_def_script()) + model = get_model(model_def_script) + model.load_state_dict(self.model.state_dict()) + return model.serialize() + + @classmethod + def deserialize(cls, data: dict): + raise RuntimeError("Should not touch here.") + + @torch.jit.export + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.model.get_nnei() + + @torch.jit.export + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.model.get_nsel() + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + return local_jdata + + @torch.jit.export + def model_output_type(self) -> str: + """Get the output type for the model.""" + return self.model.model_output_type() diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index fd8c601569..b76b98921e 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -4,6 +4,7 @@ List, Optional, Tuple, + Type, ) import torch @@ -12,10 +13,17 @@ ModelOutputDef, ) from deepmd.dpmodel.output_def import ( + FittingOutputDef, OutputVariableCategory, OutputVariableOperation, check_operation_applied, ) +from deepmd.pt.model.atomic_model.base_atomic_model import ( + BaseAtomicModel, +) +from deepmd.pt.model.model.model import ( + BaseModel, +) from deepmd.pt.model.model.transform_output import ( communicate_extended_output, fit_output_to_model_output, @@ -30,9 +38,12 @@ extend_input_and_build_neighbor_list, nlist_distinguish_types, ) +from deepmd.utils.path import ( + DPPath, +) -def make_model(T_AtomicModel): +def make_model(T_AtomicModel: Type[BaseAtomicModel]): """Make a model as a derived class of an atomic model. The model provide two interfaces. 
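Returning to `FrozenModel` above: its constructor either `torch.jit.load`s a `.pth` TorchScript file directly or first converts other backends into a temporary `.pth` via `convert_backend`. A toy round trip of the TorchScript half of that logic (the deepmd-specific conversion branch is omitted here):

```python
import tempfile

import torch


class Tiny(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return 2.0 * x


# freeze a scripted module to disk, then reload it as FrozenModel would
with tempfile.NamedTemporaryFile(suffix=".pth") as f:
    torch.jit.save(torch.jit.script(Tiny()), f.name)
    model = torch.jit.load(f.name)

print(model(torch.ones(3)))  # tensor([2., 2., 2.])
```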
@@ -55,16 +66,19 @@ def make_model(T_AtomicModel): """ - class CM(T_AtomicModel): + class CM(BaseModel): def __init__( self, *args, + # underscore to prevent conflict with normal inputs + atomic_model_: Optional[T_AtomicModel] = None, **kwargs, ): - super().__init__( - *args, - **kwargs, - ) + super().__init__(*args, **kwargs) + if atomic_model_ is not None: + self.atomic_model: T_AtomicModel = atomic_model_ + else: + self.atomic_model: T_AtomicModel = T_AtomicModel(*args, **kwargs) self.precision_dict = PRECISION_DICT self.reverse_precision_dict = RESERVED_PRECISON_DICT self.global_pt_float_precision = GLOBAL_PT_FLOAT_PRECISION @@ -204,7 +218,7 @@ def forward_common_lower( extended_coord, fparam=fparam, aparam=aparam ) del extended_coord, fparam, aparam - atomic_ret = self.forward_common_atomic( + atomic_ret = self.atomic_model.forward_common_atomic( cc_ext, extended_atype, nlist, @@ -384,4 +398,105 @@ def _format_nlist( assert nlist.shape[-1] == nnei return nlist + def do_grad_r( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is r_differentiable. + if var_name is None, returns if any of the variable is r_differentiable. + """ + return self.atomic_model.do_grad_r(var_name) + + def do_grad_c( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is c_differentiable. + if var_name is None, returns if any of the variable is c_differentiable. + """ + return self.atomic_model.do_grad_c(var_name) + + def serialize(self) -> dict: + return self.atomic_model.serialize() + + @classmethod + def deserialize(cls, data) -> "CM": + return cls(atomic_model_=T_AtomicModel.deserialize(data)) + + @torch.jit.export + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.atomic_model.get_dim_fparam() + + @torch.jit.export + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.atomic_model.get_dim_aparam() + + @torch.jit.export + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.atomic_model.get_sel_type() + + @torch.jit.export + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). 
+ """ + return self.atomic_model.is_aparam_nall() + + @torch.jit.export + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.atomic_model.get_rcut() + + @torch.jit.export + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.atomic_model.get_type_map() + + @torch.jit.export + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nsel() + + @torch.jit.export + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nnei() + + def atomic_output_def(self) -> FittingOutputDef: + """Get the output def of the atomic model.""" + return self.atomic_model.atomic_output_def() + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """Compute or load the statistics.""" + return self.atomic_model.compute_or_load_stat(sampled_func, stat_file_path) + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.atomic_model.get_sel() + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.atomic_model.mixed_types() + return CM diff --git a/deepmd/pt/model/model/model.py b/deepmd/pt/model/model/model.py index e32d2f307d..a62050b2d1 100644 --- a/deepmd/pt/model/model/model.py +++ b/deepmd/pt/model/model/model.py @@ -3,6 +3,8 @@ Optional, ) +import torch + from deepmd.dpmodel.model.base_model import ( make_base_model, ) @@ -11,61 +13,15 @@ ) -# trick: torch.nn.Module should not be inherbited here, otherwise, -# the abstract method will override the method from the atomic model -# as Python resolves method lookups using the C3 linearisation. -# See https://stackoverflow.com/a/47117600/9567349 -# Take an example, this is the situation for only inheriting make_model(): -# torch.nn.Module BaseAtomicModel make_model() -# | | | -# ------------------------- | -# | | -# DPAtomicModel BaseModel -# | | -# make_model(DPAtomicModel) | -# | | -# ---------------------------------- -# | -# DPModel -# -# The order is: DPModel -> make_model(DPAtomicModel) -> DPAtomicModel -> -# torch.nn.Module -> BaseAtomicModel -> BaseModel -> make_model() -# -# However, if BaseModel also inherbits from torch.nn.Module: -# torch.nn.Module make_model() -# | | -# |--------------------------- | -# | | | -# | BaseAtomicModel | | -# | | | | -# |------------- ---------- -# | | -# DPAtomicModel BaseModel -# | | -# | | -# make_model(DPAtomicModel) | -# | | -# | | -# -------------------------------- -# | -# | -# DPModel -# -# The order is DPModel -> make_model(DPAtomicModel) -> DPAtomicModel -> -# BaseModel -> torch.nn.Module -> BaseAtomicModel -> make_model() -# BaseModel has higher proirity than BaseAtomicModel, which is not what -# we want. -# Alternatively, we can also make BaseAtomicModel in front of torch.nn.Module -# in DPAtomicModel (and other classes), but this requires the developer aware -# of it when developing it... 
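The comment removed above documented the MRO gymnastics required when models inherited from their atomic models. The new design sidesteps the problem with composition: `BaseModel` inherits `torch.nn.Module` directly, and the class produced by `make_model` keeps the atomic model as an attribute and delegates to it. A stripped-down sketch of that pattern, with placeholder class names:

```python
import torch


class ToyAtomicModel(torch.nn.Module):
    """Placeholder for an atomic model with a serialize/deserialize pair."""

    def serialize(self) -> dict:
        return {"type": "toy"}

    @classmethod
    def deserialize(cls, data: dict) -> "ToyAtomicModel":
        return cls()


def make_toy_model(T_AtomicModel):
    class CM(torch.nn.Module):
        def __init__(self, *args, atomic_model_=None, **kwargs):
            super().__init__()
            # composition: keep the atomic model as a submodule and delegate,
            # instead of inheriting from it and fighting the MRO
            if atomic_model_ is not None:
                self.atomic_model = atomic_model_
            else:
                self.atomic_model = T_AtomicModel(*args, **kwargs)

        def serialize(self) -> dict:
            return self.atomic_model.serialize()

        @classmethod
        def deserialize(cls, data: dict) -> "CM":
            return cls(atomic_model_=T_AtomicModel.deserialize(data))

    return CM


ToyModel = make_toy_model(ToyAtomicModel)
restored = ToyModel.deserialize(ToyModel().serialize())
assert isinstance(restored.atomic_model, ToyAtomicModel)
```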
-class BaseModel(make_base_model()): +class BaseModel(torch.nn.Module, make_base_model()): def __init__(self, *args, **kwargs): """Construct a basic model for different tasks.""" - super().__init__(*args, **kwargs) + torch.nn.Module.__init__(self) + self.model_def_script = "" def compute_or_load_stat( self, - sampled, + sampled_func, stat_file_path: Optional[DPPath] = None, ): """ @@ -78,9 +34,14 @@ def compute_or_load_stat( Parameters ---------- - sampled + sampled_func The sampled data frames from different data systems. stat_file_path The path to the statistics files. """ raise NotImplementedError + + @torch.jit.export + def get_model_def_script(self) -> str: + """Get the model definition script.""" + return self.model_def_script diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py index bf430c6706..403058aa47 100644 --- a/deepmd/pt/model/model/polar_model.py +++ b/deepmd/pt/model/model/polar_model.py @@ -38,7 +38,7 @@ def forward( aparam=aparam, do_atomic_virial=do_atomic_virial, ) - if self.fitting_net is not None: + if self.get_fitting_net() is not None: model_predict = {} model_predict["polar"] = model_ret["polar"] model_predict["global_polar"] = model_ret["polar_redu"] @@ -69,7 +69,7 @@ def forward_lower( aparam=aparam, do_atomic_virial=do_atomic_virial, ) - if self.fitting_net is not None: + if self.get_fitting_net() is not None: model_predict = {} model_predict["polar"] = model_ret["polar"] model_predict["global_polar"] = model_ret["polar_redu"] diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 21372888d6..ca445c8588 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy import logging from typing import ( Callable, @@ -25,6 +26,9 @@ from deepmd.utils.path import ( DPPath, ) +from deepmd.utils.version import ( + check_version_compatibility, +) log = logging.getLogger(__name__) @@ -123,6 +127,12 @@ def serialize(self) -> dict: data["c_differentiable"] = self.c_differentiable return data + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + def output_def(self) -> FittingOutputDef: return FittingOutputDef( [ @@ -182,3 +192,6 @@ def forward( # (nframes, nloc, 3) out = torch.bmm(out, gr).squeeze(-2).view(nframes, nloc, 3) return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)} + + # make jit happy with torch 2.0.0 + exclude_types: List[int] diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py new file mode 100644 index 0000000000..c37b05277a --- /dev/null +++ b/deepmd/pt/model/task/dos.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + List, + Optional, + Union, +) + +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pt.model.task.ener import ( + InvarFitting, +) +from deepmd.pt.model.task.fitting import ( + Fitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@Fitting.register("dos") +class DOSFittingNet(InvarFitting): + def __init__( + self, + 
ntypes: int, + dim_descrpt: int, + numb_dos: int = 300, + neuron: List[int] = [128, 128, 128], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + rcond: Optional[float] = None, + bias_dos: Optional[torch.Tensor] = None, + trainable: Union[bool, List[bool]] = True, + seed: Optional[int] = None, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + exclude_types: List[int] = [], + mixed_types: bool = True, + ): + if bias_dos is not None: + self.bias_dos = bias_dos + else: + self.bias_dos = torch.zeros( + (ntypes, numb_dos), dtype=dtype, device=env.DEVICE + ) + super().__init__( + var_name="dos", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + dim_out=numb_dos, + neuron=neuron, + bias_atom_e=bias_dos, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + trainable=trainable, + ) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reduciable=True, + r_differentiable=False, + c_differentiable=False, + ), + ] + ) + + @classmethod + def deserialize(cls, data: dict) -> "DOSFittingNet": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) + data.pop("var_name", None) + data.pop("tot_ener_zero", None) + data.pop("layer_name", None) + data.pop("use_aparam_as_mask", None) + data.pop("spin", None) + data.pop("atom_ener", None) + data["numb_dos"] = data.pop("dim_out") + obj = super().deserialize(data) + + return obj + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + # dd = super(InvarFitting, self).serialize() + dd = { + **InvarFitting.serialize(self), + "type": "dos", + "dim_out": self.dim_out, + } + dd["@variables"]["bias_atom_e"] = to_numpy_array(self.bias_atom_e) + + return dd + + # make jit happy with torch 2.0.0 + exclude_types: List[int] diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index b593ddc3cc..12c0917dd2 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -2,11 +2,9 @@ import copy import logging from typing import ( - Callable, List, Optional, Tuple, - Union, ) import numpy as np @@ -24,17 +22,17 @@ Fitting, GeneralFitting, ) +from deepmd.pt.model.task.invar_fitting import ( + InvarFitting, +) from deepmd.pt.utils import ( env, ) from deepmd.pt.utils.env import ( DEFAULT_PRECISION, ) -from deepmd.pt.utils.stat import ( - compute_output_stats, -) -from deepmd.utils.path import ( - DPPath, +from deepmd.utils.version import ( + check_version_compatibility, ) dtype = env.GLOBAL_PT_FLOAT_PRECISION @@ -43,169 +41,6 @@ log = logging.getLogger(__name__) -@GeneralFitting.register("invar") -@fitting_check_output -class InvarFitting(GeneralFitting): - """Construct a fitting net for energy. - - Parameters - ---------- - var_name : str - The atomic property to fit, 'energy', 'dipole', and 'polar'. - ntypes : int - Element count. - dim_descrpt : int - Embedding width per atom. - dim_out : int - The output dimension of the fitting net. - neuron : List[int] - Number of neurons in each hidden layers of the fitting net. - bias_atom_e : torch.Tensor, optional - Average enery per atom for each element. - resnet_dt : bool - Using time-step in the ResNet construction. - numb_fparam : int - Number of frame parameters. - numb_aparam : int - Number of atomic parameters. 
- activation_function : str - Activation function. - precision : str - Numerical precision. - mixed_types : bool - If true, use a uniform fitting net for all atom types, otherwise use - different fitting nets for different atom types. - rcond : float, optional - The condition number for the regression of atomic energy. - seed : int, optional - Random seed. - exclude_types: List[int] - Atomic contributions of the excluded atom types are set zero. - atom_ener: List[float], optional - Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. - - """ - - def __init__( - self, - var_name: str, - ntypes: int, - dim_descrpt: int, - dim_out: int, - neuron: List[int] = [128, 128, 128], - bias_atom_e: Optional[torch.Tensor] = None, - resnet_dt: bool = True, - numb_fparam: int = 0, - numb_aparam: int = 0, - activation_function: str = "tanh", - precision: str = DEFAULT_PRECISION, - mixed_types: bool = True, - rcond: Optional[float] = None, - seed: Optional[int] = None, - exclude_types: List[int] = [], - atom_ener: Optional[List[float]] = None, - **kwargs, - ): - self.dim_out = dim_out - self.atom_ener = atom_ener - super().__init__( - var_name=var_name, - ntypes=ntypes, - dim_descrpt=dim_descrpt, - neuron=neuron, - bias_atom_e=bias_atom_e, - resnet_dt=resnet_dt, - numb_fparam=numb_fparam, - numb_aparam=numb_aparam, - activation_function=activation_function, - precision=precision, - mixed_types=mixed_types, - rcond=rcond, - seed=seed, - exclude_types=exclude_types, - remove_vaccum_contribution=None - if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0 - else [x is not None for x in atom_ener], - **kwargs, - ) - - def _net_out_dim(self): - """Set the FittingNet output dim.""" - return self.dim_out - - def serialize(self) -> dict: - data = super().serialize() - data["type"] = "invar" - data["dim_out"] = self.dim_out - data["atom_ener"] = self.atom_ener - return data - - def compute_output_stats( - self, - merged: Union[Callable[[], List[dict]], List[dict]], - stat_file_path: Optional[DPPath] = None, - ): - """ - Compute the output statistics (e.g. energy bias) for the fitting net from packed data. - - Parameters - ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. - Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` - originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format - only when needed. Since the sampling process can be slow and memory-intensive, - the lazy function helps by only sampling once. - stat_file_path : Optional[DPPath] - The path to the stat file. - - """ - bias_atom_e = compute_output_stats( - merged, self.ntypes, stat_file_path, self.rcond, self.atom_ener - ) - self.bias_atom_e.copy_( - torch.tensor(bias_atom_e, device=env.DEVICE).view( - [self.ntypes, self.dim_out] - ) - ) - - def output_def(self) -> FittingOutputDef: - return FittingOutputDef( - [ - OutputVariableDef( - self.var_name, - [self.dim_out], - reduciable=True, - r_differentiable=True, - c_differentiable=True, - ), - ] - ) - - def forward( - self, - descriptor: torch.Tensor, - atype: torch.Tensor, - gr: Optional[torch.Tensor] = None, - g2: Optional[torch.Tensor] = None, - h2: Optional[torch.Tensor] = None, - fparam: Optional[torch.Tensor] = None, - aparam: Optional[torch.Tensor] = None, - ): - """Based on embedding net output, alculate total energy. 
- - Args: - - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. - - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. - - Returns - ------- - - `torch.Tensor`: Total energy with shape [nframes, natoms[0]]. - """ - return self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam) - - @Fitting.register("ener") class EnergyFittingNet(InvarFitting): def __init__( @@ -241,6 +76,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) @@ -252,6 +88,9 @@ def serialize(self) -> dict: "type": "ener", } + # make jit happy with torch 2.0.0 + exclude_types: List[int] + @Fitting.register("direct_force") @Fitting.register("direct_force_ener") diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 09f8563bfb..c8edee5b94 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -49,9 +49,6 @@ from deepmd.utils.finetune import ( change_energy_bias_lower, ) -from deepmd.utils.version import ( - check_version_compatibility, -) dtype = env.GLOBAL_PT_FLOAT_PRECISION device = env.DEVICE @@ -121,8 +118,8 @@ def change_energy_bias( The number of test samples in a system to change the energy bias. """ log.info( - "Changing energy bias in pretrained model for types {}... " - "(this step may take long time)".format(str(new_type_map)) + f"Changing energy bias in pretrained model for types {new_type_map!s}... " + "(this step may take long time)" ) # data systems = config["training"]["training_data"]["systems"] @@ -371,7 +368,6 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 1, 1) variables = data.pop("@variables") nets = data.pop("nets") obj = cls(**data) @@ -461,7 +457,8 @@ def _forward_common( ): xx = descriptor if self.remove_vaccum_contribution is not None: - # TODO: Idealy, the input for vaccum should be computed; + # TODO: compute the input for vaccm when remove_vaccum_contribution is set + # Idealy, the input for vaccum should be computed; # we consider it as always zero for convenience. # Needs a compute_input_stats for vaccum passed from the # descriptor. diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py new file mode 100644 index 0000000000..afb1d73658 --- /dev/null +++ b/deepmd/pt/model/task/invar_fitting.py @@ -0,0 +1,208 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Callable, + List, + Optional, + Union, +) + +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.pt.model.task.fitting import ( + GeneralFitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.pt.utils.stat import ( + compute_output_stats, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@GeneralFitting.register("invar") +@fitting_check_output +class InvarFitting(GeneralFitting): + """Construct a fitting net for energy. 
+ + Parameters + ---------- + var_name : str + The atomic property to fit, 'energy', 'dipole', and 'polar'. + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + dim_out : int + The output dimension of the fitting net. + neuron : List[int] + Number of neurons in each hidden layers of the fitting net. + bias_atom_e : torch.Tensor, optional + Average enery per atom for each element. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + exclude_types: List[int] + Atomic contributions of the excluded atom types are set zero. + atom_ener: List[float], optional + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. + + """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + dim_out: int, + neuron: List[int] = [128, 128, 128], + bias_atom_e: Optional[torch.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[int] = None, + exclude_types: List[int] = [], + atom_ener: Optional[List[float]] = None, + **kwargs, + ): + self.dim_out = dim_out + self.atom_ener = atom_ener + super().__init__( + var_name=var_name, + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + bias_atom_e=bias_atom_e, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + remove_vaccum_contribution=None + if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0 + else [x is not None for x in atom_ener], + **kwargs, + ) + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return self.dim_out + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "invar" + data["dim_out"] = self.dim_out + data["atom_ener"] = self.atom_ener + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + + def compute_output_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. 
+ + """ + bias_atom_e = compute_output_stats( + merged, self.ntypes, stat_file_path, self.rcond, self.atom_ener + ) + self.bias_atom_e.copy_(bias_atom_e.view([self.ntypes, self.dim_out])) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def forward( + self, + descriptor: torch.Tensor, + atype: torch.Tensor, + gr: Optional[torch.Tensor] = None, + g2: Optional[torch.Tensor] = None, + h2: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ): + """Based on embedding net output, alculate total energy. + + Args: + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. + + Returns + ------- + - `torch.Tensor`: Total energy with shape [nframes, natoms[0]]. + """ + return self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam) + + # make jit happy with torch 2.0.0 + exclude_types: List[int] diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index fa4f6d7f37..d7428c4d53 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy import logging from typing import ( Callable, @@ -7,6 +8,7 @@ Union, ) +import numpy as np import torch from deepmd.dpmodel import ( @@ -25,9 +27,16 @@ from deepmd.pt.utils.utils import ( to_numpy_array, ) +from deepmd.utils.out_stat import ( + compute_stats_from_atomic, + compute_stats_from_redu, +) from deepmd.utils.path import ( DPPath, ) +from deepmd.utils.version import ( + check_version_compatibility, +) log = logging.getLogger(__name__) @@ -114,6 +123,9 @@ def __init__( self.scale, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE ).view(ntypes, 1) self.shift_diag = shift_diag + self.constant_matrix = torch.zeros( + ntypes, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ) super().__init__( var_name=kwargs.pop("var_name", "polar"), ntypes=ntypes, @@ -140,16 +152,36 @@ def _net_out_dim(self): else self.embedding_width * self.embedding_width ) + def __setitem__(self, key, value): + if key in ["constant_matrix"]: + self.constant_matrix = value + else: + super().__setitem__(key, value) + + def __getitem__(self, key): + if key in ["constant_matrix"]: + return self.constant_matrix + else: + return super().__getitem__(key) + def serialize(self) -> dict: data = super().serialize() data["type"] = "polar" + data["@version"] = 2 data["embedding_width"] = self.embedding_width data["old_impl"] = self.old_impl data["fit_diag"] = self.fit_diag - data["fit_diag"] = self.fit_diag + data["shift_diag"] = self.shift_diag data["@variables"]["scale"] = to_numpy_array(self.scale) + data["@variables"]["constant_matrix"] = to_numpy_array(self.constant_matrix) return data + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + return super().deserialize(data) + def output_def(self) -> FittingOutputDef: return FittingOutputDef( [ @@ -167,7 +199,7 @@ def compute_output_stats( self, merged: Union[Callable[[], List[dict]], List[dict]], stat_file_path: Optional[DPPath] = None, - ): + ) -> None: """ Compute the output statistics (e.g. energy bias) for the fitting net from packed data. 
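The next hunk fills in `compute_output_stats` for the polarizability fitting: when only frame-level polarizabilities are available, it calls `compute_stats_from_redu` to recover per-type atomic tensors from per-frame totals and type counts, then keeps the mean diagonal of each recovered 3x3 tensor as `constant_matrix`. A hedged NumPy sketch of that recovery (the internals of `compute_stats_from_redu` are not shown in this diff; a plain least-squares solve is assumed here, and all values are toy data):

```python
import numpy as np

nframes, ntypes, ndim = 50, 2, 9
rng = np.random.default_rng(0)
type_count = rng.integers(1, 10, size=(nframes, ntypes)).astype(float)
true_atom_polar = rng.normal(size=(ntypes, ndim))
frame_totals = type_count @ true_atom_polar  # (nframes, 9) frame polarizabilities

# least-squares recovery: frame_totals ~ type_count @ atom_polar
atom_polar, *_ = np.linalg.lstsq(type_count, frame_totals, rcond=None)

# per-type shift constant = mean diagonal of the recovered 3x3 tensor,
# as done explicitly in the hunk below
constant_matrix = np.array(
    [np.mean(np.diagonal(p.reshape(3, 3))) for p in atom_polar]
)
print(constant_matrix)
```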
@@ -184,7 +216,60 @@ def compute_output_stats( The path to the stat file. """ - pass + if self.shift_diag: + if stat_file_path is not None: + stat_file_path = stat_file_path / "constant_matrix" + if stat_file_path is not None and stat_file_path.is_file(): + constant_matrix = stat_file_path.load_numpy() + else: + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + + sys_constant_matrix = [] + for sys in range(len(sampled)): + nframs = sampled[sys]["type"].shape[0] + + if sampled[sys]["find_atomic_polarizability"] > 0.0: + sys_atom_polar = compute_stats_from_atomic( + sampled[sys]["atomic_polarizability"].numpy(force=True), + sampled[sys]["type"].numpy(force=True), + )[0] + else: + if not sampled[sys]["find_polarizability"] > 0.0: + continue + sys_type_count = np.zeros( + (nframs, self.ntypes), dtype=env.GLOBAL_NP_FLOAT_PRECISION + ) + for itype in range(self.ntypes): + type_mask = sampled[sys]["type"] == itype + sys_type_count[:, itype] = type_mask.sum(dim=1).numpy( + force=True + ) + + sys_bias_redu = sampled[sys]["polarizability"].numpy(force=True) + + sys_atom_polar = compute_stats_from_redu( + sys_bias_redu, sys_type_count, rcond=self.rcond + )[0] + cur_constant_matrix = np.zeros( + self.ntypes, dtype=env.GLOBAL_NP_FLOAT_PRECISION + ) + + for itype in range(self.ntypes): + cur_constant_matrix[itype] = np.mean( + np.diagonal(sys_atom_polar[itype].reshape(3, 3)) + ) + sys_constant_matrix.append(cur_constant_matrix) + constant_matrix = np.stack(sys_constant_matrix).mean(axis=0) + + # handle nan values. + constant_matrix = np.nan_to_num(constant_matrix) + if stat_file_path is not None: + stat_file_path.save_numpy(constant_matrix) + self.constant_matrix = torch.tensor(constant_matrix, device=env.DEVICE) def forward( self, @@ -218,5 +303,19 @@ def forward( "bim,bmj->bij", gr.transpose(1, 2), out ) # (nframes * nloc, 3, 3) out = out.view(nframes, nloc, 3, 3) + if self.shift_diag: + bias = self.constant_matrix[atype] + + # (nframes, nloc, 1) + bias = bias.unsqueeze(-1) * self.scale[atype] + + eye = torch.eye(3, device=env.DEVICE) + eye = eye.repeat(nframes, nloc, 1, 1) + # (nframes, nloc, 3, 3) + bias = bias.unsqueeze(-1) * eye + out = out + bias return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)} + + # make jit happy with torch 2.0.0 + exclude_types: List[int] diff --git a/deepmd/pt/optimizer/LKF.py b/deepmd/pt/optimizer/LKF.py index ebc9242d49..06b341d987 100644 --- a/deepmd/pt/optimizer/LKF.py +++ b/deepmd/pt/optimizer/LKF.py @@ -3,11 +3,25 @@ import math import torch +import torch.distributed as dist from torch.optim.optimizer import ( Optimizer, ) -log = logging.getLogger(__name__) + +def distribute_indices(total_length, num_workers): + indices_per_worker = total_length // num_workers + remainder = total_length % num_workers + + indices = [] + start = 0 + + for i in range(num_workers): + end = start + indices_per_worker + (1 if i < remainder else 0) + indices.append((start, end)) + start = end + + return indices, remainder class LKFOptimizer(Optimizer): @@ -18,11 +32,8 @@ def __init__( kalman_nue=0.9987, block_size=5120, ): - defaults = { - "lr": 0.1, - "kalman_nue": kalman_nue, - "block_size": block_size, - } + defaults = {"lr": 0.1, "kalman_nue": kalman_nue, "block_size": block_size} + super().__init__(params, defaults) self._params = self.param_groups[0]["params"] @@ -36,7 +47,10 @@ def __init__( # the first param, because this helps with casting in load_state_dict self._state = self.state[self._params[0]] 
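The new `distribute_indices` helper at the top of this LKF.py diff splits the Kalman `P` blocks across ranks as evenly as possible, handing one extra block to each of the first `remainder` workers. Copying the helper exactly as defined above, a quick check of what it returns:

```python
def distribute_indices(total_length, num_workers):
    indices_per_worker = total_length // num_workers
    remainder = total_length % num_workers
    indices, start = [], 0
    for i in range(num_workers):
        end = start + indices_per_worker + (1 if i < remainder else 0)
        indices.append((start, end))
        start = end
    return indices, remainder


# 10 blocks over 3 workers: the first worker gets the extra block
assert distribute_indices(10, 3) == ([(0, 4), (4, 7), (7, 10)], 1)
```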
self._state.setdefault("kalman_lambda", kalman_lambda) - + self.dist_init = dist.is_initialized() + self.rank = dist.get_rank() if self.dist_init else 0 + self.dindex = [] + self.remainder = 0 self.__init_P() def __init_P(self): @@ -61,32 +75,84 @@ def __init_P(self): P = [] params_packed_index = [] - log.info("LKF parameter nums: %s" % param_nums) - for param_num in param_nums: - if param_num >= block_size: - block_num = math.ceil(param_num / block_size) - for i in range(block_num): - if i != block_num - 1: + logging.info("LKF parameter nums: %s" % param_nums) + if self.dist_init: + block_num = 0 + for param_num in param_nums: + if param_num >= block_size: + block_num += math.ceil(param_num / block_size) + else: + block_num += 1 + num_workers = dist.get_world_size() + self.dindex, self.remainder = distribute_indices(block_num, num_workers) + index = 0 + for param_num in param_nums: + if param_num >= block_size: + block_num = math.ceil(param_num / block_size) + for i in range(block_num): + device_id = self.get_device_id(index) + index += 1 + dist_device = torch.device("cuda:" + str(device_id)) + if i != block_num - 1: + params_packed_index.append(block_size) + if self.rank == device_id: + P.append( + torch.eye( + block_size, + dtype=data_type, + device=dist_device, + ) + ) + else: + continue + else: + params_packed_index.append(param_num - block_size * i) + if self.rank == device_id: + P.append( + torch.eye( + param_num - block_size * i, + dtype=data_type, + device=dist_device, + ) + ) + else: + continue + + else: + device_id = self.get_device_id(index) + index += 1 + params_packed_index.append(param_num) + if self.rank == device_id: + dist_device = torch.device("cuda:" + str(device_id)) P.append( - torch.eye( - block_size, - dtype=data_type, - device=device, - ) + torch.eye(param_num, dtype=data_type, device=dist_device) ) - params_packed_index.append(block_size) - else: - P.append( - torch.eye( - param_num - block_size * i, - dtype=data_type, - device=device, + else: + for param_num in param_nums: + if param_num >= block_size: + block_num = math.ceil(param_num / block_size) + for i in range(block_num): + if i != block_num - 1: + P.append( + torch.eye( + block_size, + dtype=data_type, + device=device, + ) ) - ) - params_packed_index.append(param_num - block_size * i) - else: - P.append(torch.eye(param_num, dtype=data_type, device=device)) - params_packed_index.append(param_num) + params_packed_index.append(block_size) + else: + P.append( + torch.eye( + param_num - block_size * i, + dtype=data_type, + device=device, + ) + ) + params_packed_index.append(param_num - block_size * i) + else: + P.append(torch.eye(param_num, dtype=data_type, device=device)) + params_packed_index.append(param_num) self._state.setdefault("P", P) self._state.setdefault("weights_num", len(P)) @@ -125,16 +191,35 @@ def __update(self, H, error, weights): tmp = 0 for i in range(weights_num): tmp = tmp + (kalman_lambda + torch.matmul(torch.matmul(H[i].T, P[i]), H[i])) - + if self.dist_init: + dist.all_reduce(tmp, op=dist.ReduceOp.SUM) A = 1 / tmp - for i in range(weights_num): K = torch.matmul(P[i], H[i]) weights[i] = weights[i] + A * error * K P[i] = (1 / kalman_lambda) * (P[i] - A * torch.matmul(K, K.T)) - + if self.dist_init: + device = torch.device("cuda:" + str(self.rank)) + local_shape = [tensor.shape[0] for tensor in weights] + shape_list = [ + torch.zeros_like(torch.empty(1), dtype=torch.float64, device=device) + for _ in range(dist.get_world_size()) + ] + dist.all_gather_object(shape_list, local_shape) + 
weight_tensor = torch.cat(weights) + world_shape = [sum(inner_list) for inner_list in shape_list] + weight_list = [None] * len(world_shape) + for i in range(len(world_shape)): + weight_list[i] = torch.zeros( + world_shape[i], dtype=torch.float64, device=device + ) + dist.all_gather(weight_list, weight_tensor) + result = [] + for i in range(dist.get_world_size()): + result = result + list(torch.split(weight_list[i], shape_list[i])) + weights = result kalman_lambda = kalman_nue * kalman_lambda + 1 - kalman_nue self._state.update({"kalman_lambda": kalman_lambda}) @@ -215,9 +300,21 @@ def step(self, error): param_sum += nelement if param_sum == params_packed_index[param_index]: - H.append(res_grad) - weights.append(res) param_sum = 0 + if self.dist_init: + device_id = self.get_device_id(param_index) + if self.rank == device_id: + weights.append(res) + H.append(res_grad) + else: + weights.append(res) + H.append(res_grad) param_index += 1 self.__update(H, error, weights) + + def get_device_id(self, index): + for i, (start, end) in enumerate(self.dindex): + if start <= index < end: + return i + return None diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index fb28f0c4f2..9fd675a8f2 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -274,6 +274,9 @@ def get_loss(loss_params, start_lr, _ntypes, _model): if loss_type == "ener": loss_params["starter_learning_rate"] = start_lr return EnergyStdLoss(**loss_params) + elif loss_type == "dos": + loss_params["starter_learning_rate"] = start_lr + raise NotImplementedError() elif loss_type == "ener_spin": loss_params["starter_learning_rate"] = start_lr return EnergySpinLoss(**loss_params) @@ -520,8 +523,11 @@ def get_loss(loss_params, start_lr, _ntypes, _model): model_params["type_map"], model_params["new_type_map"], ) - if hasattr(self.model, "fitting_net"): - self.model.fitting_net.change_energy_bias( + # TODO: need an interface instead of fetching fitting_net!!!!!!!!! 
+ if hasattr(self.model, "atomic_model") and hasattr( + self.model.atomic_model, "fitting_net" + ): + self.model.atomic_model.fitting_net.change_energy_bias( config, self.model, old_type_map, @@ -531,7 +537,7 @@ def get_loss(loss_params, start_lr, _ntypes, _model): ) elif isinstance(self.model, DPZBLModel): # need to updated - self.model.change_energy_bias() + self.model.atomic_model.change_energy_bias() else: raise NotImplementedError if init_frz_model is not None: @@ -552,14 +558,16 @@ def get_loss(loss_params, start_lr, _ntypes, _model): output_device=LOCAL_RANK, ) - # TODO ZD add lr warmups for multitask + # TODO add lr warmups for multitask + # author: iProzd def warm_up_linear(step, warmup_steps): if step < warmup_steps: return step / warmup_steps else: return self.lr_exp.value(step - warmup_steps) / self.lr_exp.start_lr - # TODO ZD add optimizers for multitask + # TODO add optimizers for multitask + # author: iProzd if self.opt_type == "Adam": self.optimizer = torch.optim.Adam( self.wrapper.parameters(), lr=self.lr_exp.start_lr @@ -688,8 +696,13 @@ def step(_step_id, task_key="Default"): module = ( self.wrapper.module if dist.is_initialized() else self.wrapper ) - loss, more_loss = module.loss[task_key]( - model_pred, + + def fake_model(): + return model_pred + + _, loss, more_loss = module.loss[task_key]( + {}, + fake_model, label_dict, int(input_dict["atype"].shape[-1]), learning_rate=pref_lr, diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index c1040fb9e3..6bc7cdc87a 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -75,12 +75,12 @@ def share_params(self, shared_links, resume=False): shared_level_base = shared_base["shared_level"] if "descriptor" in class_type_base: if class_type_base == "descriptor": - base_class = self.model[model_key_base].__getattr__("descriptor") + base_class = self.model[model_key_base].get_descriptor() elif "hybrid" in class_type_base: hybrid_index = int(class_type_base.split("_")[-1]) base_class = ( self.model[model_key_base] - .__getattr__("descriptor") + .get_descriptor() .descriptor_list[hybrid_index] ) else: @@ -96,14 +96,12 @@ def share_params(self, shared_links, resume=False): "descriptor" in class_type_link ), f"Class type mismatched: {class_type_base} vs {class_type_link}!" 
if class_type_link == "descriptor": - link_class = self.model[model_key_link].__getattr__( - "descriptor" - ) + link_class = self.model[model_key_link].get_descriptor() elif "hybrid" in class_type_link: hybrid_index = int(class_type_link.split("_")[-1]) link_class = ( self.model[model_key_link] - .__getattr__("descriptor") + .get_descriptor() .descriptor_list[hybrid_index] ) else: @@ -170,15 +168,20 @@ def forward( has_spin = has_spin() if has_spin: input_dict["spin"] = spin - model_pred = self.model[task_key](**input_dict) - natoms = atype.shape[-1] - if not self.inference_only and not inference_only: - loss, more_loss = self.loss[task_key]( - model_pred, label, natoms=natoms, learning_rate=cur_lr + + if self.inference_only or inference_only: + model_pred = self.model[task_key](**input_dict) + return model_pred, None, None + else: + natoms = atype.shape[-1] + model_pred, loss, more_loss = self.loss[task_key]( + input_dict, + self.model[task_key], + label, + natoms=natoms, + learning_rate=cur_lr, ) return model_pred, loss, more_loss - else: - return model_pred, None, None def set_extra_state(self, state: Dict): self.model_params = state["model_params"] diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py index eca3c6ad87..94c657abd4 100644 --- a/deepmd/pt/utils/learning_rate.py +++ b/deepmd/pt/utils/learning_rate.py @@ -3,14 +3,35 @@ class LearningRateExp: - def __init__(self, start_lr, stop_lr, decay_steps, stop_steps, **kwargs): - """Construct an exponential-decayed learning rate. + def __init__( + self, + start_lr, + stop_lr, + decay_steps, + stop_steps, + decay_rate=None, + **kwargs, + ): + """ + Construct an exponential-decayed learning rate. - Args: - - start_lr: Initial learning rate. - - stop_lr: Learning rate at the last step. - - decay_steps: Decay learning rate every N steps. - - stop_steps: When is the last step. + Parameters + ---------- + start_lr + The learning rate at the start of the training. + stop_lr + The desired learning rate at the end of the training. + When decay_rate is explicitly set, this value will serve as + the minimum learning rate during training. In other words, + if the learning rate decays below stop_lr, stop_lr will be applied instead. + decay_steps + The learning rate is decaying every this number of training steps. + stop_steps + The total training steps for learning rate scheduler. + decay_rate + The decay rate for the learning rate. + If provided, the decay rate will be set instead of + calculating it through interpolation between start_lr and stop_lr. 
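A small numeric sketch of the scheduler this docstring describes: the decay rate is interpolated so the learning rate reaches roughly `stop_lr` after `stop_steps`, unless an explicit `decay_rate` is passed, in which case `stop_lr` acts as a floor. The `value` method itself is not shown in the hunk, so the clamping below is an assumption based on `min_lr = stop_lr`:

```python
import numpy as np

start_lr, stop_lr = 1e-3, 1e-8
decay_steps, stop_steps = 5000, 1_000_000
# interpolated decay rate, same formula as in the hunk below
decay_rate = np.exp(np.log(stop_lr / start_lr) / (stop_steps / decay_steps))


def lr_at(step, min_lr=stop_lr):
    # the floor only matters when decay_rate is overridden by hand
    return max(start_lr * decay_rate ** (step // decay_steps), min_lr)


print(lr_at(0), lr_at(stop_steps))  # ~1e-3 at the start, ~1e-8 at the end
```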
""" self.start_lr = start_lr default_ds = 100 if stop_steps // 10 > 100 else stop_steps // 100 + 1 @@ -20,12 +41,9 @@ def __init__(self, start_lr, stop_lr, decay_steps, stop_steps, **kwargs): self.decay_rate = np.exp( np.log(stop_lr / self.start_lr) / (stop_steps / self.decay_steps) ) - if "decay_rate" in kwargs: - self.decay_rate = kwargs["decay_rate"] - if "min_lr" in kwargs: - self.min_lr = kwargs["min_lr"] - else: - self.min_lr = 3e-10 + if decay_rate is not None: + self.decay_rate = decay_rate + self.min_lr = stop_lr def value(self, step): """Get the learning rate at the given step.""" diff --git a/deepmd/pt/utils/nlist.py b/deepmd/pt/utils/nlist.py index d37931b65a..cdee6e3722 100644 --- a/deepmd/pt/utils/nlist.py +++ b/deepmd/pt/utils/nlist.py @@ -51,7 +51,7 @@ def extend_input_and_build_neighbor_list( def build_neighbor_list( - coord1: torch.Tensor, + coord: torch.Tensor, atype: torch.Tensor, nloc: int, rcut: float, @@ -62,10 +62,11 @@ def build_neighbor_list( Parameters ---------- - coord1 : torch.Tensor + coord : torch.Tensor exptended coordinates of shape [batch_size, nall x 3] atype : torch.Tensor extended atomic types of shape [batch_size, nall] + if type < 0 the atom is treat as virtual atoms. nloc : int number of local atoms. rcut : float @@ -90,11 +91,20 @@ def build_neighbor_list( if distinguish_types==True and we have two types |---- nsel[0] -----| |---- nsel[1] -----| xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1 + For virtual atoms all neighboring positions are filled with -1. """ - batch_size = coord1.shape[0] - coord1 = coord1.view(batch_size, -1) - nall = coord1.shape[1] // 3 + batch_size = coord.shape[0] + coord = coord.view(batch_size, -1) + nall = coord.shape[1] // 3 + # fill virtual atoms with large coords so they are not neighbors of any + # real atom. 
+ xmax = torch.max(coord) + 2.0 * rcut + # nf x nall + is_vir = atype < 0 + coord1 = torch.where(is_vir[:, :, None], xmax, coord.view(-1, nall, 3)).view( + -1, nall * 3 + ) if isinstance(sel, int): sel = [sel] nsel = sum(sel) @@ -133,7 +143,9 @@ def build_neighbor_list( dim=-1, ) assert list(nlist.shape) == [batch_size, nloc, nsel] - nlist = nlist.masked_fill((rr > rcut), -1) + nlist = torch.where( + torch.logical_or((rr > rcut), is_vir[:, :nloc, None]), -1, nlist + ) if distinguish_types: return nlist_distinguish_types(nlist, atype, sel) @@ -256,7 +268,7 @@ def build_multiple_neighbor_list( nlist0 = nlist ret = {} for rc, ns in zip(rcuts[::-1], nsels[::-1]): - nlist0 = nlist0[:, :, :ns].masked_fill(rr[:, :, :ns] > rc, int(-1)) + nlist0 = nlist0[:, :, :ns].masked_fill(rr[:, :, :ns] > rc, -1) ret[get_multiple_nlist_key(rc, ns)] = nlist0 return ret diff --git a/deepmd/pt/utils/plugin.py b/deepmd/pt/utils/plugin.py index c24f36f574..aa901c06e8 100644 --- a/deepmd/pt/utils/plugin.py +++ b/deepmd/pt/utils/plugin.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Base of plugin systems.""" + from deepmd.utils.plugin import ( Plugin, PluginVariant, diff --git a/deepmd/pt/utils/update_sel.py b/deepmd/pt/utils/update_sel.py index 2d077acac1..8c2d0699f2 100644 --- a/deepmd/pt/utils/update_sel.py +++ b/deepmd/pt/utils/update_sel.py @@ -17,5 +17,5 @@ def neighbor_stat(self) -> Type[NeighborStat]: return NeighborStat def hook(self, min_nbor_dist, max_nbor_size): - # TODO: save to the model + # TODO: save to the model in UpdateSel.hook pass diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index 10dcadadac..3337036ca9 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -65,13 +65,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: @overload -def to_numpy_array(xx: torch.Tensor) -> np.ndarray: - ... +def to_numpy_array(xx: torch.Tensor) -> np.ndarray: ... @overload -def to_numpy_array(xx: None) -> None: - ... +def to_numpy_array(xx: None) -> None: ... def to_numpy_array( @@ -91,13 +89,11 @@ def to_numpy_array( @overload -def to_torch_tensor(xx: np.ndarray) -> torch.Tensor: - ... +def to_torch_tensor(xx: np.ndarray) -> torch.Tensor: ... @overload -def to_torch_tensor(xx: None) -> None: - ... +def to_torch_tensor(xx: None) -> None: ... def to_torch_tensor( diff --git a/deepmd/tf/descriptor/descriptor.py b/deepmd/tf/descriptor/descriptor.py index dbf260bfe8..82b09c95fb 100644 --- a/deepmd/tf/descriptor/descriptor.py +++ b/deepmd/tf/descriptor/descriptor.py @@ -102,9 +102,6 @@ def get_dim_rot_mat_1(self) -> int: int the first dimension of the rotation matrix """ - # TODO: I think this method should be implemented as it's called by dipole and - # polar fitting network. However, currently not all descriptors have this - # method. raise NotImplementedError def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: @@ -121,8 +118,6 @@ def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: sel_r : list[int] The number of neighbors with only radial information """ - # TODO: I think this method should be implemented as it's called by energy - # model. However, se_ar and hybrid doesn't have this method. 
raise NotImplementedError @abstractmethod diff --git a/deepmd/tf/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py index 4635554610..8b6ae3539b 100644 --- a/deepmd/tf/descriptor/se_a.py +++ b/deepmd/tf/descriptor/se_a.py @@ -1426,7 +1426,8 @@ def serialize(self, suffix: str = "") -> dict: raise NotImplementedError("spin is unsupported") assert self.davg is not None assert self.dstd is not None - # TODO: not sure how to handle type embedding - type embedding is not a model parameter, + # TODO: tf: handle type embedding in DescrptSeA.serialize + # not sure how to handle type embedding - type embedding is not a model parameter, # but instead a part of the input data. Maybe the interface should be refactored... return { diff --git a/deepmd/tf/descriptor/se_a_mask.py b/deepmd/tf/descriptor/se_a_mask.py index 55b34adf48..ace8a47bbc 100644 --- a/deepmd/tf/descriptor/se_a_mask.py +++ b/deepmd/tf/descriptor/se_a_mask.py @@ -249,10 +249,9 @@ def compute_input_stats( **kwargs Additional keyword arguments. """ - """ - TODO: Since not all input atoms are real in se_a_mask, - statistics should be reimplemented for se_a_mask descriptor. - """ + # TODO: implement compute_input_stats for DescrptSeAMask + # Since not all input atoms are real in se_a_mask, + # statistics should be reimplemented for se_a_mask descriptor. self.davg = None self.dstd = None diff --git a/deepmd/tf/descriptor/se_r.py b/deepmd/tf/descriptor/se_r.py index 9f88ebe37d..8ef48c0de2 100644 --- a/deepmd/tf/descriptor/se_r.py +++ b/deepmd/tf/descriptor/se_r.py @@ -766,7 +766,8 @@ def serialize(self, suffix: str = "") -> dict: raise NotImplementedError("spin is unsupported") assert self.davg is not None assert self.dstd is not None - # TODO: not sure how to handle type embedding - type embedding is not a model parameter, + # TODO: tf: handle type embedding in DescrptSeR.serialize + # not sure how to handle type embedding - type embedding is not a model parameter, # but instead a part of the input data. Maybe the interface should be refactored... return { "@class": "Descriptor", diff --git a/deepmd/tf/entrypoints/freeze.py b/deepmd/tf/entrypoints/freeze.py index c7ab1023fa..3d2a609797 100755 --- a/deepmd/tf/entrypoints/freeze.py +++ b/deepmd/tf/entrypoints/freeze.py @@ -152,10 +152,8 @@ def _modify_model_suffix(output_graph_def, out_suffix, freeze_type): else: jdata["training"]["training_data"] = {} log.warning( - "The fitting net {} has no training data in input script, resulting in " - "untrained frozen model, and cannot be compressed directly! ".format( - out_suffix - ) + f"The fitting net {out_suffix} has no training data in input script, resulting in " + "untrained frozen model, and cannot be compressed directly! " ) # loss if "loss_dict" in jdata: diff --git a/deepmd/tf/entrypoints/ipi.py b/deepmd/tf/entrypoints/ipi.py index 49f72434f3..1631a35c2e 100644 --- a/deepmd/tf/entrypoints/ipi.py +++ b/deepmd/tf/entrypoints/ipi.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Use dp_ipi inside the Python package.""" + import os import subprocess import sys diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py index 3127e01e97..8cc1cacad1 100644 --- a/deepmd/tf/env.py +++ b/deepmd/tf/env.py @@ -157,7 +157,8 @@ def dlopen_library(module: str, filename: str): r"(final)_layer_type_(\d+)/(matrix)|" r"(final)_layer/(bias)|" r"(final)_layer_type_(\d+)/(bias)|" - # TODO: not sure how to parse for shared layers... + # TODO: supporting extracting parameters for shared layers + # not sure how to parse for shared layers... 
# layer_name r"share_.+_type_\d/matrix|" r"share_.+_type_\d/bias|" @@ -376,20 +377,14 @@ def get_module(module_name: str) -> "ModuleType": if TF_VERSION != tf_py_version: raise RuntimeError( "The version of TensorFlow used to compile this " - "deepmd-kit package is {}, but the version of TensorFlow " - "runtime you are using is {}. These two versions are " - "incompatible and thus an error is raised when loading {}. " - "You need to install TensorFlow {}, or rebuild deepmd-kit " - "against TensorFlow {}.\nIf you are using a wheel from " + f"deepmd-kit package is {TF_VERSION}, but the version of TensorFlow " + f"runtime you are using is {tf_py_version}. These two versions are " + f"incompatible and thus an error is raised when loading {module_name}. " + f"You need to install TensorFlow {TF_VERSION}, or rebuild deepmd-kit " + f"against TensorFlow {tf_py_version}.\nIf you are using a wheel from " "pypi, you may consider to install deepmd-kit execuating " "`pip install deepmd-kit --no-binary deepmd-kit` " - "instead.".format( - TF_VERSION, - tf_py_version, - module_name, - TF_VERSION, - tf_py_version, - ) + "instead." ) from e error_message = ( "This deepmd-kit package is inconsitent with TensorFlow " diff --git a/deepmd/tf/fit/dipole.py b/deepmd/tf/fit/dipole.py index f503789308..978fd958fb 100644 --- a/deepmd/tf/fit/dipole.py +++ b/deepmd/tf/fit/dipole.py @@ -355,7 +355,7 @@ def serialize(self, suffix: str) -> dict: "dim_descrpt": self.dim_descrpt, "embedding_width": self.dim_rot_mat_1, # very bad design: type embedding is not passed to the class - # TODO: refactor the class + # TODO: refactor the class for type embedding and dipole fitting "mixed_types": False, "dim_out": 3, "neuron": self.n_neuron, @@ -365,7 +365,7 @@ def serialize(self, suffix: str) -> dict: "exclude_types": [], "nets": self.serialize_network( ntypes=self.ntypes, - # TODO: consider type embeddings + # TODO: consider type embeddings in dipole fitting ndim=1, in_dim=self.dim_descrpt, out_dim=self.dim_rot_mat_1, diff --git a/deepmd/tf/fit/dos.py b/deepmd/tf/fit/dos.py index 0cc5a7df62..292db8d5b4 100644 --- a/deepmd/tf/fit/dos.py +++ b/deepmd/tf/fit/dos.py @@ -46,6 +46,9 @@ from deepmd.utils.out_stat import ( compute_stats_from_redu, ) +from deepmd.utils.version import ( + check_version_compatibility, +) log = logging.getLogger(__name__) @@ -57,8 +60,10 @@ class DOSFitting(Fitting): Parameters ---------- - descrpt - The descrptor :math:`\mathcal{D}` + ntypes + The ntypes of the descrptor :math:`\mathcal{D}` + dim_descrpt + The dimension of the descrptor :math:`\mathcal{D}` neuron Number of neurons :math:`N` in each hidden layer of the fitting net resnet_dt @@ -94,7 +99,8 @@ class DOSFitting(Fitting): def __init__( self, - descrpt: tf.Tensor, + ntypes: int, + dim_descrpt: int, neuron: List[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, @@ -112,8 +118,8 @@ def __init__( ) -> None: """Constructor.""" # model param - self.ntypes = descrpt.get_ntypes() - self.dim_descrpt = descrpt.get_dim_out() + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt self.use_aparam_as_mask = use_aparam_as_mask self.numb_fparam = numb_fparam @@ -127,6 +133,7 @@ def __init__( self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = one_layer_rand_seed_shift() + self.activation_function = activation_function self.fitting_activation_fn = get_activation_func(activation_function) self.fitting_precision = get_precision(precision) self.trainable = trainable @@ -145,16 +152,16 @@ def __init__( add_data_requirement( 
"fparam", self.numb_fparam, atomic=False, must=True, high_prec=False ) - self.fparam_avg = None - self.fparam_std = None - self.fparam_inv_std = None + self.fparam_avg = None + self.fparam_std = None + self.fparam_inv_std = None if self.numb_aparam > 0: add_data_requirement( "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False ) - self.aparam_avg = None - self.aparam_std = None - self.aparam_inv_std = None + self.aparam_avg = None + self.aparam_std = None + self.aparam_inv_std = None self.fitting_net_variables = None self.mixed_prec = None @@ -521,7 +528,11 @@ def build( final_layer = tf.reshape( final_layer, - [tf.shape(inputs)[0] * self.numb_dos, natoms[2 + type_i]], + [ + tf.shape(inputs)[0], + natoms[2 + type_i], + self.numb_dos, + ], ) outs_list.append(final_layer) start_index += natoms[2 + type_i] @@ -550,7 +561,8 @@ def build( ) outs = tf.reshape( - final_layer, [tf.shape(inputs)[0] * self.numb_dos, natoms[0]] + final_layer, + [tf.shape(inputs)[0], natoms[0], self.numb_dos], ) # add bias # self.atom_ener_before = outs @@ -562,7 +574,7 @@ def build( # self.atom_ener_after = outs tf.summary.histogram("fitting_net_output", outs) - return tf.reshape(outs, [-1]) + return outs def init_variables( self, @@ -641,3 +653,84 @@ def get_loss(self, loss: dict, lr) -> Loss: return DOSLoss( **loss, starter_learning_rate=lr.start_lr(), numb_dos=self.get_numb_dos() ) + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + Model + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data["numb_dos"] = data.pop("dim_out") + fitting = cls(**data) + fitting.fitting_net_variables = cls.deserialize_network( + data["nets"], + suffix=suffix, + ) + fitting.bias_dos = data["@variables"]["bias_atom_e"] + if fitting.numb_fparam > 0: + fitting.fparam_avg = data["@variables"]["fparam_avg"] + fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"] + if fitting.numb_aparam > 0: + fitting.aparam_avg = data["@variables"]["aparam_avg"] + fitting.aparam_inv_std = data["@variables"]["aparam_inv_std"] + return fitting + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. 
+ + Returns + ------- + dict + The serialized data + """ + data = { + "@class": "Fitting", + "type": "dos", + "@version": 1, + "var_name": "dos", + "ntypes": self.ntypes, + "dim_descrpt": self.dim_descrpt, + # very bad design: type embedding is not passed to the class + # TODO: refactor the class for DOSFitting and type embedding + "mixed_types": False, + "dim_out": self.numb_dos, + "neuron": self.n_neuron, + "resnet_dt": self.resnet_dt, + "numb_fparam": self.numb_fparam, + "numb_aparam": self.numb_aparam, + "rcond": self.rcond, + "trainable": self.trainable, + "activation_function": self.activation_function, + "precision": self.fitting_precision.name, + "exclude_types": [], + "nets": self.serialize_network( + ntypes=self.ntypes, + # TODO: consider type embeddings for DOSFitting + ndim=1, + in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam, + out_dim=self.numb_dos, + neuron=self.n_neuron, + activation_function=self.activation_function, + resnet_dt=self.resnet_dt, + variables=self.fitting_net_variables, + suffix=suffix, + ), + "@variables": { + "bias_atom_e": self.bias_dos, + "fparam_avg": self.fparam_avg, + "fparam_inv_std": self.fparam_inv_std, + "aparam_avg": self.aparam_avg, + "aparam_inv_std": self.aparam_inv_std, + }, + } + return data diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py index 780ae76c96..b391b00052 100644 --- a/deepmd/tf/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -868,7 +868,7 @@ def deserialize(cls, data: dict, suffix: str = ""): data["nets"], suffix=suffix, ) - fitting.bias_atom_e = data["@variables"]["bias_atom_e"] + fitting.bias_atom_e = data["@variables"]["bias_atom_e"].ravel() if fitting.numb_fparam > 0: fitting.fparam_avg = data["@variables"]["fparam_avg"] fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"] @@ -893,7 +893,7 @@ def serialize(self, suffix: str = "") -> dict: "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, # very bad design: type embedding is not passed to the class - # TODO: refactor the class + # TODO: refactor the class for energy fitting and type embedding "mixed_types": False, "dim_out": 1, "neuron": self.n_neuron, @@ -912,7 +912,7 @@ def serialize(self, suffix: str = "") -> dict: "exclude_types": [], "nets": self.serialize_network( ntypes=self.ntypes, - # TODO: consider type embeddings + # TODO: consider type embeddings for type embedding ndim=1, in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam, neuron=self.n_neuron, @@ -922,7 +922,7 @@ def serialize(self, suffix: str = "") -> dict: suffix=suffix, ), "@variables": { - "bias_atom_e": self.bias_atom_e, + "bias_atom_e": self.bias_atom_e.reshape(-1, 1), "fparam_avg": self.fparam_avg, "fparam_inv_std": self.fparam_inv_std, "aparam_avg": self.aparam_avg, diff --git a/deepmd/tf/fit/fitting.py b/deepmd/tf/fit/fitting.py index 6a7398daac..0f73230bc8 100644 --- a/deepmd/tf/fit/fitting.py +++ b/deepmd/tf/fit/fitting.py @@ -246,9 +246,9 @@ def deserialize_network(cls, data: dict, suffix: str = "") -> dict: fitting_net_variables[f"{layer_name}{key}{suffix}/matrix"] = layer.w fitting_net_variables[f"{layer_name}{key}{suffix}/bias"] = layer.b if layer.idt is not None: - fitting_net_variables[ - f"{layer_name}{key}{suffix}/idt" - ] = layer.idt.reshape(1, -1) + fitting_net_variables[f"{layer_name}{key}{suffix}/idt"] = ( + layer.idt.reshape(1, -1) + ) else: # prevent keyError fitting_net_variables[f"{layer_name}{key}{suffix}/idt"] = 0.0 diff --git a/deepmd/tf/fit/polar.py b/deepmd/tf/fit/polar.py index 7ac31809f3..21b9587b88 100644 --- a/deepmd/tf/fit/polar.py 
+++ b/deepmd/tf/fit/polar.py @@ -183,6 +183,7 @@ def compute_output_stats(self, all_stat): mean_polar = np.zeros([len(self.sel_type), 9]) sys_matrix, polar_bias = [], [] for ss in range(len(all_stat["type"])): + nframes = all_stat["type"][ss].shape[0] atom_has_polar = [ w for w in all_stat["type"][ss][0] if (w in self.sel_type) ] # select atom with polar @@ -193,7 +194,7 @@ def compute_output_stats(self, all_stat): index_lis = [ index for index, w in enumerate(atom_has_polar) - if atom_has_polar[index] == self.sel_type[itype] + if w == self.sel_type[itype] ] # select index in this type sys_matrix.append(np.zeros((1, len(self.sel_type)))) @@ -201,10 +202,9 @@ def compute_output_stats(self, all_stat): polar_bias.append( np.sum( - all_stat["atomic_polarizability"][ss].reshape((-1, 9))[ - index_lis - ], - axis=0, + all_stat["atomic_polarizability"][ss][:, index_lis, :] + / nframes, + axis=(0, 1), ).reshape((1, 9)) ) else: # No atomic polar in this system, so it should have global polar @@ -228,7 +228,9 @@ def compute_output_stats(self, all_stat): sys_matrix[-1][0, itype] = len(index_lis) # add polar_bias - polar_bias.append(all_stat["polarizability"][ss].reshape((1, 9))) + polar_bias.append( + np.mean(all_stat["polarizability"][ss], axis=0).reshape((1, 9)) + ) matrix, bias = ( np.concatenate(sys_matrix, axis=0), @@ -543,7 +545,7 @@ def serialize(self, suffix: str) -> dict: "dim_descrpt": self.dim_descrpt, "embedding_width": self.dim_rot_mat_1, # very bad design: type embedding is not passed to the class - # TODO: refactor the class + # TODO: refactor the class for polar fitting and type embedding "mixed_types": False, "dim_out": 3, "neuron": self.n_neuron, @@ -556,7 +558,7 @@ def serialize(self, suffix: str) -> dict: "shift_diag": self.shift_diag, "nets": self.serialize_network( ntypes=self.ntypes, - # TODO: consider type embeddings + # TODO: consider type embeddings for polar fitting ndim=1, in_dim=self.dim_descrpt, out_dim=self.dim_rot_mat_1, @@ -584,7 +586,9 @@ def deserialize(cls, data: dict, suffix: str): The deserialized model """ data = data.copy() - check_version_compatibility(data.pop("@version", 1), 1, 1) + check_version_compatibility( + data.pop("@version", 1), 2, 1 + ) # to allow PT version. fitting = cls(**data) fitting.fitting_net_variables = cls.deserialize_network( data["nets"], diff --git a/deepmd/tf/infer/deep_eval.py b/deepmd/tf/infer/deep_eval.py index b9db0863b5..ccbd44cf97 100644 --- a/deepmd/tf/infer/deep_eval.py +++ b/deepmd/tf/infer/deep_eval.py @@ -489,6 +489,11 @@ def make_natoms_vec( natoms_vec[1] = natoms for ii in range(self.ntypes): natoms_vec[ii + 2] = np.count_nonzero(atom_types[0] == ii) + if np.count_nonzero(atom_types[0] == -1) > 0: + # contains virtual atoms + # energy fitting sums over natoms_vec[2:] instead of reading from natoms_vec[0] + # causing errors for shape mismatch + natoms_vec[2] += np.count_nonzero(atom_types[0] == -1) return natoms_vec def eval_typeebd(self) -> np.ndarray: diff --git a/deepmd/tf/infer/deep_tensor.py b/deepmd/tf/infer/deep_tensor.py index 9e8acf8241..59fdab7cd1 100644 --- a/deepmd/tf/infer/deep_tensor.py +++ b/deepmd/tf/infer/deep_tensor.py @@ -412,7 +412,6 @@ def eval_full( if ghost_map is not None: # add the value of ghost atoms to real atoms force = np.reshape(force, [nframes * nout, -1, 3]) - # TODO: is there some way not to use for loop? 
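[Editorial note] In the `deep_tensor.py` hunk above, only the TODO comment is removed; the per-frame loop that follows still uses `np.add.at` to scatter-add forces accumulated on ghost (halo) atoms back onto the real atoms given by `ghost_map`. The unbuffered form is what makes repeated indices accumulate correctly. A toy, self-contained illustration with made-up shapes and values:

```python
import numpy as np

nloc, nghost = 3, 2
# Per-frame force array: first nloc rows are real atoms, the rest are ghosts.
force = np.arange((nloc + nghost) * 3, dtype=float).reshape(nloc + nghost, 3)
ghost_map = np.array([0, 0])  # both ghosts are periodic images of real atom 0
np.add.at(force, ghost_map, force[nloc:])  # repeated indices accumulate, unlike +=
print(force[:nloc])  # real-atom forces now include the ghost contributions
```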
for ii in range(nframes * nout): np.add.at(force[ii], ghost_map, force[ii, nloc:]) if atomic: diff --git a/deepmd/tf/lmp.py b/deepmd/tf/lmp.py index f8497bef59..b2e47308ed 100644 --- a/deepmd/tf/lmp.py +++ b/deepmd/tf/lmp.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Register entry points for lammps-wheel.""" + import os import platform from importlib import ( diff --git a/deepmd/tf/loggers/loggers.py b/deepmd/tf/loggers/loggers.py index eae99f5367..be948c9858 100644 --- a/deepmd/tf/loggers/loggers.py +++ b/deepmd/tf/loggers/loggers.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias of deepmd.loggers.loggers for backward compatibility.""" + from deepmd.loggers.loggers import ( set_log_handles, ) diff --git a/deepmd/tf/loss/ener.py b/deepmd/tf/loss/ener.py index 48a13319e4..baa4aa3e02 100644 --- a/deepmd/tf/loss/ener.py +++ b/deepmd/tf/loss/ener.py @@ -120,7 +120,6 @@ def __init__( "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3 ) # drdq: the partial derivative of atomic coordinates w.r.t. generalized coordinates - # TODO: could numb_generalized_coord decided from the training data? if self.has_gf > 0: add_data_requirement( "drdq", diff --git a/deepmd/tf/model/frozen.py b/deepmd/tf/model/frozen.py index 1933690ca7..86676bfe0b 100644 --- a/deepmd/tf/model/frozen.py +++ b/deepmd/tf/model/frozen.py @@ -1,4 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import tempfile from enum import ( Enum, ) @@ -7,6 +10,9 @@ Union, ) +from deepmd.entrypoints.convert_backend import ( + convert_backend, +) from deepmd.infer.deep_pot import ( DeepPot, ) @@ -24,6 +30,10 @@ from deepmd.tf.loss.loss import ( Loss, ) +from deepmd.tf.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) from .model import ( Model, @@ -43,7 +53,14 @@ class FrozenModel(Model): def __init__(self, model_file: str, **kwargs): super().__init__(**kwargs) self.model_file = model_file - self.model = DeepPotential(model_file) + if not model_file.endswith(".pb"): + # try to convert from other formats + with tempfile.NamedTemporaryFile( + suffix=".pb", dir=os.curdir, delete=False + ) as f: + convert_backend(INPUT=model_file, OUTPUT=f.name) + self.model_file = f.name + self.model = DeepPotential(self.model_file) if isinstance(self.model, DeepPot): self.model_type = "ener" else: @@ -228,3 +245,19 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): """ # we don't know how to compress it, so no neighbor statistics here return local_jdata + + def serialize(self, suffix: str = "") -> dict: + # try to recover the original model + # the current graph contains a prefix "load", + # so it cannot used to recover the original model + graph, graph_def = load_graph_def(self.model_file) + t_jdata = get_tensor_by_name_from_graph(graph, "train_attr/training_script") + jdata = json.loads(t_jdata) + model = Model(**jdata["model"]) + # important! 
must be called before serialize + model.init_variables(graph=graph, graph_def=graph_def) + return model.serialize() + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + raise RuntimeError("Should not touch here.") diff --git a/deepmd/tf/model/linear.py b/deepmd/tf/model/linear.py index da866ccc5f..ae1b0b5c78 100644 --- a/deepmd/tf/model/linear.py +++ b/deepmd/tf/model/linear.py @@ -54,7 +54,6 @@ def __init__(self, models: List[dict], weights: List[float], **kwargs): self.weights = [1 / len(models) for _ in range(len(models))] elif weights == "sum": self.weights = [1 for _ in range(len(models))] - # TODO: add more weights, for example, so-called committee models else: raise ValueError(f"Invalid weights {weights}") diff --git a/deepmd/tf/model/model.py b/deepmd/tf/model/model.py index ca660f8e95..a0e234a547 100644 --- a/deepmd/tf/model/model.py +++ b/deepmd/tf/model/model.py @@ -566,7 +566,8 @@ def deserialize(cls, data: dict, suffix: str = "") -> "Model": """ if cls is Model: return Model.get_class_by_type(data.get("type", "standard")).deserialize( - data + data, + suffix=suffix, ) raise NotImplementedError("Not implemented in class %s" % cls.__name__) diff --git a/deepmd/tf/model/model_stat.py b/deepmd/tf/model/model_stat.py index 9149c0b666..db70262d50 100644 --- a/deepmd/tf/model/model_stat.py +++ b/deepmd/tf/model/model_stat.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.model_stat import ( _make_all_stat_ref, make_stat_input, diff --git a/deepmd/tf/model/multi.py b/deepmd/tf/model/multi.py index 6280fcd2f6..8fd4b539f1 100644 --- a/deepmd/tf/model/multi.py +++ b/deepmd/tf/model/multi.py @@ -135,9 +135,9 @@ def __init__( fitting_dict[item] = item_fitting_param else: if item_fitting_param["type"] in ["dipole", "polar"]: - item_fitting_param[ - "embedding_width" - ] = self.descrpt.get_dim_rot_mat_1() + item_fitting_param["embedding_width"] = ( + self.descrpt.get_dim_rot_mat_1() + ) fitting_dict[item] = Fitting( **item_fitting_param, descrpt=self.descrpt, diff --git a/deepmd/tf/nvnmd/utils/argcheck.py b/deepmd/tf/nvnmd/utils/argcheck.py index c22d9e0cd4..1f10a1c03e 100644 --- a/deepmd/tf/nvnmd/utils/argcheck.py +++ b/deepmd/tf/nvnmd/utils/argcheck.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.argcheck_nvnmd import ( nvnmd_args, ) diff --git a/deepmd/tf/op/_gelu.py b/deepmd/tf/op/_gelu.py index fcfd2d49fa..04ae124f70 100644 --- a/deepmd/tf/op/_gelu.py +++ b/deepmd/tf/op/_gelu.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: LGPL-3.0-or-later """First-order derivatives and second-order derivatives for gelu function.""" + import tensorflow from tensorflow.python.framework import ( ops, diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py index 27478abaa1..125b795d2e 100644 --- a/deepmd/tf/train/trainer.py +++ b/deepmd/tf/train/trainer.py @@ -236,9 +236,7 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""): if data[fitting_key].mixed_type: assert isinstance( self.fitting[fitting_key], EnerFitting - ), "Data for fitting net {} in mixed_type format must use ener fitting!".format( - fitting_key - ) + ), f"Data for fitting net {fitting_key} in mixed_type format must use ener fitting!" 
if self.numb_fparam_dict[fitting_key] > 0: log.info( "fitting net %s training with %d frame parameter(s)" @@ -298,8 +296,6 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""): ) # neighbor_stat is moved to train.py as duplicated - # TODO: this is a simple fix but we should have a clear - # architecture to call neighbor stat else: self.model.enable_compression() @@ -1086,10 +1082,7 @@ def _init_from_frz_model(self): except FileNotFoundError as e: # throw runtime error if there's no frozen model raise RuntimeError( - "The input frozen model {} ({}) does not exist! Please check the path of the frozen model. ".format( - self.run_opt.init_frz_model, - os.path.abspath(self.run_opt.init_frz_model), - ) + f"The input frozen model {self.run_opt.init_frz_model} ({os.path.abspath(self.run_opt.init_frz_model)}) does not exist! Please check the path of the frozen model. " ) from e # get the model type from the frozen model(self.run_opt.init_frz_model) try: @@ -1142,10 +1135,8 @@ def _init_from_pretrained_model( except FileNotFoundError as e: # throw runtime error if there's no frozen model raise RuntimeError( - "The input frozen pretrained model {} ({}) does not exist! " - "Please check the path of the frozen pretrained model. ".format( - self.run_opt.finetune, os.path.abspath(self.run_opt.finetune) - ) + f"The input frozen pretrained model {self.run_opt.finetune} ({os.path.abspath(self.run_opt.finetune)}) does not exist! " + "Please check the path of the frozen pretrained model. " ) from e # get the model type from the frozen model(self.run_opt.finetune) try: @@ -1164,8 +1155,8 @@ def _init_from_pretrained_model( ), "Compressed models are not supported for finetuning!" self.model.init_variables(graph, graph_def, model_type=self.model_type) log.info( - "Changing energy bias in pretrained model for types {}... " - "(this step may take long time)".format(str(origin_type_map)) + f"Changing energy bias in pretrained model for types {origin_type_map!s}... 
" + "(this step may take long time)" ) self._change_energy_bias( data, self.run_opt.finetune, origin_type_map, bias_shift diff --git a/deepmd/tf/utils/argcheck.py b/deepmd/tf/utils/argcheck.py index c3c0ed4f22..caec33c319 100644 --- a/deepmd/tf/utils/argcheck.py +++ b/deepmd/tf/utils/argcheck.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.argcheck import ( gen_args, gen_doc, diff --git a/deepmd/tf/utils/batch_size.py b/deepmd/tf/utils/batch_size.py index 8436934cee..33f1ec0da0 100644 --- a/deepmd/tf/utils/batch_size.py +++ b/deepmd/tf/utils/batch_size.py @@ -35,6 +35,4 @@ def is_oom_error(self, e: Exception) -> bool: e : Exception Exception """ - # TODO: it's very slow to catch OOM error; I don't know what TF is doing here - # but luckily we only need to catch once return isinstance(e, (tf.errors.ResourceExhaustedError, OutOfMemoryError)) diff --git a/deepmd/tf/utils/compat.py b/deepmd/tf/utils/compat.py index 6c95476ac8..e80a366b83 100644 --- a/deepmd/tf/utils/compat.py +++ b/deepmd/tf/utils/compat.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.compat import ( convert_input_v0_v1, convert_input_v1_v2, diff --git a/deepmd/tf/utils/data.py b/deepmd/tf/utils/data.py index 3c2eb4298d..54130c18f4 100644 --- a/deepmd/tf/utils/data.py +++ b/deepmd/tf/utils/data.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.data import ( DeepmdData, ) diff --git a/deepmd/tf/utils/data_system.py b/deepmd/tf/utils/data_system.py index 88c38d3dd4..da0cce28e8 100644 --- a/deepmd/tf/utils/data_system.py +++ b/deepmd/tf/utils/data_system.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.data_system import ( DeepmdDataSystem, prob_sys_size_ext, diff --git a/deepmd/tf/utils/finetune.py b/deepmd/tf/utils/finetune.py index 01b5eaaafe..3d11130ba7 100644 --- a/deepmd/tf/utils/finetune.py +++ b/deepmd/tf/utils/finetune.py @@ -63,9 +63,7 @@ def replace_model_params_with_pretrained_model( ) if cur_type_map != pretrained_type_map: log.info( - "Change the type_map from {} to {}.".format( - str(cur_type_map), str(pretrained_type_map) - ) + f"Change the type_map from {cur_type_map!s} to {pretrained_type_map!s}." ) jdata["model"]["type_map"] = pretrained_type_map @@ -102,7 +100,7 @@ def replace_model_params_with_pretrained_model( ): target_para = pretrained_jdata["model"][config_key] cur_para = jdata["model"][config_key] - # keep some params that are irrelevant to model structures (need to discuss) TODO + # TODO: keep some params that are irrelevant to model structures (need to discuss) if "trainable" in cur_para.keys(): target_para["trainable"] = cur_para["trainable"] log.info(f"Change the '{config_key}' from {cur_para!s} to {target_para!s}.") diff --git a/deepmd/tf/utils/graph.py b/deepmd/tf/utils/graph.py index 8c4b0fcc84..3ed43343fa 100644 --- a/deepmd/tf/utils/graph.py +++ b/deepmd/tf/utils/graph.py @@ -22,7 +22,6 @@ ) -# TODO (JZ): I think in this file we can merge some duplicated lines into one method... def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]: """Load graph as well as the graph_def from the frozen model(model_file). 
@@ -308,13 +307,13 @@ def get_extra_embedding_net_variables_from_graph_def( extra_embedding_net_variables = {} for i in range(1, layer_size + 1): matrix_pattern = f"filter_type_all{suffix}/matrix_{i}{extra_suffix}" - extra_embedding_net_variables[ - matrix_pattern - ] = get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern) + extra_embedding_net_variables[matrix_pattern] = ( + get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern) + ) bias_pattern = f"filter_type_all{suffix}/bias_{i}{extra_suffix}" - extra_embedding_net_variables[ - bias_pattern - ] = get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern) + extra_embedding_net_variables[bias_pattern] = ( + get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern) + ) return extra_embedding_net_variables diff --git a/deepmd/tf/utils/multi_init.py b/deepmd/tf/utils/multi_init.py index 056a6694e8..aafa9461b0 100644 --- a/deepmd/tf/utils/multi_init.py +++ b/deepmd/tf/utils/multi_init.py @@ -59,9 +59,7 @@ def replace_model_params_with_frz_multi_model( ) if cur_type_map != pretrained_type_map: log.info( - "Change the type_map from {} to {}.".format( - str(cur_type_map), str(pretrained_type_map) - ) + f"Change the type_map from {cur_type_map!s} to {pretrained_type_map!s}." ) jdata["model"]["type_map"] = pretrained_type_map @@ -166,7 +164,7 @@ def replace_model_params_with_frz_multi_model( def _change_sub_config(jdata: Dict[str, Any], src_jdata: Dict[str, Any], sub_key: str): target_para = src_jdata[sub_key] cur_para = jdata[sub_key] - # keep some params that are irrelevant to model structures (need to discuss) TODO + # TODO: keep some params that are irrelevant to model structures (need to discuss) if "trainable" in cur_para.keys(): target_para["trainable"] = cur_para["trainable"] log.info(f"Change the '{sub_key}' from {cur_para!s} to {target_para!s}.") diff --git a/deepmd/tf/utils/pair_tab.py b/deepmd/tf/utils/pair_tab.py index a5f5e64aae..a9747c4367 100644 --- a/deepmd/tf/utils/pair_tab.py +++ b/deepmd/tf/utils/pair_tab.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.pair_tab import ( PairTab, ) diff --git a/deepmd/tf/utils/path.py b/deepmd/tf/utils/path.py index 63c82b9da0..67990543ae 100644 --- a/deepmd/tf/utils/path.py +++ b/deepmd/tf/utils/path.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.path import ( DPH5Path, DPOSPath, diff --git a/deepmd/tf/utils/plugin.py b/deepmd/tf/utils/plugin.py index 436a80a819..f2f0336691 100644 --- a/deepmd/tf/utils/plugin.py +++ b/deepmd/tf/utils/plugin.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.plugin import ( Plugin, PluginVariant, diff --git a/deepmd/tf/utils/random.py b/deepmd/tf/utils/random.py index 6d875df224..55b8eba91e 100644 --- a/deepmd/tf/utils/random.py +++ b/deepmd/tf/utils/random.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.random import ( choice, random, diff --git a/deepmd/tf/utils/update_sel.py b/deepmd/tf/utils/update_sel.py index bed6274f56..db0420dde8 100644 --- a/deepmd/tf/utils/update_sel.py +++ b/deepmd/tf/utils/update_sel.py @@ -24,8 +24,6 @@ def neighbor_stat(self) -> Type[NeighborStat]: def hook(self, min_nbor_dist, max_nbor_size): # moved from traier.py as duplicated - # TODO: this is a simple fix but we should have a 
clear - # architecture to call neighbor stat tf.constant( min_nbor_dist, name="train_attr/min_nbor_dist", diff --git a/deepmd/tf/utils/weight_avg.py b/deepmd/tf/utils/weight_avg.py index fe162aa1ea..fb3ae27934 100644 --- a/deepmd/tf/utils/weight_avg.py +++ b/deepmd/tf/utils/weight_avg.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" + from deepmd.utils.weight_avg import ( weighted_average, ) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index e822e18d50..564039ccd0 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -93,7 +93,12 @@ def type_embedding_args(): def spin_args(): - doc_use_spin = "Whether to use atomic spin model for each atom type" + doc_use_spin = ( + "Whether to use atomic spin model for each atom type. " + "List of boolean values with the shape of [ntypes] to specify which types use spin, " + f"or a list of integer values {doc_only_pt_supported} " + "to indicate the index of the type that uses spin." + ) doc_spin_norm = "The magnitude of atomic spin for each atom type with spin" doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin" doc_virtual_scale = ( @@ -106,7 +111,7 @@ def spin_args(): ) return [ - Argument("use_spin", List[bool], doc=doc_use_spin), + Argument("use_spin", [List[bool], List[int]], doc=doc_use_spin), Argument( "spin_norm", List[float], @@ -121,7 +126,7 @@ def spin_args(): ), Argument( "virtual_scale", - List[float], + [List[float], float], optional=True, doc=doc_only_pt_supported + doc_virtual_scale, ), @@ -1461,7 +1466,6 @@ def frozen_model_args() -> Argument: [ Argument("model_file", str, optional=False, doc=doc_model_file), ], - doc=doc_only_tf_supported, ) return ca @@ -1518,15 +1522,32 @@ def linear_ener_model_args() -> Argument: # --- Learning rate configurations: --- # def learning_rate_exp(): doc_start_lr = "The learning rate at the start of the training." - doc_stop_lr = "The desired learning rate at the end of the training." + doc_stop_lr = ( + "The desired learning rate at the end of the training. " + f"When decay_rate {doc_only_pt_supported}is explicitly set, " + "this value will serve as the minimum learning rate during training. " + "In other words, if the learning rate decays below stop_lr, stop_lr will be applied instead." + ) doc_decay_steps = ( "The learning rate is decaying every this number of training steps." ) + doc_decay_rate = ( + "The decay rate for the learning rate. " + "If this is provided, it will be used directly as the decay rate for learning rate " + "instead of calculating it through interpolation between start_lr and stop_lr." 
+ ) args = [ Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr), Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr), Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps), + Argument( + "decay_rate", + float, + optional=True, + default=None, + doc=doc_only_pt_supported + doc_decay_rate, + ), ] return args @@ -2386,10 +2407,10 @@ def normalize_multi_task(data): data["model"]["fitting_net_dict"].keys(), data["learning_rate_dict"] ) elif single_learning_rate: - data[ - "learning_rate_dict" - ] = normalize_learning_rate_dict_with_single_learning_rate( - data["model"]["fitting_net_dict"].keys(), data["learning_rate"] + data["learning_rate_dict"] = ( + normalize_learning_rate_dict_with_single_learning_rate( + data["model"]["fitting_net_dict"].keys(), data["learning_rate"] + ) ) fitting_weight = ( data["training"]["fitting_weight"] if multi_fitting_weight else None @@ -2432,11 +2453,7 @@ def normalize_data_dict(data_dict): def normalize_loss_dict(fitting_keys, loss_dict): # check the loss dict failed_loss_keys = [item for item in loss_dict if item not in fitting_keys] - assert ( - not failed_loss_keys - ), "Loss dict key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_loss_keys), str(list(fitting_keys)) - ) + assert not failed_loss_keys, f"Loss dict key(s) {failed_loss_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! " new_dict = {} base = Argument("base", dict, [], [loss_variant_type_args()], doc="") for item in loss_dict: @@ -2451,9 +2468,7 @@ def normalize_learning_rate_dict(fitting_keys, learning_rate_dict): failed_learning_rate_keys = [ item for item in learning_rate_dict if item not in fitting_keys ] - assert not failed_learning_rate_keys, "Learning rate dict key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_learning_rate_keys), str(list(fitting_keys)) - ) + assert not failed_learning_rate_keys, f"Learning rate dict key(s) {failed_learning_rate_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! " new_dict = {} base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="") for item in learning_rate_dict: @@ -2476,11 +2491,7 @@ def normalize_learning_rate_dict_with_single_learning_rate(fitting_keys, learnin def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): # check the mapping failed_data_keys = [item for item in data_keys if item not in fitting_keys] - assert ( - not failed_data_keys - ), "Data dict key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_data_keys), str(list(fitting_keys)) - ) + assert not failed_data_keys, f"Data dict key(s) {failed_data_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! " empty_fitting_keys = [] valid_fitting_keys = [] for item in fitting_keys: @@ -2490,9 +2501,7 @@ def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): valid_fitting_keys.append(item) if empty_fitting_keys: log.warning( - "Fitting net(s) {} have no data and will not be used in training.".format( - str(empty_fitting_keys) - ) + f"Fitting net(s) {empty_fitting_keys!s} have no data and will not be used in training." ) num_pair = len(valid_fitting_keys) assert num_pair > 0, "No valid training data systems for fitting nets!" 
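[Editorial note] To make the new `decay_rate` option documented above concrete: the PyTorch `LearningRateExp` (see the `deepmd/pt/utils/learning_rate.py` hunk earlier) either interpolates a decay rate from `start_lr`, `stop_lr`, and `stop_steps`, or uses the user-supplied `decay_rate` directly, in which case `stop_lr` serves as the lower bound. A hypothetical stand-alone sketch of that schedule, assuming the usual step-wise power form for `value()`; names and defaults are illustrative, not part of the patch:

```python
import numpy as np


def exp_decay_lr(step, start_lr=1e-3, stop_lr=1e-8, decay_steps=5000,
                 stop_steps=1_000_000, decay_rate=None):
    """Exponential decay applied every decay_steps training steps."""
    if decay_rate is None:
        # Choose the rate so that the schedule reaches stop_lr at stop_steps.
        decay_rate = np.exp(np.log(stop_lr / start_lr) / (stop_steps / decay_steps))
    lr = start_lr * decay_rate ** (step // decay_steps)
    return max(lr, stop_lr)  # stop_lr doubles as the minimum learning rate


print(exp_decay_lr(0), exp_decay_lr(500_000), exp_decay_lr(1_000_000))
```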
@@ -2507,9 +2516,7 @@ def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): failed_weight_keys = [ item for item in fitting_weight if item not in fitting_keys ] - assert not failed_weight_keys, "Fitting weight key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_weight_keys), str(list(fitting_keys)) - ) + assert not failed_weight_keys, f"Fitting weight key(s) {failed_weight_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! " sum_prob = 0.0 for item in fitting_keys: if item in valid_fitting_keys: diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index c85806458f..b35d9833d5 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -51,7 +51,6 @@ class AutoBatchSize(ABC): def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: # See also PyTorchLightning/pytorch-lightning#1638 - # TODO: discuss a proper initial batch size self.current_batch_size = initial_batch_size DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0)) if DP_INFER_BATCH_SIZE > 0: diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 1e1d7c2251..6ad76be3c7 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -549,9 +549,7 @@ def _load_set(self, set_name: DPPath): atom_type_mix_ = self.type_idx_map[atom_type_mix].astype(np.int32) except IndexError as e: raise IndexError( - "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format( - set_name, self.get_ntypes() - ) + f"some types in 'real_atom_types.npy' of set {set_name} are not contained in {self.get_ntypes()} types!" ) from e atom_type_mix = atom_type_mix_ real_type = atom_type_mix.reshape([nframes, self.natoms]) @@ -568,9 +566,7 @@ def _load_set(self, set_name: DPPath): ).T assert ( atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms - ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format( - set_name, self.get_ntypes() - ) + ).all(), f"some types in 'real_atom_types.npy' of set {set_name} are not contained in {self.get_ntypes()} types!" data["real_natoms_vec"] = np.concatenate( ( np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)), diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 0c74abfed1..640083bc33 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -670,7 +670,6 @@ def print_summary( % ( _format_name_length(system_dirs[ii], sys_width), natoms[ii], - # TODO batch size * nbatches = number of structures batch_size[ii], nbatches[ii], sys_probs[ii], diff --git a/deepmd/utils/finetune.py b/deepmd/utils/finetune.py index b6d04b9bc5..a454ad72ea 100644 --- a/deepmd/utils/finetune.py +++ b/deepmd/utils/finetune.py @@ -135,8 +135,6 @@ def change_energy_bias_lower( else: raise RuntimeError("Unknown bias_shift mode: " + bias_shift) log.info( - "Change energy bias of {} from {} to {}.".format( - str(origin_type_map), str(old_bias), str(bias_atom_e[idx_type_map]) - ) + f"Change energy bias of {origin_type_map!s} from {old_bias!s} to {bias_atom_e[idx_type_map]!s}." 
) return bias_atom_e diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py index 8f68e32417..3659e57305 100644 --- a/deepmd/utils/out_stat.py +++ b/deepmd/utils/out_stat.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Output statistics.""" + from typing import ( Optional, Tuple, diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py index 5887e91850..afe14703a0 100644 --- a/deepmd/utils/path.py +++ b/deepmd/utils/path.py @@ -39,7 +39,6 @@ def __new__(cls, path: str, mode: str = "r"): return super().__new__(DPOSPath) elif os.path.isfile(path.split("#")[0]): # assume h5 if it is not dir - # TODO: check if it is a real h5? or just check suffix? return super().__new__(DPH5Path) raise FileNotFoundError("%s not found" % path) return super().__new__(cls) @@ -217,7 +216,6 @@ def glob(self, pattern: str) -> List["DPPath"]: list of paths """ # currently DPOSPath will only derivative DPOSPath - # TODO: discuss if we want to mix DPOSPath and DPH5Path? return [type(self)(p, mode=self.mode) for p in self.path.glob(pattern)] def rglob(self, pattern: str) -> List["DPPath"]: diff --git a/doc/install/build-conda.md b/doc/install/build-conda.md index 41c9f90a6e..fee9f77acc 100644 --- a/doc/install/build-conda.md +++ b/doc/install/build-conda.md @@ -1,5 +1,12 @@ # Building conda packages +::::{danger} +:::{deprecated} 3.0.0 +The official channel has been deprecated since 3.0.0. +Refer to [conda-forge documentation](https://conda-forge.org/docs/maintainer/adding_pkgs/) for how to contribute and build packages locally. +::: +:::: + One may want to keep both convenience and personalization of the DeePMD-kit. To achieve this goal, one can consider building conda packages. We provide building scripts in [deepmd-kit-recipes organization](https://github.com/deepmd-kit-recipes/). These building tools are driven by [conda-build](https://github.com/conda/conda-build) and [conda-smithy](https://github.com/conda-forge/conda-smithy). For example, if one wants to turn on `MPIIO` package in LAMMPS, go to [`lammps-feedstock`](https://github.com/deepmd-kit-recipes/lammps-feedstock/) repository and modify `recipe/build.sh`. `-D PKG_MPIIO=OFF` should be changed to `-D PKG_MPIIO=ON`. Then go to the main directory and execute diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md index 43ff1c80a5..bb68272ace 100644 --- a/doc/install/easy-install-dev.md +++ b/doc/install/easy-install-dev.md @@ -35,3 +35,8 @@ The [pre-comiled C library](./install-from-c-library.md) can be downloaded from ```sh wget https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip && unzip libdeepmd_c-0-libdeepmd_c.tar.gz.zip ``` + +## Pre-release conda-forge packages + +Pre-release conda-forge packages are in `conda-forge/label/deepmd-kit_dev` or `conda-forge/label/deepmd-kit_rc` channels, other than the `conda-forge` channel. +See [conda-forge documentation](https://conda-forge.org/docs/maintainer/knowledge_base/#pre-release-builds) for more information. diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index e1861a6096..6acfd98cb0 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -6,6 +6,7 @@ After your easy installation, DeePMD-kit (`dp`) and LAMMPS (`lmp`) will be avail :::{note} Note: The off-line packages and conda packages require the [GNU C Library](https://www.gnu.org/software/libc/) 2.17 or above. 
The GPU version requires [compatible NVIDIA driver](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#minor-version-compatibility) to be installed in advance. It is possible to force conda to [override detection](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-virtual.html#overriding-detected-packages) when installation, but these requirements are still necessary during runtime. +You can refer to [DeepModeling conda FAQ](https://docs.deepmodeling.com/faq/conda.html) for more information. ::: :::{note} @@ -23,7 +24,7 @@ Both CPU and GPU version offline packages are available on [the Releases page](h Some packages are split into two files due to the size limit of GitHub. One may merge them into one after downloading: ```bash -cat deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.0 deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.1 > deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh +cat deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh.0 deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh.1 > deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh ``` One may enable the environment using @@ -32,9 +33,29 @@ conda activate /path/to/deepmd-kit ``` ## Install with conda -DeePMD-kit is available with [conda](https://github.com/conda/conda). Install [Anaconda](https://www.anaconda.com/distribution/#download-section) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html) first. +DeePMD-kit is available with [conda](https://github.com/conda/conda). Install [Anaconda](https://www.anaconda.com/distribution/#download-section), [Miniconda](https://docs.conda.io/en/latest/miniconda.html), or [miniforge](https://conda-forge.org/download/) first. +You can refer to [DeepModeling conda FAQ](https://docs.deepmodeling.com/faq/conda.html) for how to setup a conda environment. -### Official channel +### conda-forge channel + +DeePMD-kit is available on the [conda-forge](https://conda-forge.org/) channel: + +```bash +conda create -n deepmd deepmd-kit lammps horovod -c conda-forge +``` + +The supported platforms include Linux x86-64, macOS x86-64, and macOS arm64. +Read [conda-forge FAQ](https://conda-forge.org/docs/user/tipsandtricks.html#installing-cuda-enabled-packages-like-tensorflow-and-pytorch) to learn how to install CUDA-enabled packages. + +### Official channel (deprecated) + +::::{danger} +:::{deprecated} 3.0.0 +The official channel has been deprecated since 3.0.0, due to the challenging work of building dependencies for [multiple backends](../backend.md). +Old packages will still be available at https://conda.deepmodeling.com. +Maintainers will build packages in the conda-forge organization together with other conda-forge members. +::: +:::: One may create an environment that contains the CPU version of DeePMD-kit and LAMMPS: ```bash @@ -47,9 +68,9 @@ conda create -n deepmd deepmd-kit=*=*gpu libdeepmd=*=*gpu lammps cudatoolkit=11. ``` One could change the CUDA Toolkit version from `10.2` or `11.6`. 
-One may specify the DeePMD-kit version such as `2.1.1` using +One may specify the DeePMD-kit version such as `2.2.9` using ```bash -conda create -n deepmd deepmd-kit=2.1.1=*cpu libdeepmd=2.1.1=*cpu lammps horovod -c https://conda.deepmodeling.com -c defaults +conda create -n deepmd deepmd-kit=2.2.9=*cpu libdeepmd=2.2.9=*cpu lammps horovod -c https://conda.deepmodeling.com -c defaults ``` One may enable the environment using @@ -57,19 +78,8 @@ One may enable the environment using conda activate deepmd ``` -### conda-forge channel - -DeePMD-kit is also available on the [conda-forge](https://conda-forge.org/) channel: - -```bash -conda create -n deepmd deepmd-kit lammps horovod -c conda-forge -``` - -The supported platform includes Linux x86-64, macOS x86-64, and macOS arm64. -Read [conda-forge FAQ](https://conda-forge.org/docs/user/tipsandtricks.html#installing-cuda-enabled-packages-like-tensorflow-and-pytorch) to learn how to install CUDA-enabled packages. - ## Install with docker -A docker for installing the DeePMD-kit is available [here](https://github.com/orgs/deepmodeling/packages/container/package/deepmd-kit). +A docker for installing the DeePMD-kit is available [here](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit). To pull the CPU version: ```bash diff --git a/pyproject.toml b/pyproject.toml index 84cc7237bc..128364249a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] requires = [ + # TODO: unpin the upper bound when scikit-build dynamic metadata API is stable # dynamic metadata API is still unstable - # TODO: unpin the upper bound when it is stable "scikit-build-core>=0.5,<0.9,!=0.6.0", "packaging", ] @@ -134,7 +134,7 @@ test-command = [ test-extras = ["cpu", "test", "lmp", "ipi"] build = ["cp310-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] -# TODO: uncomment when CUDA 11 is deprecated +# TODO: uncomment to use the latest image when CUDA 11 is deprecated # manylinux-x86_64-image = "manylinux_2_28" manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81" manylinux-aarch64-image = "manylinux_2_28" diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc index 442e2d90cc..498f35f46b 100644 --- a/source/api_cc/src/DeepPot.cc +++ b/source/api_cc/src/DeepPot.cc @@ -1,12 +1,11 @@ // SPDX-License-Identifier: LGPL-3.0-or-later #include "DeepPot.h" -#include "common.h" -// TODO: only include when TF backend is built #include #include #include "AtomMap.h" +#include "common.h" #ifdef BUILD_TENSORFLOW #include "DeepPotTF.h" #endif diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index f104433468..aa1e27ace1 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -329,8 +329,8 @@ void deepmd::check_status(const tensorflow::Status& status) { void throw_env_not_set_warning(std::string env_name) { std::cerr << "DeePMD-kit WARNING: Environmental variable " << env_name - << " is not set. " - << "Tune " << env_name << " for the best performance. " + << " is not set. " << "Tune " << env_name + << " for the best performance. " << "See https://deepmd.rtfd.io/parallelism/ for more information." 
<< std::endl; } @@ -1341,14 +1341,11 @@ void deepmd::print_summary(const std::string& pre) { std::cout << pre << "source commit at: " + global_git_date << "\n"; std::cout << pre << "support model ver.: " + global_model_version << "\n"; #if defined(GOOGLE_CUDA) - std::cout << pre << "build variant: cuda" - << "\n"; + std::cout << pre << "build variant: cuda" << "\n"; #elif defined(TENSORFLOW_USE_ROCM) - std::cout << pre << "build variant: rocm" - << "\n"; + std::cout << pre << "build variant: rocm" << "\n"; #else - std::cout << pre << "build variant: cpu" - << "\n"; + std::cout << pre << "build variant: cpu" << "\n"; #endif #ifdef BUILD_TENSORFLOW std::cout << pre << "build with tf inc: " + global_tf_include_dir << "\n"; diff --git a/source/api_cc/tests/test_ewald.cc b/source/api_cc/tests/test_ewald.cc index 7eb433816d..d5aa6993a9 100644 --- a/source/api_cc/tests/test_ewald.cc +++ b/source/api_cc/tests/test_ewald.cc @@ -18,8 +18,8 @@ class TestInferEwald : public ::testing::Test { 3.51, 2.51, 2.60, 4.27, 3.22, 1.56}; std::vector charge = {-2, 1, 1, -2, 1, 1}; std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.}; - void SetUp() override{}; - void TearDown() override{}; + void SetUp() override {}; + void TearDown() override {}; }; TYPED_TEST_SUITE(TestInferEwald, ValueTypes); diff --git a/source/ipi/driver.cc b/source/ipi/driver.cc index 9a91a27ad3..977d76011a 100644 --- a/source/ipi/driver.cc +++ b/source/ipi/driver.cc @@ -126,20 +126,17 @@ int main(int argc, char *argv[]) { if (!isinit) { writebuffer_(&socket, msg_needinit, MSGLEN); if (b_verb) { - std::cout << "# send back " - << "NEEDINIT" << std::endl; + std::cout << "# send back " << "NEEDINIT" << std::endl; } } else if (hasdata) { writebuffer_(&socket, msg_havedata, MSGLEN); if (b_verb) { - std::cout << "# send back " - << "HAVEDATA" << std::endl; + std::cout << "# send back " << "HAVEDATA" << std::endl; } } else { writebuffer_(&socket, msg_ready, MSGLEN); if (b_verb) { - std::cout << "# send back " - << "READY" << std::endl; + std::cout << "# send back " << "READY" << std::endl; } } } else if (header_str == "INIT") { diff --git a/source/lib/tests/test_ewald.cc b/source/lib/tests/test_ewald.cc index 45c8ea7bf1..ca6f3a845e 100644 --- a/source/lib/tests/test_ewald.cc +++ b/source/lib/tests/test_ewald.cc @@ -30,7 +30,7 @@ class TestEwald : public ::testing::Test { 1.9076542856278367e+00, 1.3101841366497322e+00, 1.9794445391572657e-01, -9.8010077026955389e-01, 1.9794445391572657e-01, 1.9232614011636004e+00}; - void SetUp() override{}; + void SetUp() override {}; }; TEST_F(TestEwald, cpu) { diff --git a/source/lmp/plugin/CMakeLists.txt b/source/lmp/plugin/CMakeLists.txt index 7cf9c829fa..4b0ccb5355 100644 --- a/source/lmp/plugin/CMakeLists.txt +++ b/source/lmp/plugin/CMakeLists.txt @@ -127,7 +127,7 @@ if(DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION) install( CODE "execute_process( \ COMMAND ${CMAKE_COMMAND} -E create_symlink \ - ../${CMAKE_SHARED_LIBRARY_PREFIX}${libname}${CMAKE_SHARED_LIBRARY_SUFFIX} \ + ../${CMAKE_SHARED_MODULE_PREFIX}${libname}${CMAKE_SHARED_MODULE_SUFFIX} \ ${CMAKE_INSTALL_PREFIX}/lib/${libname}/${PLUGINNAME} \ )") endif() diff --git a/source/lmp/tests/test_deeptensor.py b/source/lmp/tests/test_deeptensor.py index 3e684b386e..6df0a8617a 100644 --- a/source/lmp/tests/test_deeptensor.py +++ b/source/lmp/tests/test_deeptensor.py @@ -57,19 +57,11 @@ sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file.resolve(), - pb_file.resolve(), - ).split() + 
f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split() ) sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file2.resolve(), - pb_file2.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split() ) diff --git a/source/lmp/tests/test_dplr.py b/source/lmp/tests/test_dplr.py index 9c8f1c0d4f..2dd3531894 100644 --- a/source/lmp/tests/test_dplr.py +++ b/source/lmp/tests/test_dplr.py @@ -264,11 +264,7 @@ sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file.resolve(), - pb_file.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split() ) diff --git a/source/lmp/tests/test_lammps.py b/source/lmp/tests/test_lammps.py index 028b403abf..c495f16ffd 100644 --- a/source/lmp/tests/test_lammps.py +++ b/source/lmp/tests/test_lammps.py @@ -219,18 +219,10 @@ sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file.resolve(), - pb_file.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split() ) sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file2.resolve(), - pb_file2.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split() ) @@ -348,9 +340,7 @@ def test_pair_deepmd_virial(lammps): def test_pair_deepmd_model_devi(lammps): lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps.pair_coeff("* *") lammps.run(0) @@ -376,9 +366,7 @@ def test_pair_deepmd_model_devi(lammps): def test_pair_deepmd_model_devi_virial(lammps): lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps.pair_coeff("* *") lammps.compute("virial all centroid/stress/atom NULL pair") @@ -417,9 +405,7 @@ def test_pair_deepmd_model_devi_virial(lammps): def test_pair_deepmd_model_devi_atomic_relative(lammps): relative = 1.0 lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative}" ) lammps.pair_coeff("* *") lammps.run(0) @@ -448,9 +434,7 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps): def test_pair_deepmd_model_devi_atomic_relative_v(lammps): relative = 1.0 lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative}" ) lammps.pair_coeff("* *") lammps.run(0) @@ -535,9 +519,7 @@ def test_pair_deepmd_virial_real(lammps_real): def test_pair_deepmd_model_devi_real(lammps_real): lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), 
md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps_real.pair_coeff("* *") lammps_real.run(0) @@ -567,9 +549,7 @@ def test_pair_deepmd_model_devi_real(lammps_real): def test_pair_deepmd_model_devi_virial_real(lammps_real): lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps_real.pair_coeff("* *") lammps_real.compute("virial all centroid/stress/atom NULL pair") @@ -614,12 +594,7 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real): def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real): relative = 1.0 lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format( - pb_file.resolve(), - pb_file2.resolve(), - md_file.resolve(), - relative * constants.force_metal2real, - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative * constants.force_metal2real}" ) lammps_real.pair_coeff("* *") lammps_real.run(0) @@ -652,12 +627,7 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real): def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real): relative = 1.0 lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format( - pb_file.resolve(), - pb_file2.resolve(), - md_file.resolve(), - relative * constants.ener_metal2real, - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative * constants.ener_metal2real}" ) lammps_real.pair_coeff("* *") lammps_real.run(0) diff --git a/source/lmp/tests/test_lammps_3types.py b/source/lmp/tests/test_lammps_3types.py index 46e1a00c8f..e4e64d9ecf 100644 --- a/source/lmp/tests/test_lammps_3types.py +++ b/source/lmp/tests/test_lammps_3types.py @@ -245,18 +245,10 @@ nktv2p = 1.6021765e6 sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file.resolve(), - pb_file.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split() ) sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file2.resolve(), - pb_file2.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split() ) @@ -337,9 +329,7 @@ def test_pair_deepmd_virial(lammps): def test_pair_deepmd_model_devi(lammps): lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps.pair_coeff("* *") lammps.run(0) @@ -365,9 +355,7 @@ def test_pair_deepmd_model_devi(lammps): def test_pair_deepmd_model_devi_virial(lammps): lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps.pair_coeff("* *") lammps.compute("virial all centroid/stress/atom NULL pair") @@ -406,9 +394,7 @@ def test_pair_deepmd_model_devi_virial(lammps): def test_pair_deepmd_model_devi_atomic_relative(lammps): relative = 1.0 lammps.pair_style( - "deepmd {} {} out_file {} 
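The `*_real` variants above rescale the model-deviation threshold because the LAMMPS instance runs in `real` units while the threshold is specified in `metal` units. Below is a rough sketch of that rescaling; the numeric factor is an assumption here (1 eV ≈ 23.06 kcal/mol), whereas the tests take it from their own `constants` module:

```python
# Hypothetical sketch: rescale a metal-unit force threshold for a real-unit run.
EV_PER_A_TO_KCAL_PER_MOL_A = 23.060549  # assumed eV/A -> (kcal/mol)/A factor
relative = 1.0                           # threshold as given in metal units
pair_style = (
    f"deepmd graph0.pb graph1.pb out_file md.out out_freq 1 "
    f"atomic relative {relative * EV_PER_A_TO_KCAL_PER_MOL_A}"
)
print(pair_style)
```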
out_freq 1 atomic relative {}".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative}" ) lammps.pair_coeff("* *") lammps.run(0) @@ -437,9 +423,7 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps): def test_pair_deepmd_model_devi_atomic_relative_v(lammps): relative = 1.0 lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative}" ) lammps.pair_coeff("* *") lammps.run(0) diff --git a/source/lmp/tests/test_lammps_faparam.py b/source/lmp/tests/test_lammps_faparam.py index 064928eeb1..f78639a96b 100644 --- a/source/lmp/tests/test_lammps_faparam.py +++ b/source/lmp/tests/test_lammps_faparam.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Test LAMMPS fparam and aparam input.""" + import os import subprocess as sp import sys @@ -134,11 +135,7 @@ sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file.resolve(), - pb_file.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split() ) diff --git a/source/lmp/tests/test_lammps_pt.py b/source/lmp/tests/test_lammps_pt.py index bf1ef97e2b..55eaf4fde7 100644 --- a/source/lmp/tests/test_lammps_pt.py +++ b/source/lmp/tests/test_lammps_pt.py @@ -218,11 +218,7 @@ sp.check_output( - "{} -m deepmd convert-from pbtxt -i {} -o {}".format( - sys.executable, - pbtxt_file2.resolve(), - pb_file2.resolve(), - ).split() + f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split() ) @@ -340,9 +336,7 @@ def test_pair_deepmd_virial(lammps): def test_pair_deepmd_model_devi(lammps): lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps.pair_coeff("* *") lammps.run(0) @@ -368,9 +362,7 @@ def test_pair_deepmd_model_devi(lammps): def test_pair_deepmd_model_devi_virial(lammps): lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps.pair_coeff("* *") lammps.compute("virial all centroid/stress/atom NULL pair") @@ -409,9 +401,7 @@ def test_pair_deepmd_model_devi_virial(lammps): def test_pair_deepmd_model_devi_atomic_relative(lammps): relative = 1.0 lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative}" ) lammps.pair_coeff("* *") lammps.run(0) @@ -440,9 +430,7 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps): def test_pair_deepmd_model_devi_atomic_relative_v(lammps): relative = 1.0 lammps.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} 
out_freq 1 atomic relative_v {relative}" ) lammps.pair_coeff("* *") lammps.run(0) @@ -527,9 +515,7 @@ def test_pair_deepmd_virial_real(lammps_real): def test_pair_deepmd_model_devi_real(lammps_real): lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps_real.pair_coeff("* *") lammps_real.run(0) @@ -559,9 +545,7 @@ def test_pair_deepmd_model_devi_real(lammps_real): def test_pair_deepmd_model_devi_virial_real(lammps_real): lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic".format( - pb_file.resolve(), pb_file2.resolve(), md_file.resolve() - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic" ) lammps_real.pair_coeff("* *") lammps_real.compute("virial all centroid/stress/atom NULL pair") @@ -606,12 +590,7 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real): def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real): relative = 1.0 lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format( - pb_file.resolve(), - pb_file2.resolve(), - md_file.resolve(), - relative * constants.force_metal2real, - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative * constants.force_metal2real}" ) lammps_real.pair_coeff("* *") lammps_real.run(0) @@ -644,12 +623,7 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real): def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real): relative = 1.0 lammps_real.pair_style( - "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format( - pb_file.resolve(), - pb_file2.resolve(), - md_file.resolve(), - relative * constants.ener_metal2real, - ) + f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative * constants.ener_metal2real}" ) lammps_real.pair_coeff("* *") lammps_real.run(0) diff --git a/source/md/src/GroFileManager.cc b/source/md/src/GroFileManager.cc index 5969168a72..d61fbb7b97 100644 --- a/source/md/src/GroFileManager.cc +++ b/source/md/src/GroFileManager.cc @@ -125,8 +125,7 @@ void GroFileManager::read(const std::string &name, std::cerr << "cannot open file " << name << std::endl; return; } - while (fgetc(fp) != '\n') - ; + while (fgetc(fp) != '\n'); int npart; fscanf(fp, "%d\n", &npart); fclose(fp); @@ -141,10 +140,8 @@ void GroFileManager::read(const std::string &name, boxsize.resize(3); fp = fopen(name.c_str(), "r"); - while (fgetc(fp) != '\n') - ; - while (fgetc(fp) != '\n') - ; + while (fgetc(fp) != '\n'); + while (fgetc(fp) != '\n'); char line[1024]; for (int i = 0; i < npart; ++i) { fgets(line, 1024, fp); diff --git a/source/md/src/Poly.cpp b/source/md/src/Poly.cpp index 49d2897f14..80db3a139f 100644 --- a/source/md/src/Poly.cpp +++ b/source/md/src/Poly.cpp @@ -38,11 +38,9 @@ double PiecewisePoly::value_periodic(const double& xx_) const { double xx(xx_); double T = x.back() - x.front(); if (xx < x.front()) { - while ((xx += T) < x.front()) - ; + while ((xx += T) < x.front()); } else if (xx >= x.back()) { - while ((xx -= T) >= x.back()) - ; + while ((xx -= T) >= x.back()); } unsigned begin = 0; unsigned end = x.size() - 1; @@ -126,11 +124,9 @@ void PiecewisePoly::value_periodic(const std::vector& r, presentStart = presentEnd; double shift = 0; if (r[presentStart] < x.front()) { - while 
(r[presentStart] + (shift += T) < x.front()) - ; + while (r[presentStart] + (shift += T) < x.front()); } else if (r[presentStart] >= x.back()) { - while (r[presentStart] + (shift -= T) >= x.back()) - ; + while (r[presentStart] + (shift -= T) >= x.back()); } while (presentEnd < r.size() && r[presentEnd] + shift >= x.front() && r[presentEnd] + shift < x.back()) { diff --git a/source/nodejs/prepublish.py b/source/nodejs/prepublish.py index 2f607a7d07..cb60659f02 100644 --- a/source/nodejs/prepublish.py +++ b/source/nodejs/prepublish.py @@ -4,6 +4,7 @@ The NPM package downloads the C library binary from GitHub releases. This script changes the package.json to make it work. """ + import json import shutil diff --git a/source/op/dotmul_flt_nvnmd.cc b/source/op/dotmul_flt_nvnmd.cc index fd7c831ef1..d7c2c8d3c3 100644 --- a/source/op/dotmul_flt_nvnmd.cc +++ b/source/op/dotmul_flt_nvnmd.cc @@ -159,7 +159,7 @@ class DotmulFltNvnmdOp : public OpKernel { ufi3.nint &= FLT_MASK; y[ii] = ufi3.nflt; } // loop ii - } // Compute + } // Compute }; // DotmulFltNvnmdOp diff --git a/source/op/map_flt_nvnmd.cc b/source/op/map_flt_nvnmd.cc index b23deac9c8..77b788e537 100644 --- a/source/op/map_flt_nvnmd.cc +++ b/source/op/map_flt_nvnmd.cc @@ -141,10 +141,10 @@ class MapFltNvnmdOp : public OpKernel { add_flt_nvnmd(ytmp, d, ytmp); y[ii * M + jj] = ytmp; } // jj - } // ii - } // ss - } // Compute -}; // MapFltNvnmdOp + } // ii + } // ss + } // Compute +}; // MapFltNvnmdOp #define REGISTER_CPU(T) \ REGISTER_KERNEL_BUILDER( \ diff --git a/source/op/matmul_fitnet_nvnmd.cc b/source/op/matmul_fitnet_nvnmd.cc index b5dc32a642..acc8e4b591 100644 --- a/source/op/matmul_fitnet_nvnmd.cc +++ b/source/op/matmul_fitnet_nvnmd.cc @@ -160,7 +160,7 @@ class MatmulFitnetNvnmdOp : public OpKernel { s = floor(s * prec * precx) * div_precx; y[ii * K + kk] = s; } // loop xx - } // loop kk + } // loop kk } // Compute diff --git a/source/op/matmul_flt2fix_nvnmd.cc b/source/op/matmul_flt2fix_nvnmd.cc index ab823a829d..10cfb3d3ba 100644 --- a/source/op/matmul_flt2fix_nvnmd.cc +++ b/source/op/matmul_flt2fix_nvnmd.cc @@ -138,9 +138,9 @@ class MatmulFlt2fixNvnmdOp : public OpKernel { ufi.nint &= FLT_MASK; y[hh * N * K + ii * K + kk] = ufi.nflt; } // loop jj - } // loop ii - } // loop hh - } // Compute + } // loop ii + } // loop hh + } // Compute private: int nbit; diff --git a/source/op/matmul_flt_nvnmd.cc b/source/op/matmul_flt_nvnmd.cc index 92b6375100..22ed23c0a3 100644 --- a/source/op/matmul_flt_nvnmd.cc +++ b/source/op/matmul_flt_nvnmd.cc @@ -188,9 +188,9 @@ class MatmulFltNvnmdOp : public OpKernel { ufi3.nint &= FLT_MASK; y[hh * N * K + ii * K + kk] = ufi3.nflt; } // loop kk - } // loop ii - } // loop hh - } // Compute + } // loop ii + } // loop hh + } // Compute private: int normx; diff --git a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc index 6a70f60a96..50267df556 100644 --- a/source/op/tabulate_multi_device.cc +++ b/source/op/tabulate_multi_device.cc @@ -191,7 +191,7 @@ class TabulateFusionSeAOp : public OpKernel { errors::InvalidArgument("Dim of input should be 3")); TensorShape descriptor_shape; descriptor_shape.AddDim(em_tensor.shape().dim_size(0)); - descriptor_shape.AddDim(4); // TODO: be careful here; + descriptor_shape.AddDim(4); // be careful here; descriptor_shape.AddDim(last_layer_size); int context_output_index = 0; Tensor* descriptor_tensor = NULL; @@ -390,7 +390,7 @@ class TabulateFusionSeAttenOp : public OpKernel { errors::InvalidArgument("Dim of input should be 2")); TensorShape 
descriptor_shape; descriptor_shape.AddDim(em_tensor.shape().dim_size(0)); - descriptor_shape.AddDim(4); // TODO: be careful here; + descriptor_shape.AddDim(4); // be careful here; descriptor_shape.AddDim(last_layer_size); int context_output_index = 0; Tensor* descriptor_tensor = NULL; @@ -786,8 +786,7 @@ class TabulateFusionSeROp : public OpKernel { errors::InvalidArgument("Dim of input should be 2")); TensorShape descriptor_shape; descriptor_shape.AddDim(em_tensor.shape().dim_size(0)); - descriptor_shape.AddDim( - em_tensor.shape().dim_size(1)); // TODO: be careful here; + descriptor_shape.AddDim(em_tensor.shape().dim_size(1)); // be careful here; descriptor_shape.AddDim(last_layer_size); int context_output_index = 0; Tensor* descriptor_tensor = NULL; diff --git a/source/op/tanh4_flt_nvnmd.cc b/source/op/tanh4_flt_nvnmd.cc index 987013a5e6..3351a366e4 100644 --- a/source/op/tanh4_flt_nvnmd.cc +++ b/source/op/tanh4_flt_nvnmd.cc @@ -117,8 +117,8 @@ class Tanh4FltNvnmdOp : public OpKernel { y = floor(y * prechi) / prechi; ys(ii, jj) = (x < 0) ? (-y) : y; } // loop jj - } // loop ii - } // Compute + } // loop ii + } // Compute //- define the private variable for calculation }; // Tanh4FltNvnmd diff --git a/source/tests/common/dpmodel/case_single_frame_with_nlist.py b/source/tests/common/dpmodel/case_single_frame_with_nlist.py index c260a18527..828e090cad 100644 --- a/source/tests/common/dpmodel/case_single_frame_with_nlist.py +++ b/source/tests/common/dpmodel/case_single_frame_with_nlist.py @@ -72,3 +72,53 @@ def setUp(self): nlist1 = inv_perm[nlist1] nlist1 = np.where(mask, -1, nlist1) self.nlist = np.concatenate([self.nlist, nlist1], axis=0) + + +class TestCaseSingleFrameWithNlistWithVirtual: + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 4 + self.nall = 5 + self.nf, self.nt = 2, 2 + self.coord_ext = np.array( + [ + [0, 0, 0], + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, -2, 0], + ], + dtype=np.float64, + ).reshape([1, self.nall, 3]) + self.atype_ext = np.array([0, -1, 0, 1, 0], dtype=int).reshape([1, self.nall]) + # sel = [5, 2] + self.sel = [5, 2] + self.nlist = np.array( + [ + [2, 4, -1, -1, -1, 3, -1], + [-1, -1, -1, -1, -1, -1, -1], + [0, -1, -1, -1, -1, 3, -1], + [0, 2, -1, -1, -1, -1, -1], + ], + dtype=int, + ).reshape([1, self.nloc, sum(self.sel)]) + self.rcut = 2.2 + self.rcut_smth = 0.4 + # permutations + self.perm = np.array([3, 0, 1, 2, 4], dtype=np.int32) + inv_perm = np.argsort(self.perm) + # permute the coord and atype + self.coord_ext = np.concatenate( + [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0 + ).reshape(self.nf, self.nall * 3) + self.atype_ext = np.concatenate( + [self.atype_ext, self.atype_ext[:, self.perm]], axis=0 + ) + # permute the nlist + nlist1 = self.nlist[:, self.perm[: self.nloc], :] + mask = nlist1 == -1 + nlist1 = inv_perm[nlist1] + nlist1 = np.where(mask, -1, nlist1) + self.nlist = np.concatenate([self.nlist, nlist1], axis=0) + self.get_real_mapping = np.array([[0, 2, 3], [0, 1, 3]], dtype=np.int32) + self.atol = 1e-12 diff --git a/source/tests/common/dpmodel/test_dp_atomic_model.py b/source/tests/common/dpmodel/test_dp_atomic_model.py index ac49280b82..c69de6161d 100644 --- a/source/tests/common/dpmodel/test_dp_atomic_model.py +++ b/source/tests/common/dpmodel/test_dp_atomic_model.py @@ -16,6 +16,7 @@ from .case_single_frame_with_nlist import ( TestCaseSingleFrameWithNlist, + TestCaseSingleFrameWithNlistWithVirtual, ) @@ -92,10 +93,8 @@ def test_excl_consistency(self): # check output def out_names = [vv.name for vv in 
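In the new `TestCaseSingleFrameWithNlistWithVirtual` fixture above, atoms with type `-1` are virtual placeholders, and `get_real_mapping` lists the indices of the remaining real local atoms per frame. A small sketch of how such a mapping follows from the atype array; the values match the fixture's first frame:

```python
import numpy as np

# Recover the indices of real (non-virtual) local atoms from an extended
# atype array in which virtual atoms carry type -1.
atype_ext = np.array([[0, -1, 0, 1, 0]])  # one frame, nall = 5
nloc = 4                                   # the first nloc entries are local
atype_loc = atype_ext[:, :nloc]
real_mapping = [np.where(row != -1)[0] for row in atype_loc]
print(real_mapping)  # [array([0, 2, 3])], matching get_real_mapping[0] above
```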
md0.atomic_output_def().get_data().values()] - if atom_excl == []: - self.assertEqual(out_names, ["energy"]) - else: - self.assertEqual(out_names, ["energy", "mask"]) + self.assertEqual(out_names, ["energy", "mask"]) + if atom_excl != []: for ii in md0.atomic_output_def().get_data().values(): if ii.name == "mask": self.assertEqual(ii.shape, [1]) @@ -115,3 +114,49 @@ def test_excl_consistency(self): np.testing.assert_array_equal(ret0["mask"], expected) else: raise ValueError(f"not expected atom_excl {atom_excl}") + + +class TestDPAtomicModelVirtualConsistency(unittest.TestCase): + def setUp(self): + self.case0 = TestCaseSingleFrameWithNlist() + self.case1 = TestCaseSingleFrameWithNlistWithVirtual() + self.case0.setUp() + self.case1.setUp() + + def test_virtual_consistency(self): + nf, _, _ = self.case0.nlist.shape + ds = DescrptSeA( + self.case0.rcut, + self.case0.rcut_smth, + self.case0.sel, + ) + ft = InvarFitting( + "energy", + self.case0.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ) + type_map = ["foo", "bar"] + md1 = DPAtomicModel(ds, ft, type_map=type_map) + + args0 = [self.case0.coord_ext, self.case0.atype_ext, self.case0.nlist] + # args0 = [np.array(ii) for ii in args0] + args1 = [self.case1.coord_ext, self.case1.atype_ext, self.case1.nlist] + # args1 = [np.array(ii) for ii in args1] + + ret0 = md1.forward_common_atomic(*args0) + ret1 = md1.forward_common_atomic(*args1) + + for dd in range(self.case0.nf): + np.testing.assert_allclose( + ret0["energy"][dd], + ret1["energy"][dd, self.case1.get_real_mapping[dd], :], + ) + expected_mask = np.array( + [ + [1, 0, 1, 1], + [1, 1, 0, 1], + ] + ) + np.testing.assert_equal(ret1["mask"], expected_mask) diff --git a/source/tests/common/dpmodel/test_dp_model.py b/source/tests/common/dpmodel/test_dp_model.py index c3de1f4cdf..9121c7cd07 100644 --- a/source/tests/common/dpmodel/test_dp_model.py +++ b/source/tests/common/dpmodel/test_dp_model.py @@ -87,7 +87,10 @@ def test_prec_consistency(self): self.assertEqual(model_l_ret_32[ii].dtype, np.float64) else: self.assertEqual(model_l_ret_32[ii].dtype, np.float32) - self.assertEqual(model_l_ret_64[ii].dtype, np.float64) + if ii != "mask": + self.assertEqual(model_l_ret_64[ii].dtype, np.float64) + else: + self.assertEqual(model_l_ret_64[ii].dtype, np.int32) np.testing.assert_allclose( model_l_ret_32[ii], model_l_ret_64[ii], @@ -138,8 +141,10 @@ def test_prec_consistency(self): self.assertEqual(model_l_ret_32[ii].dtype, np.float64) else: self.assertEqual(model_l_ret_32[ii].dtype, np.float32) - self.assertEqual(model_l_ret_64[ii].dtype, np.float64) - self.assertEqual(model_l_ret_64[ii].dtype, np.float64) + if ii != "mask": + self.assertEqual(model_l_ret_64[ii].dtype, np.float64) + else: + self.assertEqual(model_l_ret_64[ii].dtype, np.int32) np.testing.assert_allclose( model_l_ret_32[ii], model_l_ret_64[ii], diff --git a/source/tests/common/dpmodel/test_nlist.py b/source/tests/common/dpmodel/test_nlist.py index 35145cde39..ee8a7139e7 100644 --- a/source/tests/common/dpmodel/test_nlist.py +++ b/source/tests/common/dpmodel/test_nlist.py @@ -125,12 +125,12 @@ def test_nlist_lt(self): class TestNeighList(unittest.TestCase): def setUp(self): self.nf = 3 - self.nloc = 2 + self.nloc = 3 self.ns = 5 * 5 * 3 self.nall = self.ns * self.nloc self.cell = np.array([[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype) - self.icoord = np.array([[0, 0, 0], [0.5, 0.5, 0.1]], dtype=dtype) - self.atype = np.array([0, 1], dtype=np.int32) + self.icoord = np.array([[0, 0, 0], [0, 0, 0], [0.5, 0.5, 0.1]], 
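The `mask` output checked above distinguishes real from virtual atoms: the expected values `[[1, 0, 1, 1], [1, 1, 0, 1]]` are exactly what one gets by testing the local atype against `-1` (the second row corresponds to the fixture's permuted frame). A minimal sketch:

```python
import numpy as np

# mask = 1 for a real local atom, 0 for a virtual atom of type -1.
atype_loc = np.array([[0, -1, 0, 1], [1, 0, -1, 0]])  # original and permuted frame
mask = (atype_loc != -1).astype(np.int32)
print(mask)  # [[1 0 1 1]
             #  [1 1 0 1]]
```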
dtype=dtype) + self.atype = np.array([-1, 0, 1], dtype=np.int32) [self.cell, self.icoord, self.atype] = [ np.expand_dims(ii, 0) for ii in [self.cell, self.icoord, self.atype] ] @@ -144,8 +144,9 @@ def setUp(self): self.nsel = [10, 10] self.ref_nlist = np.array( [ - [0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1], - [0, 0, 0, 0, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1], + [-1] * sum(self.nsel), + [1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1], + [1, 1, 1, 1, -1, -1, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1], ] ) @@ -269,7 +270,7 @@ def test_extend_coord(self): ) np.testing.assert_allclose( cc, - np.array([30, 30, 30, 30, 30], dtype=np.int32), + np.array([self.ns * self.nloc // 5] * 5, dtype=np.int32), rtol=self.prec, atol=self.prec, ) @@ -282,7 +283,7 @@ def test_extend_coord(self): ) np.testing.assert_allclose( cc, - np.array([30, 30, 30, 30, 30], dtype=np.int32), + np.array([self.ns * self.nloc // 5] * 5, dtype=np.int32), rtol=self.prec, atol=self.prec, ) @@ -295,7 +296,7 @@ def test_extend_coord(self): ) np.testing.assert_allclose( cc, - np.array([50, 50, 50], dtype=np.int32), + np.array([self.ns * self.nloc // 3] * 3, dtype=np.int32), rtol=self.prec, atol=self.prec, ) diff --git a/source/tests/common/test_examples.py b/source/tests/common/test_examples.py index 1ec4cef3a5..91bb9c0174 100644 --- a/source/tests/common/test_examples.py +++ b/source/tests/common/test_examples.py @@ -2,6 +2,7 @@ """This module ensures input in the examples directory could pass the argument checking. """ + import unittest from pathlib import ( Path, diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py index 622e2ed3cf..cbcb987c89 100644 --- a/source/tests/consistent/common.py +++ b/source/tests/consistent/common.py @@ -252,11 +252,16 @@ def test_tf_consistent_with_ref(self): tf_obj = self.tf_class.deserialize(data1, suffix=self.unique_id) ret2, data2 = self.get_tf_ret_serialization_from_cls(tf_obj) ret2 = self.extract_ret(ret2, self.RefBackend.TF) - if tf_obj.__class__.__name__.startswith(("Polar", "Dipole")): + if tf_obj.__class__.__name__.startswith(("Polar", "Dipole", "DOS")): # tf, pt serialization mismatch common_keys = set(data1.keys()) & set(data2.keys()) data1 = {k: data1[k] for k in common_keys} data2 = {k: data2[k] for k in common_keys} + + # not comparing version + data1.pop("@version") + data2.pop("@version") + np.testing.assert_equal(data1, data2) for rr1, rr2 in zip(ret1, ret2): np.testing.assert_allclose( @@ -326,7 +331,7 @@ def test_pt_consistent_with_ref(self): ret2 = self.eval_pt(obj) ret2 = self.extract_ret(ret2, self.RefBackend.PT) data2 = obj.serialize() - if obj.__class__.__name__.startswith(("Polar", "Dipole")): + if obj.__class__.__name__.startswith(("Polar", "Dipole", "DOS")): # tf, pt serialization mismatch common_keys = set(data1.keys()) & set(data2.keys()) data1 = {k: data1[k] for k in common_keys} diff --git a/source/tests/consistent/fitting/test_dos.py b/source/tests/consistent/fitting/test_dos.py new file mode 100644 index 0000000000..2832d67641 --- /dev/null +++ b/source/tests/consistent/fitting/test_dos.py @@ -0,0 +1,211 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest +from typing import ( + Any, + Tuple, +) + +import numpy as np + +from deepmd.dpmodel.fitting.dos_fitting import DOSFittingNet as DOSFittingDP +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) + +from ..common import ( + INSTALLED_PT, + INSTALLED_TF, + CommonTest, + parameterized, +) 
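The consistency test above now drops the serialization format version before comparing the backend dicts, since that key legitimately differs between backends. A toy sketch of the comparison, with made-up keys and values:

```python
import numpy as np

# Compare two serialized dicts while ignoring the backend-specific "@version".
data1 = {"@version": 1, "neuron": [5, 5, 5], "resnet_dt": False}
data2 = {"@version": 2, "neuron": [5, 5, 5], "resnet_dt": False}
data1.pop("@version")
data2.pop("@version")
np.testing.assert_equal(data1, data2)  # passes: only the version differed
```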
+from .common import ( + FittingTest, +) + +if INSTALLED_PT: + import torch + + from deepmd.pt.model.task.dos import DOSFittingNet as DOSFittingPT + from deepmd.pt.utils.env import DEVICE as PT_DEVICE +else: + DOSFittingPT = object +if INSTALLED_TF: + from deepmd.tf.fit.dos import DOSFitting as DOSFittingTF +else: + DOSFittingTF = object +from deepmd.utils.argcheck import ( + fitting_dos, +) + + +@parameterized( + (True, False), # resnet_dt + ("float64", "float32"), # precision + (True, False), # mixed_types + (0, 1), # numb_fparam + (10, 20), # numb_dos +) +class TestDOS(CommonTest, FittingTest, unittest.TestCase): + @property + def data(self) -> dict: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return { + "neuron": [5, 5, 5], + "resnet_dt": resnet_dt, + "precision": precision, + "numb_fparam": numb_fparam, + "seed": 20240217, + "numb_dos": numb_dos, + } + + @property + def skip_tf(self) -> bool: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + # TODO: mixed_types + return mixed_types or CommonTest.skip_pt + + @property + def skip_pt(self) -> bool: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return CommonTest.skip_pt + + tf_class = DOSFittingTF + dp_class = DOSFittingDP + pt_class = DOSFittingPT + args = fitting_dos() + + def setUp(self): + CommonTest.setUp(self) + + self.ntypes = 2 + self.natoms = np.array([6, 6, 2, 4], dtype=np.int32) + self.inputs = np.ones((1, 6, 20), dtype=GLOBAL_NP_FLOAT_PRECISION) + self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32) + # inconsistent if not sorted + self.atype.sort() + self.fparam = -np.ones((1,), dtype=GLOBAL_NP_FLOAT_PRECISION) + + @property + def addtional_data(self) -> dict: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return { + "ntypes": self.ntypes, + "dim_descrpt": self.inputs.shape[-1], + "mixed_types": mixed_types, + } + + def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return self.build_tf_fitting( + obj, + self.inputs.ravel(), + self.natoms, + self.atype, + self.fparam if numb_fparam else None, + suffix, + ) + + def eval_pt(self, pt_obj: Any) -> Any: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return ( + pt_obj( + torch.from_numpy(self.inputs).to(device=PT_DEVICE), + torch.from_numpy(self.atype.reshape(1, -1)).to(device=PT_DEVICE), + fparam=torch.from_numpy(self.fparam).to(device=PT_DEVICE) + if numb_fparam + else None, + )["dos"] + .detach() + .cpu() + .numpy() + ) + + def eval_dp(self, dp_obj: Any) -> Any: + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + return dp_obj( + self.inputs, + self.atype.reshape(1, -1), + fparam=self.fparam if numb_fparam else None, + )["dos"] + + def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]: + if backend == self.RefBackend.TF: + # shape is not same + ret = ret[0].reshape(-1, self.natoms[0], 1) + return (ret,) + + @property + def rtol(self) -> float: + """Relative tolerance for comparing the return value.""" + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + if precision == "float64": + return 1e-10 + elif precision == "float32": + return 1e-4 + else: + raise ValueError(f"Unknown precision: {precision}") + + @property + def atol(self) -> float: + """Absolute tolerance for 
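The `self.atype.sort()` call above (noted as "inconsistent if not sorted") reflects that the TF fitting path expects atoms grouped by type. A minimal illustration of sorting the types while keeping the index map, so that any per-atom arrays could be reordered the same way:

```python
import numpy as np

# Sort atom types and keep the permutation used, so per-atom data can follow.
atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
idx_map = np.argsort(atype)
print(atype[idx_map])  # [0 0 1 1 1 1]
print(idx_map)         # the permutation to apply to coordinates, forces, ...
```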
comparing the return value.""" + ( + resnet_dt, + precision, + mixed_types, + numb_fparam, + numb_dos, + ) = self.param + if precision == "float64": + return 1e-10 + elif precision == "float32": + return 1e-4 + else: + raise ValueError(f"Unknown precision: {precision}") diff --git a/source/tests/consistent/model/test_frozen.py b/source/tests/consistent/model/test_frozen.py new file mode 100644 index 0000000000..a60a6abb3f --- /dev/null +++ b/source/tests/consistent/model/test_frozen.py @@ -0,0 +1,167 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import os +import unittest +from typing import ( + Any, + Tuple, +) + +import numpy as np + +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) + +from ..common import ( + INSTALLED_PT, + INSTALLED_TF, + CommonTest, + parameterized, +) +from .common import ( + ModelTest, +) + +if INSTALLED_PT: + from deepmd.pt.model.model import BaseModel as FrozenModelPT + +else: + FrozenModelPT = None +if INSTALLED_TF: + from deepmd.tf.model.model import Model as FrozenModelTF +else: + FrozenModelTF = None +from pathlib import ( + Path, +) + +from deepmd.entrypoints.convert_backend import ( + convert_backend, +) +from deepmd.utils.argcheck import ( + model_args, +) + +original_model = str(Path(__file__).parent.parent.parent / "infer" / "deeppot.dp") +pt_model = "deeppot_for_consistent_frozen.pth" +tf_model = "deeppot_for_consistent_frozen.pb" +dp_model = original_model + + +def setUpModule(): + convert_backend( + INPUT=dp_model, + OUTPUT=tf_model, + ) + convert_backend( + INPUT=dp_model, + OUTPUT=pt_model, + ) + + +def tearDownModule(): + for model_file in (pt_model, tf_model): + try: + os.remove(model_file) + except FileNotFoundError: + pass + + +@parameterized((pt_model, tf_model, dp_model)) +class TestFrozen(CommonTest, ModelTest, unittest.TestCase): + @property + def data(self) -> dict: + (model_file,) = self.param + if not INSTALLED_PT and model_file.endswith(".pth"): + raise unittest.SkipTest("PyTorch is not installed") + if not INSTALLED_TF and model_file.endswith(".pb"): + raise unittest.SkipTest("TensorFlow is not installed") + return { + "type": "frozen", + "model_file": model_file, + } + + tf_class = FrozenModelTF + dp_class = None + pt_class = FrozenModelPT + args = model_args() + + def skip_dp(self): + return True + + def setUp(self): + CommonTest.setUp(self) + + self.ntypes = 2 + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ], + dtype=GLOBAL_NP_FLOAT_PRECISION, + ).reshape(1, -1, 3) + self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32).reshape(1, -1) + self.box = np.array( + [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0], + dtype=GLOBAL_NP_FLOAT_PRECISION, + ).reshape(1, 9) + self.natoms = np.array([6, 6, 2, 4], dtype=np.int32) + + # TF requires the atype to be sort + idx_map = np.argsort(self.atype.ravel()) + self.atype = self.atype[:, idx_map] + self.coords = self.coords[:, idx_map] + + def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]: + return self.build_tf_model( + obj, + self.natoms, + self.coords, + self.atype, + self.box, + suffix, + ) + + def eval_dp(self, dp_obj: Any) -> Any: + return self.eval_dp_model( + dp_obj, + self.natoms, + self.coords, + self.atype, + self.box, + ) + + def eval_pt(self, pt_obj: Any) -> Any: + return self.eval_pt_model( + pt_obj, + self.natoms, + self.coords, + self.atype, + self.box, + ) + + def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, 
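The new frozen-model test converts a backend-neutral `.dp` file into TensorFlow and PyTorch model files before exercising them. A condensed sketch of that setup using the same `convert_backend` entry point as the test; the file names below are placeholders:

```python
from deepmd.entrypoints.convert_backend import convert_backend

# Produce one model file per backend from a single .dp source (paths assumed).
convert_backend(INPUT="deeppot.dp", OUTPUT="deeppot_for_consistent_frozen.pb")   # TensorFlow
convert_backend(INPUT="deeppot.dp", OUTPUT="deeppot_for_consistent_frozen.pth")  # PyTorch
```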
...]: + # shape not matched. ravel... + if backend is self.RefBackend.DP: + return (ret["energy_redu"].ravel(), ret["energy"].ravel()) + elif backend is self.RefBackend.PT: + return (ret["energy"].ravel(), ret["atom_energy"].ravel()) + elif backend is self.RefBackend.TF: + return (ret[0].ravel(), ret[1].ravel()) + raise ValueError(f"Unknown backend: {backend}") diff --git a/source/tests/infer/deeppot.dp b/source/tests/infer/deeppot.dp new file mode 100644 index 0000000000..2f7d9e3f6f Binary files /dev/null and b/source/tests/infer/deeppot.dp differ diff --git a/source/tests/pt/dos/data/set.000/atom_dos.npy b/source/tests/pt/dos/data/set.000/atom_dos.npy new file mode 100644 index 0000000000..22809c1068 Binary files /dev/null and b/source/tests/pt/dos/data/set.000/atom_dos.npy differ diff --git a/source/tests/pt/dos/data/set.000/box.npy b/source/tests/pt/dos/data/set.000/box.npy new file mode 100644 index 0000000000..6265bf150e Binary files /dev/null and b/source/tests/pt/dos/data/set.000/box.npy differ diff --git a/source/tests/pt/dos/data/set.000/coord.npy b/source/tests/pt/dos/data/set.000/coord.npy new file mode 100644 index 0000000000..f33ce430bf Binary files /dev/null and b/source/tests/pt/dos/data/set.000/coord.npy differ diff --git a/source/tests/pt/dos/data/set.000/dos.npy b/source/tests/pt/dos/data/set.000/dos.npy new file mode 100644 index 0000000000..904b23e709 Binary files /dev/null and b/source/tests/pt/dos/data/set.000/dos.npy differ diff --git a/source/tests/pt/dos/data/type.raw b/source/tests/pt/dos/data/type.raw new file mode 100644 index 0000000000..de3c26ec4e --- /dev/null +++ b/source/tests/pt/dos/data/type.raw @@ -0,0 +1,32 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/source/tests/pt/dos/data/type_map.raw b/source/tests/pt/dos/data/type_map.raw new file mode 100644 index 0000000000..a9edc74f38 --- /dev/null +++ b/source/tests/pt/dos/data/type_map.raw @@ -0,0 +1 @@ +H diff --git a/source/tests/pt/dos/input.json b/source/tests/pt/dos/input.json new file mode 100644 index 0000000000..f9330003be --- /dev/null +++ b/source/tests/pt/dos/input.json @@ -0,0 +1,80 @@ +{ + "model": { + "type_map": [ + "H" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 90 + ], + "rcut_smth": 1.8, + "rcut": 6.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 8, + "precision": "float64", + "seed": 1 + }, + "fitting_net": { + "type": "dos", + "numb_dos": 250, + "neuron": [ + 120, + 120, + 120 + ], + "resnet_dt": true, + "numb_fparam": 0, + "precision": "float64", + "seed": 1 + } + }, + "loss": { + "type": "dos", + "start_pref_dos": 0.0, + "limit_pref_dos": 0.0, + "start_pref_cdf": 0.0, + "limit_pref_cdf": 0.0, + "start_pref_ados": 1.0, + "limit_pref_ados": 1.0, + "start_pref_acdf": 0.0, + "limit_pref_acdf": 0.0 + }, + "learning_rate": { + "type": "exp", + "start_lr": 0.001, + "stop_lr": 1e-08 + }, + "training": { + "stop_batch": 100000, + "seed": 1, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "training_data": { + "systems": [ + "pt/dos/data/" + ], + "set_prefix": "set", + "batch_size": 1 + }, + "validation_data": { + "systems": [ + "pt/dos/data/" + ], + "set_prefix": "set", + "batch_size": 1 + } + }, + "_comment1": "that's all" +} diff --git a/source/tests/pt/model/test_dp_atomic_model.py 
b/source/tests/pt/model/test_dp_atomic_model.py index 6daaeef2ef..4a35b4676a 100644 --- a/source/tests/pt/model/test_dp_atomic_model.py +++ b/source/tests/pt/model/test_dp_atomic_model.py @@ -27,6 +27,7 @@ from .test_env_mat import ( TestCaseSingleFrameWithNlist, + TestCaseSingleFrameWithNlistWithVirtual, ) dtype = env.GLOBAL_PT_FLOAT_PRECISION @@ -166,10 +167,8 @@ def test_excl_consistency(self): # check output def out_names = [vv.name for vv in md0.atomic_output_def().get_data().values()] - if atom_excl == []: - self.assertEqual(out_names, ["energy"]) - else: - self.assertEqual(out_names, ["energy", "mask"]) + self.assertEqual(out_names, ["energy", "mask"]) + if atom_excl != []: for ii in md0.atomic_output_def().get_data().values(): if ii.name == "mask": self.assertEqual(ii.shape, [1]) @@ -189,3 +188,49 @@ def test_excl_consistency(self): np.testing.assert_array_equal(to_numpy_array(ret0["mask"]), expected) else: raise ValueError(f"not expected atom_excl {atom_excl}") + + +class TestDPAtomicModelVirtualConsistency(unittest.TestCase): + def setUp(self): + self.case0 = TestCaseSingleFrameWithNlist() + self.case1 = TestCaseSingleFrameWithNlistWithVirtual() + self.case0.setUp() + self.case1.setUp() + + def test_virtual_consistency(self): + nf, _, _ = self.case0.nlist.shape + ds = DescrptSeA( + self.case0.rcut, + self.case0.rcut_smth, + self.case0.sel, + ) + ft = InvarFitting( + "energy", + self.case0.nt, + ds.get_dim_out(), + 1, + mixed_types=ds.mixed_types(), + ) + type_map = ["foo", "bar"] + md1 = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE) + + args0 = [self.case0.coord_ext, self.case0.atype_ext, self.case0.nlist] + args0 = [to_torch_tensor(ii) for ii in args0] + args1 = [self.case1.coord_ext, self.case1.atype_ext, self.case1.nlist] + args1 = [to_torch_tensor(ii) for ii in args1] + + ret0 = md1.forward_common_atomic(*args0) + ret1 = md1.forward_common_atomic(*args1) + + for dd in range(self.case0.nf): + np.testing.assert_allclose( + to_numpy_array(ret0["energy"])[dd], + to_numpy_array(ret1["energy"])[dd, self.case1.get_real_mapping[dd], :], + ) + expected_mask = np.array( + [ + [1, 0, 1, 1], + [1, 1, 0, 1], + ] + ) + np.testing.assert_equal(to_numpy_array(ret1["mask"]), expected_mask) diff --git a/source/tests/pt/model/test_dp_model.py b/source/tests/pt/model/test_dp_model.py index c0b152b3d3..7470cf96d0 100644 --- a/source/tests/pt/model/test_dp_model.py +++ b/source/tests/pt/model/test_dp_model.py @@ -237,7 +237,10 @@ def test_prec_consistency(self): self.assertEqual(model_l_ret_32[ii].dtype, torch.float64) else: self.assertEqual(model_l_ret_32[ii].dtype, torch.float32) - self.assertEqual(model_l_ret_64[ii].dtype, torch.float64) + if ii != "mask": + self.assertEqual(model_l_ret_64[ii].dtype, torch.float64) + else: + self.assertEqual(model_l_ret_64[ii].dtype, torch.int32) np.testing.assert_allclose( to_numpy_array(model_l_ret_32[ii]), to_numpy_array(model_l_ret_64[ii]), @@ -377,7 +380,10 @@ def test_prec_consistency(self): self.assertEqual(model_l_ret_32[ii].dtype, torch.float64) else: self.assertEqual(model_l_ret_32[ii].dtype, torch.float32) - self.assertEqual(model_l_ret_64[ii].dtype, torch.float64) + if ii != "mask": + self.assertEqual(model_l_ret_64[ii].dtype, torch.float64) + else: + self.assertEqual(model_l_ret_64[ii].dtype, torch.int32) np.testing.assert_allclose( to_numpy_array(model_l_ret_32[ii]), to_numpy_array(model_l_ret_64[ii]), diff --git a/source/tests/pt/model/test_ener_fitting.py b/source/tests/pt/model/test_ener_fitting.py index 69bd4b42a3..f63e17c2fa 100644 
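The extra `nn` axis added to the sweep above also covers `neuron=[]`, i.e. (presumably) a fitting net with no hidden layers. For reference, a self-contained version of the grid it produces:

```python
import itertools

# Same parameter grid as the test above; each tuple builds one fitting net.
grid = list(
    itertools.product(
        [1, 3],            # output dimension
        [True, False],     # mixed_types
        [0, 3],            # numb_fparam
        [0, 4],            # numb_aparam
        [[], [0], [1]],    # exclude_types
        [[4, 4, 4], []],   # neuron (empty list = no hidden layers, assumed)
    )
)
print(len(grid))  # 96 combinations
```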
--- a/source/tests/pt/model/test_ener_fitting.py +++ b/source/tests/pt/model/test_ener_fitting.py @@ -44,12 +44,13 @@ def test_consistency( ) atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) - for od, mixed_types, nfp, nap, et in itertools.product( + for od, mixed_types, nfp, nap, et, nn in itertools.product( [1, 3], [True, False], [0, 3], [0, 4], [[], [0], [1]], + [[4, 4, 4], []], ): ft0 = InvarFitting( "foo", @@ -60,6 +61,7 @@ def test_consistency( numb_aparam=nap, mixed_types=mixed_types, exclude_types=et, + neuron=nn, ).to(env.DEVICE) ft1 = DPInvarFitting.deserialize(ft0.serialize()) ft2 = InvarFitting.deserialize(ft0.serialize()) diff --git a/source/tests/pt/model/test_env_mat.py b/source/tests/pt/model/test_env_mat.py index 615e7c6230..e18093b2f1 100644 --- a/source/tests/pt/model/test_env_mat.py +++ b/source/tests/pt/model/test_env_mat.py @@ -64,6 +64,56 @@ def setUp(self): self.atol = 1e-12 +class TestCaseSingleFrameWithNlistWithVirtual: + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 4 + self.nall = 5 + self.nf, self.nt = 2, 2 + self.coord_ext = np.array( + [ + [0, 0, 0], + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, -2, 0], + ], + dtype=np.float64, + ).reshape([1, self.nall, 3]) + self.atype_ext = np.array([0, -1, 0, 1, 0], dtype=int).reshape([1, self.nall]) + # sel = [5, 2] + self.sel = [5, 2] + self.nlist = np.array( + [ + [2, 4, -1, -1, -1, 3, -1], + [-1, -1, -1, -1, -1, -1, -1], + [0, -1, -1, -1, -1, 3, -1], + [0, 2, -1, -1, -1, -1, -1], + ], + dtype=int, + ).reshape([1, self.nloc, sum(self.sel)]) + self.rcut = 2.2 + self.rcut_smth = 0.4 + # permutations + self.perm = np.array([3, 0, 1, 2, 4], dtype=np.int32) + inv_perm = np.argsort(self.perm) + # permute the coord and atype + self.coord_ext = np.concatenate( + [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0 + ).reshape(self.nf, self.nall * 3) + self.atype_ext = np.concatenate( + [self.atype_ext, self.atype_ext[:, self.perm]], axis=0 + ) + # permute the nlist + nlist1 = self.nlist[:, self.perm[: self.nloc], :] + mask = nlist1 == -1 + nlist1 = inv_perm[nlist1] + nlist1 = np.where(mask, -1, nlist1) + self.nlist = np.concatenate([self.nlist, nlist1], axis=0) + self.get_real_mapping = np.array([[0, 2, 3], [0, 1, 3]], dtype=np.int32) + self.atol = 1e-12 + + class TestCaseSingleFrameWithoutNlist: def setUp(self): # nloc == 3, nall == 4 diff --git a/source/tests/pt/model/test_linear_atomic_model.py b/source/tests/pt/model/test_linear_atomic_model.py index adc682a41f..7f24ffdc53 100644 --- a/source/tests/pt/model/test_linear_atomic_model.py +++ b/source/tests/pt/model/test_linear_atomic_model.py @@ -178,11 +178,13 @@ def test_self_consistency(self): def test_jit(self): md1 = torch.jit.script(self.md1) - self.assertEqual(md1.get_rcut(), self.rcut) - self.assertEqual(md1.get_type_map(), ["foo", "bar"]) + # atomic model no more export methods + # self.assertEqual(md1.get_rcut(), self.rcut) + # self.assertEqual(md1.get_type_map(), ["foo", "bar"]) md3 = torch.jit.script(self.md3) - self.assertEqual(md3.get_rcut(), self.rcut) - self.assertEqual(md3.get_type_map(), ["foo", "bar"]) + # atomic model no more export methods + # self.assertEqual(md3.get_rcut(), self.rcut) + # self.assertEqual(md3.get_type_map(), ["foo", "bar"]) class TestRemmapMethod(unittest.TestCase): diff --git a/source/tests/pt/model/test_model.py b/source/tests/pt/model/test_model.py index f42c11aa4c..aa1c0dd969 100644 --- a/source/tests/pt/model/test_model.py +++ b/source/tests/pt/model/test_model.py @@ -60,13 +60,13 @@ def 
torch2tf(torch_name, last_layer_id=None): fields = torch_name.split(".") - offset = int(fields[2] == "networks") + offset = int(fields[3] == "networks") + 1 element_id = int(fields[2 + offset]) - if fields[0] == "descriptor": + if fields[1] == "descriptor": layer_id = int(fields[4 + offset]) + 1 weight_type = fields[5 + offset] ret = "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id) - elif fields[0] == "fitting_net": + elif fields[1] == "fitting_net": layer_id = int(fields[4 + offset]) weight_type = fields[5 + offset] if layer_id != last_layer_id: @@ -301,7 +301,7 @@ def test_consistency(self): ) # Keep statistics consistency between 2 implentations - my_em = my_model.descriptor + my_em = my_model.get_descriptor() mean = stat_dict["descriptor.mean"].reshape([self.ntypes, my_em.get_nsel(), 4]) stddev = stat_dict["descriptor.stddev"].reshape( [self.ntypes, my_em.get_nsel(), 4] @@ -310,7 +310,7 @@ def test_consistency(self): torch.tensor(mean, device=DEVICE), torch.tensor(stddev, device=DEVICE), ) - my_model.fitting_net.bias_atom_e = torch.tensor( + my_model.get_fitting_net().bias_atom_e = torch.tensor( stat_dict["fitting_net.bias_atom_e"], device=DEVICE ) @@ -338,34 +338,33 @@ def test_consistency(self): batch["natoms"] = torch.tensor( batch["natoms_vec"], device=batch["coord"].device ).unsqueeze(0) - model_predict = my_model( - batch["coord"].to(env.DEVICE), - batch["atype"].to(env.DEVICE), - batch["box"].to(env.DEVICE), - do_atomic_virial=True, - ) - model_predict_1 = my_model( - batch["coord"].to(env.DEVICE), - batch["atype"].to(env.DEVICE), - batch["box"].to(env.DEVICE), - do_atomic_virial=False, + model_input = { + "coord": batch["coord"].to(env.DEVICE), + "atype": batch["atype"].to(env.DEVICE), + "box": batch["box"].to(env.DEVICE), + "do_atomic_virial": True, + } + model_input_1 = { + "coord": batch["coord"].to(env.DEVICE), + "atype": batch["atype"].to(env.DEVICE), + "box": batch["box"].to(env.DEVICE), + "do_atomic_virial": False, + } + label = { + "energy": batch["energy"].to(env.DEVICE), + "force": batch["force"].to(env.DEVICE), + } + cur_lr = my_lr.value(self.wanted_step) + model_predict, loss, _ = my_loss( + model_input, my_model, label, int(batch["natoms"][0, 0]), cur_lr ) + model_predict_1 = my_model(**model_input_1) p_energy, p_force, p_virial, p_atomic_virial = ( model_predict["energy"], model_predict["force"], model_predict["virial"], model_predict["atom_virial"], ) - cur_lr = my_lr.value(self.wanted_step) - model_pred = { - "energy": p_energy, - "force": p_force, - } - label = { - "energy": batch["energy"].to(env.DEVICE), - "force": batch["force"].to(env.DEVICE), - } - loss, _ = my_loss(model_pred, label, int(batch["natoms"][0, 0]), cur_lr) np.testing.assert_allclose( head_dict["energy"], p_energy.view(-1).cpu().detach().numpy() ) diff --git a/source/tests/pt/model/test_nlist.py b/source/tests/pt/model/test_nlist.py index 616af93081..244b3804c8 100644 --- a/source/tests/pt/model/test_nlist.py +++ b/source/tests/pt/model/test_nlist.py @@ -22,16 +22,16 @@ class TestNeighList(unittest.TestCase): def setUp(self): self.nf = 3 - self.nloc = 2 + self.nloc = 3 self.ns = 5 * 5 * 3 self.nall = self.ns * self.nloc self.cell = torch.tensor( [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype, device=env.DEVICE ) self.icoord = torch.tensor( - [[0, 0, 0], [0.5, 0.5, 0.1]], dtype=dtype, device=env.DEVICE + [[0, 0, 0], [0, 0, 0], [0.5, 0.5, 0.1]], dtype=dtype, device=env.DEVICE ) - self.atype = torch.tensor([0, 1], dtype=torch.int, device=env.DEVICE) + self.atype = 
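The reworked test above reflects a change in how the PyTorch loss is invoked: it now receives the raw model input and the model itself, runs the forward pass internally, and returns the prediction together with the loss terms. A toy sketch of that calling convention; all names and the loss formula below are made up for illustration:

```python
# Toy example of a loss that runs the model itself and returns
# (prediction, loss, per-term losses), mirroring the convention above.
def toy_loss(model_input, model, label, natoms, learning_rate):
    pred = model(**model_input)
    l2 = sum((pred[k] - label[k]) ** 2 for k in label) / natoms
    return pred, l2, {"l2": l2}


def toy_model(coord, box):
    return {"energy": 2.0 * coord}


pred, loss, more_loss = toy_loss(
    {"coord": 1.5, "box": 1.0}, toy_model, {"energy": 3.0}, natoms=6, learning_rate=1e-3
)
print(pred, loss, more_loss)  # {'energy': 3.0} 0.0 {'l2': 0.0}
```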
torch.tensor([-1, 0, 1], dtype=torch.int, device=env.DEVICE) [self.cell, self.icoord, self.atype] = [ ii.unsqueeze(0) for ii in [self.cell, self.icoord, self.atype] ] @@ -51,8 +51,9 @@ def setUp(self): # mapping[0], type_split=True, ) self.ref_nlist = torch.tensor( [ - [0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1], - [0, 0, 0, 0, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1], + [-1] * sum(self.nsel), + [1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1], + [1, 1, 1, 1, -1, -1, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1], ], device=env.DEVICE, ) @@ -181,7 +182,9 @@ def test_extend_coord(self): ) torch.testing.assert_close( cc, - torch.tensor([30, 30, 30, 30, 30], dtype=torch.long, device=env.DEVICE), + torch.tensor( + [self.ns * self.nloc // 5] * 5, dtype=torch.long, device=env.DEVICE + ), rtol=self.prec, atol=self.prec, ) @@ -194,7 +197,9 @@ def test_extend_coord(self): ) torch.testing.assert_close( cc, - torch.tensor([30, 30, 30, 30, 30], dtype=torch.long, device=env.DEVICE), + torch.tensor( + [self.ns * self.nloc // 5] * 5, dtype=torch.long, device=env.DEVICE + ), rtol=self.prec, atol=self.prec, ) @@ -207,7 +212,9 @@ def test_extend_coord(self): ) torch.testing.assert_close( cc, - torch.tensor([50, 50, 50], dtype=torch.long, device=env.DEVICE), + torch.tensor( + [self.ns * self.nloc // 3] * 3, dtype=torch.long, device=env.DEVICE + ), rtol=self.prec, atol=self.prec, ) diff --git a/source/tests/pt/model/test_null_input.py b/source/tests/pt/model/test_null_input.py index c8f4307d52..d5cf2475fb 100644 --- a/source/tests/pt/model/test_null_input.py +++ b/source/tests/pt/model/test_null_input.py @@ -125,7 +125,6 @@ def setUp(self): self.model = get_model(model_params).to(env.DEVICE) -@unittest.skip("FAILED at the moment") class TestEnergyModelZBL(unittest.TestCase, NullTest): def setUp(self): model_params = copy.deepcopy(model_zbl) diff --git a/source/tests/pt/model/test_pairtab_atomic_model.py b/source/tests/pt/model/test_pairtab_atomic_model.py index 322de51a2c..165e3dead7 100644 --- a/source/tests/pt/model/test_pairtab_atomic_model.py +++ b/source/tests/pt/model/test_pairtab_atomic_model.py @@ -98,8 +98,9 @@ def test_with_mask(self): def test_jit(self): model = torch.jit.script(self.model) - self.assertEqual(model.get_rcut(), 0.02) - self.assertEqual(model.get_type_map(), ["H", "O"]) + # atomic model no more export methods + # self.assertEqual(model.get_rcut(), 0.02) + # self.assertEqual(model.get_type_map(), ["H", "O"]) def test_deserialize(self): model1 = PairTabAtomicModel.deserialize(self.model.serialize()) @@ -121,8 +122,9 @@ def test_deserialize(self): ) model1 = torch.jit.script(model1) - self.assertEqual(model1.get_rcut(), 0.02) - self.assertEqual(model1.get_type_map(), ["H", "O"]) + # atomic model no more export methods + # self.assertEqual(model1.get_rcut(), 0.02) + # self.assertEqual(model1.get_type_map(), ["H", "O"]) def test_cross_deserialize(self): model_dict = self.model.serialize() # pytorch model to dict diff --git a/source/tests/pt/model/test_permutation.py b/source/tests/pt/model/test_permutation.py index 8ec5c375fd..3d9a4df11e 100644 --- a/source/tests/pt/model/test_permutation.py +++ b/source/tests/pt/model/test_permutation.py @@ -36,6 +36,28 @@ "data_stat_nbatch": 20, } +model_dos = { + "type_map": ["O", "H", "B"], + "descriptor": { + "type": "se_e2_a", + "sel": [46, 92, 4], + "rcut_smth": 0.50, + "rcut": 4.00, + "neuron": [25, 50, 100], + "resnet_dt": False, + "axis_neuron": 16, + "seed": 1, + }, + 
"fitting_net": { + "neuron": [24, 24, 24], + "resnet_dt": True, + "seed": 1, + "type": "dos", + "numb_dos": 5, + }, + "data_stat_nbatch": 20, +} + model_zbl = { "type_map": ["O", "H", "B"], "use_srtab": "source/tests/pt/model/water/data/zbl_tab_potential/H2O_tab_potential.txt", @@ -278,6 +300,13 @@ def setUp(self): self.model = get_model(model_params).to(env.DEVICE) +class TestDOSModelSeA(unittest.TestCase, PermutationTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + class TestEnergyModelDPA1(unittest.TestCase, PermutationTest): def setUp(self): model_params = copy.deepcopy(model_dpa1) diff --git a/source/tests/pt/model/test_polar_stat.py b/source/tests/pt/model/test_polar_stat.py new file mode 100644 index 0000000000..ca3b037011 --- /dev/null +++ b/source/tests/pt/model/test_polar_stat.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import torch + +from deepmd.pt.model.task.polarizability import ( + PolarFittingNet, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) +from deepmd.tf.fit.polar import ( + PolarFittingSeA, +) + + +class TestConsistency(unittest.TestCase): + def setUp(self) -> None: + types = torch.randint(0, 4, (1, 5), device=env.DEVICE) + types = torch.cat((types, types, types), dim=0) + types[:, -1] = 3 + ntypes = 4 + atomic_polarizability = torch.rand((3, 5, 9), device=env.DEVICE) + polarizability = torch.rand((3, 9), device=env.DEVICE) + find_polarizability = torch.rand(1, device=env.DEVICE) + find_atomic_polarizability = torch.rand(1, device=env.DEVICE) + self.sampled = [ + { + "type": types, + "find_atomic_polarizability": find_atomic_polarizability, + "atomic_polarizability": atomic_polarizability, + "polarizability": polarizability, + "find_polarizability": find_polarizability, + } + ] + self.all_stat = { + k: [v.numpy(force=True)] for d in self.sampled for k, v in d.items() + } + self.tfpolar = PolarFittingSeA( + ntypes=ntypes, + dim_descrpt=1, + embedding_width=1, + sel_type=list(range(ntypes)), + ) + self.ptpolar = PolarFittingNet( + ntypes=ntypes, + dim_descrpt=1, + embedding_width=1, + ) + + def test_atomic_consistency(self): + self.tfpolar.compute_output_stats(self.all_stat) + tfbias = self.tfpolar.constant_matrix + self.ptpolar.compute_output_stats(self.sampled) + ptbias = self.ptpolar.constant_matrix + np.testing.assert_allclose(tfbias, to_numpy_array(ptbias)) + + def test_global_consistency(self): + self.sampled[0]["find_atomic_polarizability"] = -1 + self.sampled[0]["polarizability"] = self.sampled[0][ + "atomic_polarizability" + ].sum(dim=1) + self.all_stat["find_atomic_polarizability"] = [-1] + self.all_stat["polarizability"] = [ + self.all_stat["atomic_polarizability"][0].sum(axis=1) + ] + self.tfpolar.compute_output_stats(self.all_stat) + tfbias = self.tfpolar.constant_matrix + self.ptpolar.compute_output_stats(self.sampled) + ptbias = self.ptpolar.constant_matrix + np.testing.assert_allclose(tfbias, to_numpy_array(ptbias), rtol=1e-5, atol=1e-5) diff --git a/source/tests/pt/model/test_rot.py b/source/tests/pt/model/test_rot.py index a12bd063b4..cbf09ecf40 100644 --- a/source/tests/pt/model/test_rot.py +++ b/source/tests/pt/model/test_rot.py @@ -15,6 +15,7 @@ ) from .test_permutation import ( # model_dpau, + model_dos, model_dpa1, model_dpa2, model_hybrid, @@ -139,6 +140,13 @@ def setUp(self): self.model = get_model(model_params).to(env.DEVICE) 
+class TestDOSModelSeA(unittest.TestCase, RotTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + class TestEnergyModelDPA1(unittest.TestCase, RotTest): def setUp(self): model_params = copy.deepcopy(model_dpa1) diff --git a/source/tests/pt/model/test_smooth.py b/source/tests/pt/model/test_smooth.py index 86e9ed94d7..4f5be912cf 100644 --- a/source/tests/pt/model/test_smooth.py +++ b/source/tests/pt/model/test_smooth.py @@ -15,6 +15,7 @@ ) from .test_permutation import ( # model_dpau, + model_dos, model_dpa1, model_dpa2, model_hybrid, @@ -139,6 +140,14 @@ def setUp(self): self.epsilon, self.aprec = None, None +class TestDOSModelSeA(unittest.TestCase, SmoothTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + # @unittest.skip("dpa-1 not smooth at the moment") class TestEnergyModelDPA1(unittest.TestCase, SmoothTest): def setUp(self): diff --git a/source/tests/pt/model/test_trans.py b/source/tests/pt/model/test_trans.py index 359e91d8c8..a0aeefd6b3 100644 --- a/source/tests/pt/model/test_trans.py +++ b/source/tests/pt/model/test_trans.py @@ -15,6 +15,7 @@ ) from .test_permutation import ( # model_dpau, + model_dos, model_dpa1, model_dpa2, model_hybrid, @@ -83,6 +84,13 @@ def setUp(self): self.model = get_model(model_params).to(env.DEVICE) +class TestDOSModelSeA(unittest.TestCase, TransTest): + def setUp(self): + model_params = copy.deepcopy(model_dos) + self.type_split = False + self.model = get_model(model_params).to(env.DEVICE) + + class TestEnergyModelDPA1(unittest.TestCase, TransTest): def setUp(self): model_params = copy.deepcopy(model_dpa1) diff --git a/source/tests/pt/test_finetune.py b/source/tests/pt/test_finetune.py index d21a44acc7..dd72eb4718 100644 --- a/source/tests/pt/test_finetune.py +++ b/source/tests/pt/test_finetune.py @@ -44,27 +44,29 @@ def test_finetune_change_energy_bias(self): else: model = get_model(self.model_config) if isinstance(model, EnergyModel): - model.fitting_net.bias_atom_e = torch.rand_like( - model.fitting_net.bias_atom_e + model.get_fitting_net().bias_atom_e = torch.rand_like( + model.get_fitting_net().bias_atom_e ) energy_bias_before = deepcopy( - model.fitting_net.bias_atom_e.detach().cpu().numpy().reshape(-1) + model.get_fitting_net().bias_atom_e.detach().cpu().numpy().reshape(-1) ) bias_atom_e_input = deepcopy( - model.fitting_net.bias_atom_e.detach().cpu().numpy().reshape(-1) + model.get_fitting_net().bias_atom_e.detach().cpu().numpy().reshape(-1) ) elif isinstance(model, DPZBLModel): - model.dp_model.fitting_net.bias_atom_e = torch.rand_like( - model.dp_model.fitting_net.bias_atom_e + model.dp_model.get_fitting_net().bias_atom_e = torch.rand_like( + model.dp_model.get_fitting_net().bias_atom_e ) energy_bias_before = deepcopy( - model.dp_model.fitting_net.bias_atom_e.detach() + model.dp_model.get_fitting_net() + .bias_atom_e.detach() .cpu() .numpy() .reshape(-1) ) bias_atom_e_input = deepcopy( - model.dp_model.fitting_net.bias_atom_e.detach() + model.dp_model.get_fitting_net() + .bias_atom_e.detach() .cpu() .numpy() .reshape(-1) diff --git a/source/tests/pt/test_loss.py b/source/tests/pt/test_loss.py index dddc9af219..2abb22c2a9 100644 --- a/source/tests/pt/test_loss.py +++ b/source/tests/pt/test_loss.py @@ -171,8 +171,13 @@ def test_consistency(self): self.start_pref_v, self.limit_pref_v, ) - my_loss, my_more_loss = 
mine( - self.model_pred, + + def fake_model(): + return self.model_pred + + _, my_loss, my_more_loss = mine( + {}, + fake_model, self.label, self.nloc, self.cur_lr, @@ -345,8 +350,13 @@ def test_consistency(self): self.start_pref_fm, self.limit_pref_fm, ) - my_loss, my_more_loss = mine( - self.model_pred, + + def fake_model(): + return self.model_pred + + _, my_loss, my_more_loss = mine( + {}, + fake_model, self.label, self.nloc_tf, # use tf natoms pref self.cur_lr, diff --git a/source/tests/pt/test_lr.py b/source/tests/pt/test_lr.py index ca1ec7e490..9fbde599bb 100644 --- a/source/tests/pt/test_lr.py +++ b/source/tests/pt/test_lr.py @@ -27,6 +27,7 @@ def test_consistency(self): self.decay_step = decay_step self.stop_step = stop_step self.judge_it() + self.decay_rate_pt() def judge_it(self): base_lr = learning_rate.LearningRateExp( @@ -54,6 +55,52 @@ def judge_it(self): self.assertTrue(np.allclose(base_vals, my_vals)) tf.reset_default_graph() + def decay_rate_pt(self): + my_lr = LearningRateExp( + self.start_lr, self.stop_lr, self.decay_step, self.stop_step + ) + + default_ds = 100 if self.stop_step // 10 > 100 else self.stop_step // 100 + 1 + if self.decay_step >= self.stop_step: + self.decay_step = default_ds + decay_rate = np.exp( + np.log(self.stop_lr / self.start_lr) / (self.stop_step / self.decay_step) + ) + my_lr_decay = LearningRateExp( + self.start_lr, + 1e-10, + self.decay_step, + self.stop_step, + decay_rate=decay_rate, + ) + min_lr = 1e-5 + my_lr_decay_trunc = LearningRateExp( + self.start_lr, + min_lr, + self.decay_step, + self.stop_step, + decay_rate=decay_rate, + ) + my_vals = [ + my_lr.value(step_id) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + my_vals_decay = [ + my_lr_decay.value(step_id) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + my_vals_decay_trunc = [ + my_lr_decay_trunc.value(step_id) + for step_id in range(self.stop_step) + if step_id % self.decay_step != 0 + ] + self.assertTrue(np.allclose(my_vals_decay, my_vals)) + self.assertTrue( + np.allclose(my_vals_decay_trunc, np.clip(my_vals, a_min=min_lr, a_max=None)) + ) + if __name__ == "__main__": unittest.main() diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py index d06733b016..e959e9a128 100644 --- a/source/tests/pt/test_multitask.py +++ b/source/tests/pt/test_multitask.py @@ -73,24 +73,24 @@ def setUp(self): self.stat_files = "se_e2_a" os.makedirs(self.stat_files, exist_ok=True) self.config = multitask_se_e2_a - self.config["training"]["data_dict"]["model_1"]["training_data"][ - "systems" - ] = data_file + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) self.config["training"]["data_dict"]["model_1"]["validation_data"][ "systems" ] = data_file - self.config["training"]["data_dict"]["model_1"][ - "stat_file" - ] = f"{self.stat_files}/model_1" - self.config["training"]["data_dict"]["model_2"]["training_data"][ - "systems" - ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) self.config["training"]["data_dict"]["model_2"]["validation_data"][ "systems" ] = data_file - self.config["training"]["data_dict"]["model_2"][ - "stat_file" - ] = f"{self.stat_files}/model_2" + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) self.config["training"]["numb_steps"] = 1 
self.config["training"]["save_freq"] = 1 self.config["model"], self.shared_links = preprocess_shared_params( @@ -111,24 +111,24 @@ def setUp(self): self.stat_files = "DPA1" os.makedirs(self.stat_files, exist_ok=True) self.config = multitask_DPA1 - self.config["training"]["data_dict"]["model_1"]["training_data"][ - "systems" - ] = data_file + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) self.config["training"]["data_dict"]["model_1"]["validation_data"][ "systems" ] = data_file - self.config["training"]["data_dict"]["model_1"][ - "stat_file" - ] = f"{self.stat_files}/model_1" - self.config["training"]["data_dict"]["model_2"]["training_data"][ - "systems" - ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) self.config["training"]["data_dict"]["model_2"]["validation_data"][ "systems" ] = data_file - self.config["training"]["data_dict"]["model_2"][ - "stat_file" - ] = f"{self.stat_files}/model_2" + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 self.config["model"], self.shared_links = preprocess_shared_params( @@ -149,24 +149,24 @@ def setUp(self): self.stat_files = "DPA2" os.makedirs(self.stat_files, exist_ok=True) self.config = multitask_DPA2 - self.config["training"]["data_dict"]["model_1"]["training_data"][ - "systems" - ] = data_file + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) self.config["training"]["data_dict"]["model_1"]["validation_data"][ "systems" ] = data_file - self.config["training"]["data_dict"]["model_1"][ - "stat_file" - ] = f"{self.stat_files}/model_1" - self.config["training"]["data_dict"]["model_2"]["training_data"][ - "systems" - ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) self.config["training"]["data_dict"]["model_2"]["validation_data"][ "systems" ] = data_file - self.config["training"]["data_dict"]["model_2"][ - "stat_file" - ] = f"{self.stat_files}/model_2" + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 self.config["model"], self.shared_links = preprocess_shared_params( diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index db69a1bcea..a9ba2fd720 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -17,6 +17,7 @@ ) from .model.test_permutation import ( + model_dos, model_dpa1, model_dpa2, model_hybrid, @@ -52,11 +53,11 @@ def test_trainable(self): fix_params["model"]["descriptor"]["trainable"] = True trainer_fix = get_trainer(fix_params) model_dict_before_training = deepcopy( - trainer_fix.model.fitting_net.state_dict() + trainer_fix.model.get_fitting_net().state_dict() ) trainer_fix.run() model_dict_after_training = deepcopy( - trainer_fix.model.fitting_net.state_dict() + trainer_fix.model.get_fitting_net().state_dict() ) else: trainer_fix = get_trainer(fix_params) @@ -96,6 +97,23 @@ def tearDown(self) -> None: DPTrainTest.tearDown(self) +@unittest.skip("loss not implemented") +class 
TestDOSModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "dos/input.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "dos/data/")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dos) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + class TestEnergyZBLModelSeA(unittest.TestCase, DPTrainTest): def setUp(self): input_json = str(Path(__file__).parent / "water/zbl.json") @@ -293,6 +311,7 @@ def setUp(self): self.config["model"]["atom_exclude_types"] = [1] self.config["model"]["fitting_net"]["type"] = "polar" self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 # can not set requires_grad false for all parameters, @@ -326,6 +345,7 @@ def setUp(self): self.config["model"]["atom_exclude_types"] = [1] self.config["model"]["fitting_net"]["type"] = "polar" self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 # can not set requires_grad false for all parameters, @@ -359,6 +379,7 @@ def setUp(self): self.config["model"]["atom_exclude_types"] = [1] self.config["model"]["fitting_net"]["type"] = "polar" self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 # can not set requires_grad false for all parameters, diff --git a/source/tests/tf/test_fitting_dos.py b/source/tests/tf/test_fitting_dos.py index a2a54d6287..f9df5fc126 100644 --- a/source/tests/tf/test_fitting_dos.py +++ b/source/tests/tf/test_fitting_dos.py @@ -59,7 +59,8 @@ def test_fitting(self): descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True) jdata["model"]["fitting_net"].pop("type", None) - jdata["model"]["fitting_net"]["descrpt"] = descrpt + jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes() + jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out() fitting = DOSFitting(**jdata["model"]["fitting_net"], uniform_seed=True) # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']]) @@ -189,21 +190,20 @@ def test_fitting(self): ref_atom_dos_1 = [ -0.32495014, - -0.87979356, - -0.26630668, -0.32495882, - -0.87979767, - -0.2663072, + -0.32496842, + -0.32495892, + -0.32495469, + -0.32496075, ] ref_atom_dos_2 = [ - -0.26630917, 0.21549911, - -0.87979638, - -0.26630564, 0.21550413, - -0.87979585, + 0.21551077, + 0.21550547, + 0.21550303, + 0.21550645, ] places = 4 - np.testing.assert_almost_equal(pred_atom_dos[:, 0], ref_atom_dos_1, places) np.testing.assert_almost_equal(pred_atom_dos[:, 50], ref_atom_dos_2, places) diff --git a/source/tests/tf/test_init_frz_model_multi.py b/source/tests/tf/test_init_frz_model_multi.py index b723134ca1..b6209a7e69 100644 --- a/source/tests/tf/test_init_frz_model_multi.py +++ b/source/tests/tf/test_init_frz_model_multi.py @@ -64,12 +64,12 @@ def _init_models(): jdata["training"]["data_dict"]["water_ener"] = {} 
jdata["training"]["data_dict"]["water_ener"]["training_data"] = training_data_config jdata["training"]["data_dict"]["water_ener"]["training_data"]["systems"] = data_file - jdata["training"]["data_dict"]["water_ener"][ - "validation_data" - ] = validation_data_config - jdata["training"]["data_dict"]["water_ener"]["validation_data"][ - "systems" - ] = data_file + jdata["training"]["data_dict"]["water_ener"]["validation_data"] = ( + validation_data_config + ) + jdata["training"]["data_dict"]["water_ener"]["validation_data"]["systems"] = ( + data_file + ) jdata["training"]["save_ckpt"] = ckpt jdata["model"]["fitting_net_dict"] = {} jdata["model"]["fitting_net_dict"]["water_ener"] = fitting_config @@ -98,18 +98,18 @@ def _init_models(): jdata["learning_rate_dict"]["water_ener_new"] = learning_rate_config jdata["training"]["data_dict"] = {} jdata["training"]["data_dict"]["water_ener_new"] = {} - jdata["training"]["data_dict"]["water_ener_new"][ - "training_data" - ] = training_data_config - jdata["training"]["data_dict"]["water_ener_new"]["training_data"][ - "systems" - ] = data_file - jdata["training"]["data_dict"]["water_ener_new"][ - "validation_data" - ] = validation_data_config - jdata["training"]["data_dict"]["water_ener_new"]["validation_data"][ - "systems" - ] = data_file + jdata["training"]["data_dict"]["water_ener_new"]["training_data"] = ( + training_data_config + ) + jdata["training"]["data_dict"]["water_ener_new"]["training_data"]["systems"] = ( + data_file + ) + jdata["training"]["data_dict"]["water_ener_new"]["validation_data"] = ( + validation_data_config + ) + jdata["training"]["data_dict"]["water_ener_new"]["validation_data"]["systems"] = ( + data_file + ) jdata["training"].pop("fitting_weight") jdata = replace_model_params_with_frz_multi_model(jdata, frozen_model) diff --git a/source/tests/tf/test_model_dos.py b/source/tests/tf/test_model_dos.py index d88c81c332..9c01b14e32 100644 --- a/source/tests/tf/test_model_dos.py +++ b/source/tests/tf/test_model_dos.py @@ -66,7 +66,8 @@ def test_model(self): descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True) jdata["model"]["fitting_net"].pop("type", None) - jdata["model"]["fitting_net"]["descrpt"] = descrpt + jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes() + jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out() fitting = DOSFitting(**jdata["model"]["fitting_net"], uniform_seed=True) model = DOSModel(descrpt, fitting) @@ -123,106 +124,106 @@ def test_model(self): ref_dos = np.array( [ - -2.98834333, - -0.63166985, - -3.37199568, - -1.88397887, - 0.87560992, - 4.85426159, - -1.22677731, - -0.60918118, - 8.80472675, - -1.12006829, - -3.72653765, - -3.03698828, - 3.50906891, - 5.55140795, - -3.34920924, - -4.43507641, - -6.1729281, - -8.34865917, - 0.14371788, - -4.38078479, - -6.43141133, - 4.07791938, - 7.14102837, - -0.52347718, - 0.82663796, - -1.64225631, - -4.63088421, - 3.3910594, - -9.09682274, - 1.61104204, - 4.45900773, - -2.44688559, - -2.83298183, - -2.00733658, - 7.33444256, - 7.09187373, - -1.97065392, - 0.01623084, - -7.48861264, - -1.17790161, - 2.77126775, - -2.55552037, - 3.3518257, - -0.09316856, - -1.94521413, - 0.50089251, - -2.75763233, - -1.94382637, - 1.30562041, - 5.08351043, - -1.90604837, - -0.80030045, - -4.87093267, - 4.18009666, - -2.9011435, - 2.58497143, - 4.47495176, - -0.9639419, - 8.15692179, - 0.48758731, - -0.62264663, - -1.70677258, - -5.51641378, - 3.98621565, - 0.57749944, - 2.9658081, - -4.10467591, - -7.14827888, - 0.02838605, - -2.48630333, - 
-4.82178216, - -0.7444178, - 2.48224802, - -1.54683936, - 0.46969412, - -0.0960347, - -2.08290541, - 6.357031, - -3.49716615, - 3.28959028, - 7.83932727, - 1.51457023, - -4.14575033, - 0.02007839, - 4.20953773, - 3.66456664, - -4.67441496, - -0.13296372, - -3.77145766, - 1.49368976, - -2.53627817, - -3.14188618, - 0.24991722, - 0.8770123, - 0.16635733, - -3.15391098, - -3.7733242, - -2.25134676, - 1.00975552, - 1.38717682, + -1.98049388, + -4.58033899, + -6.95508968, + -0.79619016, + 15.58478599, + 2.7636959, + -2.99147438, + -6.94430794, + -1.77877141, + -4.5000298, + -3.12026893, + -8.42191319, + 3.8991195, + 4.85271854, + 8.30541908, + -1.0435944, + -4.42713079, + 19.70011955, + -6.53945284, + 0.85064846, + 4.36868488, + 4.77303801, + 3.00829128, + 0.70043584, + -7.69047143, + -0.0647043, + 4.56830405, + -8.67154404, + -4.64015279, + -7.62202078, + -8.97078455, + -5.19685985, + -1.66080276, + -6.03225716, + -4.06780949, + -0.53046979, + 8.3543131, + -1.84893576, + 2.42669245, + -4.26357086, + -11.33995527, + 10.98529887, + -10.70000829, + -4.50179402, + -1.34978505, + -8.83091676, + -11.85324773, + -3.6305035, + 2.89933807, + 4.65750153, + 1.25464578, + -5.06196944, + 10.05305042, + -1.83868447, + -11.57017913, + -2.03900316, + -3.37235187, + -1.37010554, + -2.93769471, + 0.11905709, + 6.99367431, + 3.48640865, + -4.16242817, + 4.44778342, + -0.98405367, + 1.81581506, + -5.31481686, + 8.72426364, + 4.78954098, + 7.67879332, + -5.00417706, + 0.79717914, + -3.20581567, + -2.96034568, + 6.31165294, + 2.9891188, + -12.2013139, + -13.67496037, + 4.77102881, + 2.71353286, + 6.83849229, + -3.50400312, + 1.3839428, + -5.07550528, + -8.5623218, + 17.64081151, + 6.46051807, + 2.89067584, + 14.23057359, + 17.85941763, + -6.46129295, + -3.43602528, + -3.13520203, + 4.45313732, + -5.23012576, + -2.65929557, + -0.66191939, + 4.47530191, + 9.33992973, + -6.29808733, ] ) @@ -230,104 +231,104 @@ def test_model(self): [ -0.33019322, -0.76332506, - -0.32665648, - -0.76601747, - -1.16441856, - -0.13627609, -1.15916671, -0.13280604, - 2.60139518, - 0.44470952, - -0.48316771, - -1.15926141, 2.59680457, 0.46049936, - -0.29459777, - -0.76433726, - -0.52091744, - -1.39903065, -0.49890317, -1.15747878, - 0.66585524, - 0.81804842, - 1.38592217, - -0.18025826, -0.2964021, -0.74953328, - -0.7427461, - 3.27935087, - -1.09340192, - 0.1462458, -0.51982728, -1.40236941, - 0.73902497, - 0.79969456, - 0.50726592, - 0.11403234, 0.64964525, 0.8084967, - -1.27543102, - -0.00571457, - 0.7748912, - -1.42492251, 1.38371838, -0.17366078, - -0.76119888, - -1.26083707, - -1.48263244, - -0.85698727, -0.7374573, 3.28274006, - -0.27029769, - -1.00478711, - -0.67481511, - -0.07978058, -1.09001574, 0.14173437, - 1.4092343, - -0.31785424, - 0.40551362, - -0.71900495, 0.7269307, 0.79545851, - -1.88407155, - 1.83983772, - -1.78413438, - -0.74852344, 0.50059876, 0.1165872, - -0.2139368, - -1.44989426, - -1.96651281, - -0.6031689, -1.28106632, -0.01107711, - 0.48796663, - 0.76500912, - 0.21308153, - -0.85297893, 0.76139868, -1.44547292, - 1.68105021, - -0.30655702, - -1.93123, - -0.34294737, -0.77352498, -1.26982082, - -0.5562998, - -0.22048683, - -0.48641512, - 0.01124872, -1.49597963, -0.86647985, - 1.17310075, - 0.59402879, - -0.705076, - 0.72991794, -0.27728806, -1.00542829, - -0.16289102, - 0.29464248, + -0.67794229, + -0.08898442, + 1.39205396, + -0.30789099, + 0.40393006, + -0.70982912, + -1.88961087, + 1.830906, + -1.78326071, + -0.75013615, + -0.22537904, + -1.47257916, + -1.9756803, + -0.60493323, + 0.48350014, + 0.77676571, + 
0.20885468, + -0.84351691, + 1.67501205, + -0.30662021, + -1.92884376, + -0.34021625, + -0.56212664, + -0.22884438, + -0.4891038, + 0.0199886, + 1.16506594, + 0.58068956, + -0.69376438, + 0.74156043, + -0.16360848, + 0.30303168, + -0.88639571, + 1.453683, + 0.79818052, + 1.2796414, + -0.8335433, + 0.13359098, + -0.53425462, + -0.4939294, + 1.05247266, + 0.49770575, + -2.03320073, + -2.27918678, + 0.79462598, + 0.45187804, + 1.13925239, + -0.58410808, + 0.23092918, + -0.84611213, + -1.42726499, + 2.93985879, + 1.07635712, + 0.48092082, + 2.37197063, + 2.97647126, + -1.07670667, + -0.57300341, + -0.52316403, + 0.74274268, + -0.87188274, + -0.44279998, + -0.11060956, + 0.74619435, + 1.55646754, + -1.05043903, ] ) diff --git a/source/tests/tf/test_pairwise_dprc.py b/source/tests/tf/test_pairwise_dprc.py index afe6885542..38b8d8b775 100644 --- a/source/tests/tf/test_pairwise_dprc.py +++ b/source/tests/tf/test_pairwise_dprc.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Test pairwise DPRc features.""" + import json import unittest diff --git a/source/tests/tf/test_virtual_type.py b/source/tests/tf/test_virtual_type.py index e9c675fe3a..a3e87a35ed 100644 --- a/source/tests/tf/test_virtual_type.py +++ b/source/tests/tf/test_virtual_type.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Test virtual atomic type.""" + import os import unittest
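
For reference, the exponential learning-rate schedule exercised by the new decay_rate_pt check in source/tests/pt/test_lr.py can be written out as a small standalone sketch. This is an illustration only, assuming the semantics the test asserts (a multiplicative decay applied every decay_step steps, a default decay_rate chosen so the rate reaches stop_lr at stop_step, and the stop value acting as a lower bound); the helper name exp_decay_lr is illustrative and is not part of the deepmd package.

import numpy as np

def exp_decay_lr(step, start_lr, stop_lr, decay_step, stop_step, decay_rate=None):
    # Default decay_rate: reach stop_lr after stop_step / decay_step decay events,
    # i.e. start_lr * decay_rate ** (stop_step / decay_step) == stop_lr.
    if decay_rate is None:
        decay_rate = np.exp(
            np.log(stop_lr / start_lr) / (stop_step / decay_step)
        )
    # Step-wise exponential decay, clamped from below at the stop value.
    lr = start_lr * decay_rate ** (step // decay_step)
    return max(lr, stop_lr)

# Example: start_lr=1e-3, stop_lr=1e-5, decay_step=500, stop_step=10000.
# exp_decay_lr(0, 1e-3, 1e-5, 500, 10000) returns 1e-3;
# exp_decay_lr(10000, 1e-3, 1e-5, 500, 10000) returns 1e-5 (up to rounding).

Passing the default decay_rate back in explicitly reproduces the default schedule, and lowering stop_lr only changes where the floor kicks in; those are the two properties decay_rate_pt checks against the implicit schedule.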