diff --git a/.github/labeler.yml b/.github/labeler.yml
index 049c9badff..b0a85679de 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,15 +1,39 @@
 Python:
-- deepmd/**/*
-- deepmd_utils/**/*
-- source/tests/**/*
-Docs: doc/**/*
-Examples: examples/**/*
-Core: source/lib/**/*
-CUDA: source/lib/src/gpu/**/*
-ROCM: source/lib/src/gpu/**/*
-OP: source/op/**/*
-C++: source/api_cc/**/*
-C: source/api_c/**/*
-LAMMPS: source/lmp/**/*
-Gromacs: source/gmx/**/*
-i-Pi: source/ipi/**/*
+- changed-files:
+  - any-glob-to-any-file:
+    - deepmd/**/*
+    - deepmd_utils/**/*
+    - source/tests/**/*
+Docs:
+- changed-files:
+  - any-glob-to-any-file: doc/**/*
+Examples:
+- changed-files:
+  - any-glob-to-any-file: examples/**/*
+Core:
+- changed-files:
+  - any-glob-to-any-file: source/lib/**/*
+CUDA:
+- changed-files:
+  - any-glob-to-any-file: source/lib/src/gpu/**/*
+ROCM:
+- changed-files:
+  - any-glob-to-any-file: source/lib/src/gpu/**/*
+OP:
+- changed-files:
+  - any-glob-to-any-file: source/op/**/*
+C++:
+- changed-files:
+  - any-glob-to-any-file: source/api_cc/**/*
+C:
+- changed-files:
+  - any-glob-to-any-file: source/api_c/**/*
+LAMMPS:
+- changed-files:
+  - any-glob-to-any-file: source/lmp/**/*
+Gromacs:
+- changed-files:
+  - any-glob-to-any-file: source/gmx/**/*
+i-Pi:
+- changed-files:
+  - any-glob-to-any-file: source/ipi/**/*
diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml
index e6377f4fab..f029517d80 100644
--- a/.github/workflows/build_cc.yml
+++ b/.github/workflows/build_cc.yml
@@ -21,7 +21,7 @@ jobs:
           dp_variant: clang
     steps:
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: '3.11'
         cache: 'pip'
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index c58a5925bf..e700109cce 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -87,7 +87,7 @@ jobs:
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
      name: Install Python
      with:
        python-version: '3.11'
@@ -141,7 +141,7 @@
     - name: Extract metadata (tags, labels) for Docker
       id: meta
-      uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934
+      uses: docker/metadata-action@31cebacef4805868f9ce9a0cb03ee36c32df2ac4
       with:
         images: ghcr.io/deepmodeling/deepmd-kit
@@ -149,7 +149,7 @@
       uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56
       with:
         context: source/install/docker
-        push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }}
+        push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' && github.actor != 'dependabot[bot]' }}
         tags: ${{ steps.meta.outputs.tags }}${{ matrix.variant }}
         labels: ${{ steps.meta.outputs.labels }}
         build-args: |
@@ -164,7 +164,7 @@
       with:
         name: artifact
         path: dist/packages
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
      name: Install Python
      with:
        python-version: '3.11'
@@ -189,7 +189,7 @@
     steps:
     - name: Deploy to GitHub Pages
       id: deployment
-      uses: actions/deploy-pages@v2
+      uses: actions/deploy-pages@v3
   pass:
     name: Pass testing build wheels
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index 2c8ba30ba1..877c780f1f 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -9,6 +9,6 @@ jobs:
     pull-requests: write
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/labeler@v4
+    - uses: actions/labeler@v5
       with:
        repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
index a98afa7a94..ef6fade8e5 100644
--- a/.github/workflows/test_cc.yml
+++ b/.github/workflows/test_cc.yml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: '3.11'
         cache: 'pip'
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index d8eddaa44f..e74c0abde2 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -18,7 +18,7 @@ jobs:
     - name: Make sudo and git work
       run: apt-get update && apt-get install -y sudo git
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: '3.11'
         # cache: 'pip'
diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
index b6011cb523..1bd78bfae0 100644
--- a/.github/workflows/test_python.yml
+++ b/.github/workflows/test_python.yml
@@ -18,7 +18,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python }}
         cache: 'pip'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 64a061dd54..ce83792f10 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,14 +23,14 @@ repos:
   - id: check-toml
 # Python
 - repo: https://github.com/PyCQA/isort
-  rev: 5.12.0
+  rev: 5.13.0
   hooks:
   - id: isort
     files: \.py$
     exclude: ^source/3rdparty
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
-  rev: v0.1.6
+  rev: v0.1.7
   hooks:
   - id: ruff
     args: ["--fix"]
diff --git a/deepmd/calculator.py b/deepmd/calculator.py
index 8636ff30d2..b9c0a81006 100644
--- a/deepmd/calculator.py
+++ b/deepmd/calculator.py
@@ -45,6 +45,8 @@ class DP(Calculator):
     type_dict : Dict[str, int], optional
         mapping of element types and their numbers, best left None and the calculator
         will infer this information from model, by default None
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Examples
     --------
@@ -83,10 +85,11 @@ def __init__(
         model: Union[str, "Path"],
         label: str = "DP",
         type_dict: Optional[Dict[str, int]] = None,
+        neighbor_list=None,
         **kwargs,
     ) -> None:
         Calculator.__init__(self, label=label, **kwargs)
-        self.dp = DeepPotential(str(Path(model).resolve()))
+        self.dp = DeepPotential(str(Path(model).resolve()), neighbor_list=neighbor_list)
         if type_dict:
             self.type_dict = type_dict
         else:
diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index b3cfa8eaa4..2b5c54fd5a 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -783,16 +783,16 @@ def _pass_filter(
                 type_i = -1
             if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor:
                 inputs_i = descrpt2r4(inputs_i, natoms)
+            self.atype_nloc = tf.reshape(
+                tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1]
+            )  # when nloc != nall, pass nloc to mask
             if len(self.exclude_types):
-                atype_nloc = tf.reshape(
-                    tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1]
-                )  # when nloc != nall, pass nloc to mask
                 mask = self.build_type_exclude_mask(
                     self.exclude_types,
                     self.ntypes,
                     self.sel_a,
                     self.ndescrpt,
-                    atype_nloc,
+                    self.atype_nloc,
                     tf.shape(inputs_i)[0],
                 )
                 inputs_i *= mask
@@ -957,7 +957,7 @@ def _filter_lower(
                 extra_embedding_index = self.nei_type_vec
             else:
                 padding_ntypes = type_embedding.shape[0]
-                atype_expand = tf.reshape(self.atype, [-1, 1])
+                atype_expand = tf.reshape(self.atype_nloc, [-1, 1])
                 idx_i = tf.tile(atype_expand * padding_ntypes, [1, self.nnei])
                 idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei])
                 idx = idx_i + idx_j
@@ -1003,13 +1003,6 @@ def _filter_lower(
                     [-1, two_side_type_embedding.shape[-1]],
                 )
 
-                atype_expand = tf.reshape(self.atype, [-1, 1])
-                idx_i = tf.tile(atype_expand * padding_ntypes, [1, self.nnei])
-                idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei])
-                idx = idx_i + idx_j
-                index_of_two_side = tf.reshape(idx, [-1])
-                self.extra_embedding_index = index_of_two_side
-
                 net_output = embedding_net(
                     two_side_type_embedding,
                     self.filter_neuron,
diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index c3ed167e14..898205d0cb 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -521,6 +521,11 @@ def build(
             self.bias_atom_e[type_i] = self.bias_atom_e[type_i]
         self.bias_atom_e = self.bias_atom_e[:ntypes_atom]
 
+        if nvnmd_cfg.enable:
+            # fix the bug: CNN and QNN have different t_bias_atom_e.
+            if "t_bias_atom_e" in nvnmd_cfg.weight.keys():
+                self.bias_atom_e = nvnmd_cfg.weight["t_bias_atom_e"]
+
         with tf.variable_scope("fitting_attr" + suffix, reuse=reuse):
             t_dfparam = tf.constant(self.numb_fparam, name="dfparam", dtype=tf.int32)
             t_daparam = tf.constant(self.numb_aparam, name="daparam", dtype=tf.int32)
diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py
index 14d75d0c44..c1071af35c 100644
--- a/deepmd/infer/__init__.py
+++ b/deepmd/infer/__init__.py
@@ -58,6 +58,7 @@ def DeepPotential(
     load_prefix: str = "load",
     default_tf_graph: bool = False,
     input_map: Optional[dict] = None,
+    neighbor_list=None,
 ) -> Union[DeepDipole, DeepGlobalPolar, DeepPolar, DeepPot, DeepDOS, DeepWFC]:
     """Factory function that will inialize appropriate potential read from `model_file`.
 
@@ -71,6 +72,8 @@ def DeepPotential(
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Returns
     -------
@@ -97,6 +100,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "dos":
         dp = DeepDOS(
@@ -111,6 +115,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "polar":
         dp = DeepPolar(
@@ -118,6 +123,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "global_polar":
         dp = DeepGlobalPolar(
@@ -125,6 +131,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "wfc":
         dp = DeepWFC(
diff --git a/deepmd/infer/deep_dipole.py b/deepmd/infer/deep_dipole.py
index 6020118135..aba098a9f3 100644
--- a/deepmd/infer/deep_dipole.py
+++ b/deepmd/infer/deep_dipole.py
@@ -27,6 +27,8 @@ class DeepDipole(DeepTensor):
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Warnings
     --------
@@ -41,6 +43,7 @@ def __init__(
         load_prefix: str = "load",
         default_tf_graph: bool = False,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ) -> None:
         # use this in favor of dict update to move attribute from class to
         # instance namespace
@@ -58,6 +61,7 @@ def __init__(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
 
     def get_dim_fparam(self) -> int:
diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
index 3f5dede1ad..0ca9f21a77 100644
--- a/deepmd/infer/deep_eval.py
+++ b/deepmd/infer/deep_eval.py
@@ -45,6 +45,9 @@ class DeepEval:
         as the initial batch size.
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
     """
 
     load_prefix: str  # set by subclass
@@ -56,6 +59,7 @@ def __init__(
         default_tf_graph: bool = False,
         auto_batch_size: Union[bool, int, AutoBatchSize] = False,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ):
         self.graph = self._load_graph(
             model_file,
@@ -86,6 +90,8 @@ def __init__(
         else:
             raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
 
+        self.neighbor_list = neighbor_list
+
     @property
     @lru_cache(maxsize=None)
     def model_type(self) -> str:
@@ -360,3 +366,92 @@ def eval_typeebd(self) -> np.ndarray:
         t_typeebd = self._get_tensor("t_typeebd:0")
         [typeebd] = run_sess(self.sess, [t_typeebd], feed_dict={})
         return typeebd
+
+    def build_neighbor_list(
+        self,
+        coords: np.ndarray,
+        cell: Optional[np.ndarray],
+        atype: np.ndarray,
+        imap: np.ndarray,
+        neighbor_list,
+    ):
+        """Make the mesh with neighbor list for a single frame.
+
+        Parameters
+        ----------
+        coords : np.ndarray
+            The coordinates of atoms. Should be of shape [natoms, 3]
+        cell : Optional[np.ndarray]
+            The cell of the system. Should be of shape [3, 3]
+        atype : np.ndarray
+            The type of atoms. Should be of shape [natoms]
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [natoms]
+        neighbor_list : ase.neighborlist.NewPrimitiveNeighborList
+            ASE neighbor list. The following method or attribute will be
+            used/set: bothways, self_interaction, update, build, first_neigh,
+            pair_second, offset_vec.
+
+        Returns
+        -------
+        natoms_vec : np.ndarray
+            The number of atoms. This tensor has the length of Ntypes + 2
+            natoms[0]: nloc
+            natoms[1]: nall
+            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc
+        coords : np.ndarray
+            The coordinates of atoms, including ghost atoms. Should be of
+            shape [nframes, nall, 3]
+        atype : np.ndarray
+            The type of atoms, including ghost atoms. Should be of shape [nall]
+        mesh : np.ndarray
+            The mesh in nei_mode=4.
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [nall]
+        ghost_map : np.ndarray
+            The index map of ghost atoms. Should be of shape [nghost]
+        """
+        pbc = np.repeat(cell is not None, 3)
+        cell = cell.reshape(3, 3)
+        positions = coords.reshape(-1, 3)
+        neighbor_list.bothways = True
+        neighbor_list.self_interaction = False
+        if neighbor_list.update(pbc, cell, positions):
+            neighbor_list.build(pbc, cell, positions)
+        first_neigh = neighbor_list.first_neigh.copy()
+        pair_second = neighbor_list.pair_second.copy()
+        offset_vec = neighbor_list.offset_vec.copy()
+        # get out-of-box neighbors
+        out_mask = np.any(offset_vec != 0, axis=1)
+        out_idx = pair_second[out_mask]
+        out_offset = offset_vec[out_mask]
+        out_coords = positions[out_idx] + out_offset.dot(cell)
+        atype = np.array(atype, dtype=int)
+        out_atype = atype[out_idx]
+
+        nloc = positions.shape[0]
+        nghost = out_idx.size
+        all_coords = np.concatenate((positions, out_coords), axis=0)
+        all_atype = np.concatenate((atype, out_atype), axis=0)
+        # convert neighbor indexes
+        ghost_map = pair_second[out_mask]
+        pair_second[out_mask] = np.arange(nloc, nloc + nghost)
+        # get the mesh
+        mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int)
+        mesh[0] = nloc
+        # ilist
+        mesh[16 : 16 + nloc] = np.arange(nloc)
+        # numnei
+        mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1]
+        # jlist
+        mesh[16 + nloc * 2 :] = pair_second
+
+        # natoms_vec
+        natoms_vec = np.zeros(self.ntypes + 2).astype(int)
+        natoms_vec[0] = nloc
+        natoms_vec[1] = nloc + nghost
+        for ii in range(self.ntypes):
+            natoms_vec[ii + 2] = np.count_nonzero(atype == ii)
+        # imap append ghost atoms
+        imap = np.concatenate((imap, np.arange(nloc, nloc + nghost)))
+        return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map
diff --git a/deepmd/infer/deep_polar.py b/deepmd/infer/deep_polar.py
index 118f8c98a7..c1f981ef86 100644
--- a/deepmd/infer/deep_polar.py
+++ b/deepmd/infer/deep_polar.py
@@ -30,6 +30,8 @@ class DeepPolar(DeepTensor):
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Warnings
     --------
@@ -44,6 +46,7 @@ def __init__(
         load_prefix: str = "load",
         default_tf_graph: bool = False,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ) -> None:
         # use this in favor of dict update to move attribute from class to
         # instance namespace
@@ -61,6 +64,7 @@ def __init__(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
 
     def get_dim_fparam(self) -> int:
@@ -83,10 +87,16 @@ class DeepGlobalPolar(DeepTensor):
         The prefix in the load computational graph
     default_tf_graph : bool
         If uses the default tf graph, otherwise build a new tf graph for evaluation
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
     """
 
     def __init__(
-        self, model_file: str, load_prefix: str = "load", default_tf_graph: bool = False
+        self,
+        model_file: str,
+        load_prefix: str = "load",
+        default_tf_graph: bool = False,
+        neighbor_list=None,
     ) -> None:
         self.tensors.update(
             {
@@ -101,6 +111,7 @@ def __init__(
             model_file,
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
+            neighbor_list=None,
         )
 
     def eval(
diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py
index fc9a6a76ed..81cfdde7a8 100644
--- a/deepmd/infer/deep_pot.py
+++ b/deepmd/infer/deep_pot.py
@@ -51,6 +51,9 @@ class DeepPot(DeepEval):
         as the initial batch size.
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
 
     Examples
     --------
@@ -78,6 +81,7 @@ def __init__(
         default_tf_graph: bool = False,
         auto_batch_size: Union[bool, int, AutoBatchSize] = True,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ) -> None:
         # add these tensors on top of what is defined by DeepTensor Class
         # use this in favor of dict update to move attribute from class to
@@ -112,6 +116,7 @@ def __init__(
             default_tf_graph=default_tf_graph,
             auto_batch_size=auto_batch_size,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
 
         # load optional tensors
@@ -479,8 +484,30 @@ def _prepare_feed_dict(
             aparam = np.reshape(aparam, [nframes, natoms * fdim])
 
         # make natoms_vec and default_mesh
-        natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-        assert natoms_vec[0] == natoms
+        if self.neighbor_list is None:
+            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
+            assert natoms_vec[0] == natoms
+            mesh = make_default_mesh(pbc, mixed_type)
+            ghost_map = None
+        else:
+            if nframes > 1:
+                raise NotImplementedError(
+                    "neighbor_list does not support multiple frames"
+                )
+            (
+                natoms_vec,
+                coords,
+                atom_types,
+                mesh,
+                imap,
+                ghost_map,
+            ) = self.build_neighbor_list(
+                coords,
+                cells if cells is not None else None,
+                atom_types,
+                imap,
+                self.neighbor_list,
+            )
 
         # evaluate
         feed_dict_test = {}
@@ -501,12 +528,12 @@ def _prepare_feed_dict(
             raise RuntimeError
         if self.has_efield:
             feed_dict_test[self.t_efield] = np.reshape(efield, [-1])
-        feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type)
+        feed_dict_test[self.t_mesh] = mesh
         if self.has_fparam:
             feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1])
         if self.has_aparam:
             feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1])
-        return feed_dict_test, imap, natoms_vec
+        return feed_dict_test, imap, natoms_vec, ghost_map
 
     def _eval_inner(
         self,
@@ -522,10 +549,13 @@ def _eval_inner(
         natoms, nframes = self._get_natoms_and_nframes(
             coords, atom_types, mixed_type=mixed_type
         )
-        feed_dict_test, imap, natoms_vec = self._prepare_feed_dict(
+        feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict(
             coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
         )
 
+        nloc = natoms_vec[0]
+        nall = natoms_vec[1]
+
         t_out = [self.t_energy, self.t_force, self.t_virial]
         if atomic:
             t_out += [self.t_ae, self.t_av]
@@ -548,6 +578,13 @@ def _eval_inner(
             )
         else:
             natoms_real = natoms
+        if ghost_map is not None:
+            # add the value of ghost atoms to real atoms
+            force = np.reshape(force, [nframes, -1, 3])
+            np.add.at(force[0], ghost_map, force[0, nloc:])
+            if atomic:
+                av = np.reshape(av, [nframes, -1, 9])
+                np.add.at(av[0], ghost_map, av[0, nloc:])
 
         # reverse map of the outputs
         force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap)
@@ -556,11 +593,15 @@ def _eval_inner(
             av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap)
 
         energy = np.reshape(energy, [nframes, 1])
-        force = np.reshape(force, [nframes, natoms, 3])
+        force = np.reshape(force, [nframes, nall, 3])
+        if nloc < nall:
+            force = force[:, :nloc, :]
         virial = np.reshape(virial, [nframes, 9])
         if atomic:
             ae = np.reshape(ae, [nframes, natoms_real, 1])
-            av = np.reshape(av, [nframes, natoms, 9])
+            av = np.reshape(av, [nframes, nall, 9])
+            if nloc < nall:
+                av = av[:, :nloc, :]
             return energy, force, virial, ae, av
         else:
             return energy, force, virial
@@ -640,10 +681,11 @@ def _eval_descriptor_inner(
         natoms, nframes = self._get_natoms_and_nframes(
             coords, atom_types, mixed_type=mixed_type
         )
-        feed_dict_test, imap, natoms_vec = self._prepare_feed_dict(
+        feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict(
             coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
         )
         (descriptor,) = run_sess(
             self.sess, [self.t_descriptor], feed_dict=feed_dict_test
         )
+        imap = imap[:natoms]
         return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap)
diff --git a/deepmd/infer/deep_tensor.py b/deepmd/infer/deep_tensor.py
index 268523e959..a803eb0c6b 100644
--- a/deepmd/infer/deep_tensor.py
+++ b/deepmd/infer/deep_tensor.py
@@ -39,6 +39,8 @@ class DeepTensor(DeepEval):
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
""" tensors: ClassVar[Dict[str, str]] = { @@ -63,6 +65,7 @@ def __init__( load_prefix: str = "load", default_tf_graph: bool = False, input_map: Optional[dict] = None, + neighbor_list=None, ) -> None: """Constructor.""" DeepEval.__init__( @@ -71,6 +74,7 @@ def __init__( load_prefix=load_prefix, default_tf_graph=default_tf_graph, input_map=input_map, + neighbor_list=neighbor_list, ) # check model type model_type = self.tensors["t_tensor"][2:-2] @@ -209,8 +213,29 @@ def eval( ) # make natoms_vec and default_mesh - natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert natoms_vec[0] == natoms + if self.neighbor_list is None: + natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) + assert natoms_vec[0] == natoms + mesh = make_default_mesh(pbc, mixed_type) + else: + if nframes > 1: + raise NotImplementedError( + "neighbor_list does not support multiple frames" + ) + ( + natoms_vec, + coords, + atom_types, + mesh, + imap, + _, + ) = self.build_neighbor_list( + coords, + cells if cells is not None else None, + atom_types, + imap, + self.neighbor_list, + ) # evaluate feed_dict_test = {} @@ -223,7 +248,7 @@ def eval( ) feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) feed_dict_test[self.t_box] = np.reshape(cells, [-1]) - feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type) + feed_dict_test[self.t_mesh] = mesh if atomic: assert ( @@ -333,8 +358,30 @@ def eval_full( ) # make natoms_vec and default_mesh - natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert natoms_vec[0] == natoms + if self.neighbor_list is None: + natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) + assert natoms_vec[0] == natoms + mesh = make_default_mesh(pbc, mixed_type) + ghost_map = None + else: + if nframes > 1: + raise NotImplementedError( + "neighbor_list does not support multiple frames" + ) + ( + natoms_vec, + coords, + atom_types, + mesh, + imap, + ghost_map, + ) = self.build_neighbor_list( + coords, + cells if cells is not None else None, + atom_types, + imap, + self.neighbor_list, + ) # evaluate feed_dict_test = {} @@ -347,7 +394,7 @@ def eval_full( ) feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) feed_dict_test[self.t_box] = np.reshape(cells, [-1]) - feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type) + feed_dict_test[self.t_mesh] = mesh t_out = [self.t_global_tensor, self.t_force, self.t_virial] if atomic: @@ -361,21 +408,39 @@ def eval_full( at = v_out[3] # atom tensor av = v_out[4] # atom virial + nloc = natoms_vec[0] + nall = natoms_vec[1] + + if ghost_map is not None: + # add the value of ghost atoms to real atoms + force = np.reshape(force, [nframes * nout, -1, 3]) + # TODO: is there some way not to use for loop? + for ii in range(nframes * nout): + np.add.at(force[ii], ghost_map, force[ii, nloc:]) + if atomic: + av = np.reshape(av, [nframes * nout, -1, 9]) + for ii in range(nframes * nout): + np.add.at(av[ii], ghost_map, av[ii, nloc:]) + # please note here the shape are wrong! 
-        force = self.reverse_map(np.reshape(force, [nframes * nout, natoms, 3]), imap)
+        force = self.reverse_map(np.reshape(force, [nframes * nout, nall, 3]), imap)
         if atomic:
             at = self.reverse_map(
                 np.reshape(at, [nframes, len(sel_at), nout]), sel_imap
             )
-            av = self.reverse_map(np.reshape(av, [nframes * nout, natoms, 9]), imap)
+            av = self.reverse_map(np.reshape(av, [nframes * nout, nall, 9]), imap)
 
         # make sure the shapes are correct here
         gt = np.reshape(gt, [nframes, nout])
-        force = np.reshape(force, [nframes, nout, natoms, 3])
+        force = np.reshape(force, [nframes, nout, nall, 3])
+        if nloc < nall:
+            force = force[:, :, :nloc, :]
         virial = np.reshape(virial, [nframes, nout, 9])
         if atomic:
             at = np.reshape(at, [nframes, len(sel_at), self.output_dim])
-            av = np.reshape(av, [nframes, nout, natoms, 9])
+            av = np.reshape(av, [nframes, nout, nall, 9])
+            if nloc < nall:
+                av = av[:, :, :nloc, :]
             return gt, force, virial, at, av
         else:
             return gt, force, virial
diff --git a/deepmd/loss/dos.py b/deepmd/loss/dos.py
index fa30552486..7d38f2b17a 100644
--- a/deepmd/loss/dos.py
+++ b/deepmd/loss/dos.py
@@ -143,16 +143,20 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_dos:
             l2_loss += atom_norm_ener * (pref_dos * l2_dos_loss)
-            more_loss["l2_dos_loss"] = l2_dos_loss
+            more_loss["l2_dos_loss"] = self.display_if_exist(l2_dos_loss, find_dos)
         if self.has_cdf:
             l2_loss += atom_norm_ener * (pref_cdf * l2_cdf_loss)
-            more_loss["l2_cdf_loss"] = l2_cdf_loss
+            more_loss["l2_cdf_loss"] = self.display_if_exist(l2_cdf_loss, find_dos)
         if self.has_ados:
             l2_loss += global_cvt_2_ener_float(pref_ados * l2_atom_dos_loss)
-            more_loss["l2_atom_dos_loss"] = l2_atom_dos_loss
+            more_loss["l2_atom_dos_loss"] = self.display_if_exist(
+                l2_atom_dos_loss, find_atom_dos
+            )
         if self.has_acdf:
             l2_loss += global_cvt_2_ener_float(pref_acdf * l2_atom_cdf_loss)
-            more_loss["l2_atom_cdf_loss"] = l2_atom_cdf_loss
+            more_loss["l2_atom_cdf_loss"] = self.display_if_exist(
+                l2_atom_cdf_loss, find_atom_dos
+            )
 
         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py
index 95997bad10..d7f83f09e5 100644
--- a/deepmd/loss/ener.py
+++ b/deepmd/loss/ener.py
@@ -291,22 +291,32 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_e:
             l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
-            more_loss["l2_ener_loss"] = l2_ener_loss
+            more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
         if self.has_f:
             l2_loss += global_cvt_2_ener_float(pref_f * l2_force_loss)
-            more_loss["l2_force_loss"] = l2_force_loss
+            more_loss["l2_force_loss"] = self.display_if_exist(
+                l2_force_loss, find_force
+            )
         if self.has_v:
             l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss))
-            more_loss["l2_virial_loss"] = l2_virial_loss
+            more_loss["l2_virial_loss"] = self.display_if_exist(
+                l2_virial_loss, find_virial
+            )
         if self.has_ae:
             l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss)
-            more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss
+            more_loss["l2_atom_ener_loss"] = self.display_if_exist(
+                l2_atom_ener_loss, find_atom_ener
+            )
         if self.has_pf:
             l2_loss += global_cvt_2_ener_float(pref_pf * l2_pref_force_loss)
-            more_loss["l2_pref_force_loss"] = l2_pref_force_loss
+            more_loss["l2_pref_force_loss"] = self.display_if_exist(
+                l2_pref_force_loss, find_atom_pref
+            )
         if self.has_gf:
             l2_loss += global_cvt_2_ener_float(pref_gf * l2_gen_force_loss)
-            more_loss["l2_gen_force_loss"] = l2_gen_force_loss
+            more_loss["l2_gen_force_loss"] = self.display_if_exist(
+                l2_gen_force_loss, find_drdq
+            )
 
         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
@@ -553,19 +563,25 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_e:
             l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
-            more_loss["l2_ener_loss"] = l2_ener_loss
+            more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
         if self.has_fr:
             l2_loss += global_cvt_2_ener_float(pref_fr * l2_force_r_loss)
-            more_loss["l2_force_r_loss"] = l2_force_r_loss
+            more_loss["l2_force_r_loss"] = self.display_if_exist(
+                l2_force_r_loss, find_force
+            )
         if self.has_fm:
             l2_loss += global_cvt_2_ener_float(pref_fm * l2_force_m_loss)
-            more_loss["l2_force_m_loss"] = l2_force_m_loss
+            more_loss["l2_force_m_loss"] = self.display_if_exist(
+                l2_force_m_loss, find_force
+            )
         if self.has_v:
             l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss))
-            more_loss["l2_virial_loss"] = l2_virial_loss
+            more_loss["l2_virial_loss"] = self.display_if_exist(l2_virial_loss, find_virial)
         if self.has_ae:
             l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss)
-            more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss
+            more_loss["l2_atom_ener_loss"] = self.display_if_exist(
+                l2_atom_ener_loss, find_atom_ener
+            )
 
         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss", tf.sqrt(l2_loss))
@@ -785,8 +801,10 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
         l2_loss += global_cvt_2_ener_float(pref_ed * l2_ener_dipole_loss)
-        more_loss["l2_ener_loss"] = l2_ener_loss
-        more_loss["l2_ener_dipole_loss"] = l2_ener_dipole_loss
+        more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
+        more_loss["l2_ener_dipole_loss"] = self.display_if_exist(
+            l2_ener_dipole_loss, find_ener_dipole
+        )
 
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
         self.l2_loss_ener_summary = tf.summary.scalar(
diff --git a/deepmd/loss/loss.py b/deepmd/loss/loss.py
index 9324077691..a719a08d81 100644
--- a/deepmd/loss/loss.py
+++ b/deepmd/loss/loss.py
@@ -8,6 +8,8 @@
     Tuple,
 )
 
+import numpy as np
+
 from deepmd.env import (
     tf,
 )
@@ -72,3 +74,20 @@ def eval(
             A dictionary that maps keys to values. It should contain key `natoms`
         """
+
+    @staticmethod
+    def display_if_exist(loss: tf.Tensor, find_property: float) -> tf.Tensor:
+        """Display NaN if labeled property is not found.
+
+        Parameters
+        ----------
+        loss : tf.Tensor
+            the loss tensor
+        find_property : float
+            whether the property is found
+        """
+        return tf.cond(
+            tf.cast(find_property, tf.bool),
+            lambda: loss,
+            lambda: tf.cast(np.nan, dtype=loss.dtype),
+        )
diff --git a/deepmd/loss/tensor.py b/deepmd/loss/tensor.py
index 74eb2b74dc..a40f95a18e 100644
--- a/deepmd/loss/tensor.py
+++ b/deepmd/loss/tensor.py
@@ -87,7 +87,7 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
             local_loss = global_cvt_2_tf_float(find_atomic) * tf.reduce_mean(
                 tf.square(self.scale * (polar - atomic_polar_hat)), name="l2_" + suffix
             )
-            more_loss["local_loss"] = local_loss
+            more_loss["local_loss"] = self.display_if_exist(local_loss, find_atomic)
             l2_loss += self.local_weight * local_loss
             self.l2_loss_local_summary = tf.summary.scalar(
                 "l2_local_loss_" + suffix, tf.sqrt(more_loss["local_loss"])
@@ -118,7 +118,7 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
                 tf.square(self.scale * (global_polar - polar_hat)), name="l2_" + suffix
             )
-            more_loss["global_loss"] = global_loss
+            more_loss["global_loss"] = self.display_if_exist(global_loss, find_global)
             self.l2_loss_global_summary = tf.summary.scalar(
                 "l2_global_loss_" + suffix,
                 tf.sqrt(more_loss["global_loss"]) / global_cvt_2_tf_float(atoms),
diff --git a/deepmd/nvnmd/data/data.py b/deepmd/nvnmd/data/data.py
index 29c8b84a37..9e6dd4cc89 100644
--- a/deepmd/nvnmd/data/data.py
+++ b/deepmd/nvnmd/data/data.py
@@ -60,6 +60,7 @@
     },
     "ctrl": {
         # NSTDM
+        "MAX_NNEI": 128,
         "NSTDM": 64,
         "NSTDM_M1": 32,
         "NSTDM_M2": 2,
@@ -67,6 +68,7 @@
         "NSEL": "NSTDM*NTYPE_MAX",
         "NSADV": "NSTDM+1",
         "VERSION": 0,
+        "SUB_VERSION": 1,
     },
     "nbit": {
         # general
@@ -116,6 +118,22 @@
     "end": "",
 }
 
+# change the configuration according to the max_nnei
+jdata_config_v0_ni128 = jdata_config_v0.copy()
+jdata_config_v0_ni256 = jdata_config_v0.copy()
+jdata_config_v0_ni256["ctrl"] = {
+    "MAX_NNEI": 256,
+    "NSTDM": 128,
+    "NSTDM_M1": 32,
+    "NSTDM_M2": 4,
+    "NSTDM_M1X": 8,
+    "NSEL": "NSTDM*NTYPE_MAX",
+    "NSADV": "NSTDM+1",
+    "VERSION": 0,
+    "SUB_VERSION": 1,
+}
+jdata_config_v0_ni256["nbit"]["NBIT_NEIB"] = 9
+
 jdata_config_v1 = {
     "dscp": {
         # basic config from deepmd model
@@ -174,6 +192,7 @@
     },
     "ctrl": {
         # NSTDM
+        "MAX_NNEI": 128,
         "NSTDM": 64,
         "NSTDM_M1": 32,
         "NSTDM_M2": 2,
@@ -181,6 +200,7 @@
         "NSEL": "NSTDM",
         "NSADV": "NSTDM+1",
         "VERSION": 1,
+        "SUB_VERSION": 1,
     },
     "nbit": {
         # general
@@ -230,6 +250,22 @@
     "end": "",
 }
 
+# change the configuration according to the max_nnei
+jdata_config_v1_ni128 = jdata_config_v1.copy()
+jdata_config_v1_ni256 = jdata_config_v1.copy()
+jdata_config_v1_ni256["ctrl"] = {
+    "MAX_NNEI": 256,
+    "NSTDM": 128,
+    "NSTDM_M1": 32,
+    "NSTDM_M2": 4,
+    "NSTDM_M1X": 8,
+    "NSEL": "NSTDM",
+    "NSADV": "NSTDM+1",
+    "VERSION": 1,
+    "SUB_VERSION": 1,
+}
+jdata_config_v1_ni256["nbit"]["NBIT_NEIB"] = 9
+
 jdata_deepmd_input_v0 = {
     "model": {
         "descriptor": {
@@ -247,6 +283,7 @@
     },
     "nvnmd": {
         "version": 0,
+        "max_nnei": 128,  # 128 or 256
         "net_size": 128,
         "config_file": "none",
         "weight_file": "none",
@@ -286,6 +323,10 @@
     },
 }
 
+jdata_deepmd_input_v0_ni128 = jdata_deepmd_input_v0.copy()
+jdata_deepmd_input_v0_ni256 = jdata_deepmd_input_v0.copy()
+jdata_deepmd_input_v0_ni256["nvnmd"]["max_nnei"] = 256
+
 jdata_deepmd_input_v1 = {
     "model": {
         "descriptor": {
@@ -308,6 +349,7 @@
     },
     "nvnmd": {
         "version": 1,
+        "max_nnei": 128,  # 128 or 256
         "net_size": 128,
         "config_file": "none",
         "weight_file": "none",
@@ -347,6 +389,10 @@
     },
 }
 
+jdata_deepmd_input_v1_ni128 = jdata_deepmd_input_v1.copy()
+jdata_deepmd_input_v1_ni256 = jdata_deepmd_input_v1.copy()
+jdata_deepmd_input_v1_ni256["nvnmd"]["max_nnei"] = 256
+
 NVNMD_WELCOME = (
     r" _   _  __     __  _   _   __  __   ____  ",
     r"| \ | | \ \   / / | \ | | |  \/  | |  _ \ ",
diff --git a/deepmd/nvnmd/descriptor/se_a.py b/deepmd/nvnmd/descriptor/se_a.py
index 67ea45924b..816f17cfa3 100644
--- a/deepmd/nvnmd/descriptor/se_a.py
+++ b/deepmd/nvnmd/descriptor/se_a.py
@@ -50,12 +50,17 @@ def check_switch_range(davg, dstd):
         else:
             min_dist = nvnmd_cfg.weight["train_attr.min_nbor_dist"]
     else:
-        min_dist = rmin
+        min_dist = None
+
+    # fix the bug: if model initial mode is 'init_from_model',
+    # we need dmin to calculate smin and smax in mapt.py
+    if min_dist is not None:
+        nvnmd_cfg.dscp["dmin"] = min_dist
+        nvnmd_cfg.save()
 
     # if davg and dstd is None, the model initial mode is in
     # 'init_from_model', 'restart', 'init_from_frz_model', 'finetune'
     if (davg is not None) and (dstd is not None):
-        nvnmd_cfg.dscp["dmin"] = min_dist
         nvnmd_cfg.get_s_range(davg, dstd)
 
diff --git a/deepmd/nvnmd/descriptor/se_atten.py b/deepmd/nvnmd/descriptor/se_atten.py
index 727a93ca45..cfffb8a90b 100644
--- a/deepmd/nvnmd/descriptor/se_atten.py
+++ b/deepmd/nvnmd/descriptor/se_atten.py
@@ -49,7 +49,13 @@ def check_switch_range(davg, dstd):
         else:
             min_dist = nvnmd_cfg.weight["train_attr.min_nbor_dist"]
     else:
-        min_dist = rmin
+        min_dist = None
+
+    # fix the bug: if model initial mode is 'init_from_model',
+    # we need dmin to calculate smin and smax in mapt.py
+    if min_dist is not None:
+        nvnmd_cfg.dscp["dmin"] = min_dist
+        nvnmd_cfg.save()
 
     # if davg and dstd is None, the model initial mode is in
     # 'init_from_model', 'restart', 'init_from_frz_model', 'finetune'
@@ -58,7 +64,6 @@ def check_switch_range(davg, dstd):
         davg = np.zeros([ntype, ndescrpt])
     if dstd is None:
         dstd = np.ones([ntype, ndescrpt])
-    nvnmd_cfg.dscp["dmin"] = min_dist
     nvnmd_cfg.get_s_range(davg, dstd)
 
diff --git a/deepmd/nvnmd/entrypoints/freeze.py b/deepmd/nvnmd/entrypoints/freeze.py
index 6c356c6118..e56a0c2130 100644
--- a/deepmd/nvnmd/entrypoints/freeze.py
+++ b/deepmd/nvnmd/entrypoints/freeze.py
@@ -52,6 +52,7 @@ def filter_tensorVariableList(tensorVariableList) -> dict:
         p1 = p1 or name.startswith("filter_type_")
         p1 = p1 or name.startswith("layer_")
         p1 = p1 or name.startswith("final_layer")
+        p1 = p1 or name.endswith("t_bias_atom_e")
         p2 = "Adam" not in name
         p3 = "XXX" not in name
         if p1 and p2 and p3:
@@ -75,4 +76,5 @@ def save_weight(sess, file_name: str = "nvnmd/weight.npy"):
     else:
         min_dist = 0.0
     dic_key_value["train_attr.min_nbor_dist"] = min_dist
+    dic_key_value["t_bias_atom_e"] = dic_key_value["fitting_attr.t_bias_atom_e"]
     FioDic().save(file_name, dic_key_value)
diff --git a/deepmd/nvnmd/entrypoints/mapt.py b/deepmd/nvnmd/entrypoints/mapt.py
index eb77913983..1299d7a74e 100644
--- a/deepmd/nvnmd/entrypoints/mapt.py
+++ b/deepmd/nvnmd/entrypoints/mapt.py
@@ -87,9 +87,22 @@ def __init__(self, config_file: str, weight_file: str, map_file: str):
         jdata["weight_file"] = weight_file
         jdata["enable"] = True
 
+        # 0 : xyz_scatter = xyz_scatter * two_embd + xyz_scatter;
+        #     Gs + 1, Gt + 0
+        # 1 : xyz_scatter = xyz_scatter * two_embd + two_embd ;
+        #     Gs + 0, Gt + 1
+        self.Gs_Gt_mode = 1
+
         nvnmd_cfg.init_from_jdata(jdata)
 
     def build_map(self):
+        if self.Gs_Gt_mode == 0:
+            self.shift_Gs = 1
+            self.shift_Gt = 0
+        if self.Gs_Gt_mode == 1:
+            self.shift_Gs = 0
+            self.shift_Gt = 1
+        #
         M = nvnmd_cfg.dscp["M1"]
         if nvnmd_cfg.version == 0:
             ndim = nvnmd_cfg.dscp["ntype"]
@@ -482,7 +495,7 @@ def build_s2g_grad(self):
             shift = 0
         if nvnmd_cfg.version == 1:
             ndim = 1
-            shift = 1
+            shift = self.shift_Gs
         #
         dic_ph = {}
         dic_ph["s"] = tf.placeholder(tf.float64, [None, 1], "t_s")
@@ -496,6 +509,13 @@ def run_s2g(self):
         r"""Build s-> graph and run it to get value of mapping table."""
         smin = nvnmd_cfg.dscp["smin"]
         smax = nvnmd_cfg.dscp["smax"]
+        # fix the bug: if model initial mode is 'init_from_model',
+        # we need dmin to calculate smin and smax in mapt.py
+        if smin == -2:
+            davg, dstd = get_normalize(nvnmd_cfg.weight)
+            nvnmd_cfg.get_s_range(davg, dstd)
+            smin = nvnmd_cfg.dscp["smin"]
+            smax = nvnmd_cfg.dscp["smax"]
 
         tf.reset_default_graph()
         dic_ph = self.build_s2g_grad()
@@ -567,9 +587,11 @@ def build_t2g(self):
                 two_side_type_embedding,
                 [-1, two_side_type_embedding.shape[-1]],
             )
-
+            # see se_atten.py in dp
             wbs = [get_filter_type_weight(nvnmd_cfg.weight, ll) for ll in range(1, 5)]
-            dic_ph["gt"] = self.build_embedding_net(two_side_type_embedding, wbs)
+            dic_ph["gt"] = (
+                self.build_embedding_net(two_side_type_embedding, wbs) + self.shift_Gt
+            )
         return dic_ph
 
     def run_t2g(self):
diff --git a/deepmd/nvnmd/entrypoints/train.py b/deepmd/nvnmd/entrypoints/train.py
index cb3dad0792..6e14b6f865 100644
--- a/deepmd/nvnmd/entrypoints/train.py
+++ b/deepmd/nvnmd/entrypoints/train.py
@@ -100,6 +100,7 @@ def normalized_input_qnn(jdata, PATH_QNN, CONFIG_CNN, WEIGHT_CNN, MAP_CNN):
     jdata_nvnmd = jdata_deepmd_input_v0["nvnmd"]
     jdata_nvnmd["enable"] = True
     jdata_nvnmd["version"] = nvnmd_cfg.version
+    jdata_nvnmd["max_nnei"] = nvnmd_cfg.max_nnei
     jdata_nvnmd["config_file"] = CONFIG_CNN
     jdata_nvnmd["weight_file"] = WEIGHT_CNN
     jdata_nvnmd["map_file"] = MAP_CNN
@@ -117,6 +118,7 @@ def normalized_input_qnn(jdata, PATH_QNN, CONFIG_CNN, WEIGHT_CNN, MAP_CNN):
 def train_nvnmd(
     *,
     INPUT: str,
+    init_model: Optional[str],
     restart: Optional[str],
     step: str,
     skip_neighbor_stat: bool = False,
@@ -142,6 +144,7 @@ def train_nvnmd(
         jdata = jdata_cmd_train.copy()
         jdata["INPUT"] = INPUT_CNN
         jdata["log_path"] = LOG_CNN
+        jdata["init_model"] = init_model
         jdata["restart"] = restart
         jdata["skip_neighbor_stat"] = skip_neighbor_stat
         train(**jdata)
diff --git a/deepmd/nvnmd/entrypoints/wrap.py b/deepmd/nvnmd/entrypoints/wrap.py
index 455dd999df..1ba2ed7384 100644
--- a/deepmd/nvnmd/entrypoints/wrap.py
+++ b/deepmd/nvnmd/entrypoints/wrap.py
@@ -156,33 +156,75 @@ def wrap_head(self, nhs, nws):
         r"""Wrap the head information.
         version
+        nhead
         nheight
-        nweight
-        rcut
+        nwidth
+        rcut cut-off radius
+        ntype number of atomic species
+        nnei number of neighbors
+        atom_ener atom bias energy
         """
         nbit = nvnmd_cfg.nbit
         ctrl = nvnmd_cfg.ctrl
+        dscp = nvnmd_cfg.dscp
+        fitn = nvnmd_cfg.fitn
+        weight = nvnmd_cfg.weight
         VERSION = ctrl["VERSION"]
+        SUB_VERSION = ctrl["SUB_VERSION"]
+        MAX_NNEI = ctrl["MAX_NNEI"]
+        nhead = 128
         NBIT_MODEL_HEAD = nbit["NBIT_MODEL_HEAD"]
         NBIT_FIXD_FL = nbit["NBIT_FIXD_FL"]
-        rcut = nvnmd_cfg.dscp["rcut"]
+        rcut = dscp["rcut"]
+        ntype = dscp["ntype"]
+        SEL = dscp["SEL"]
 
         bs = ""
         e = Encode()
         # version
-        bs = e.dec2bin(VERSION, NBIT_MODEL_HEAD)[0] + bs
+        vv = VERSION + 256 * SUB_VERSION + 256 * 256 * MAX_NNEI
+        bs = e.dec2bin(vv, NBIT_MODEL_HEAD)[0] + bs
+        # nhead
+        bs = e.dec2bin(nhead, NBIT_MODEL_HEAD)[0] + bs
         # height
         for n in nhs:
             bs = e.dec2bin(n, NBIT_MODEL_HEAD)[0] + bs
-        # weight
+        # width
         for n in nws:
             bs = e.dec2bin(n, NBIT_MODEL_HEAD)[0] + bs
-        # dscp
+        # rcut
         RCUT = e.qr(rcut, NBIT_FIXD_FL)
         bs = e.dec2bin(RCUT, NBIT_MODEL_HEAD)[0] + bs
+        # ntype
+        bs = e.dec2bin(ntype, NBIT_MODEL_HEAD)[0] + bs
+        # nnei
+        if VERSION == 0:
+            for tt in range(ntype):
+                bs = e.dec2bin(SEL[tt], NBIT_MODEL_HEAD)[0] + bs
+        if VERSION == 1:
+            bs = e.dec2bin(SEL, NBIT_MODEL_HEAD)[0] + bs
+        # atom_ener
+        # fix the bug: the different energy between qnn and lammps
+        if "t_bias_atom_e" in weight.keys():
+            atom_ener = weight["t_bias_atom_e"]
+        else:
+            atom_ener = [0] * 32
+        nlayer_fit = fitn["nlayer_fit"]
+        if VERSION == 0:
+            for tt in range(ntype):
+                w, b, _idt = get_fitnet_weight(weight, tt, nlayer_fit - 1, nlayer_fit)
+                shift = atom_ener[tt] + b[0]
+                SHIFT = e.qr(shift, NBIT_FIXD_FL)
+                bs = e.dec2bin(SHIFT, NBIT_MODEL_HEAD, signed=True)[0] + bs
+        if VERSION == 1:
+            for tt in range(ntype):
+                w, b, _idt = get_fitnet_weight(weight, 0, nlayer_fit - 1, nlayer_fit)
+                shift = atom_ener[tt] + b[0]
+                SHIFT = e.qr(shift, NBIT_FIXD_FL)
+                bs = e.dec2bin(SHIFT, NBIT_MODEL_HEAD, signed=True)[0] + bs
         # extend
         hs = e.bin2hex(bs)
-        hs = e.extend_hex(hs, NBIT_MODEL_HEAD * 32)
+        hs = e.extend_hex(hs, NBIT_MODEL_HEAD * nhead)
         return hs
 
     def wrap_dscp(self):
diff --git a/deepmd/nvnmd/utils/argcheck.py b/deepmd/nvnmd/utils/argcheck.py
index 2cbff3cbdc..2dc17ebc27 100644
--- a/deepmd/nvnmd/utils/argcheck.py
+++ b/deepmd/nvnmd/utils/argcheck.py
@@ -8,6 +8,7 @@ def nvnmd_args():
     doc_version = (
         "configuration the nvnmd version (0 | 1), 0 for 4 types, 1 for 32 types"
     )
+    doc_max_nnei = "configuration the max number of neighbors, 128|256 for version 0, 128 for version 1"
     doc_net_size_file = (
         "configuration the number of nodes of fitting_net, just can be set as 128"
     )
@@ -25,6 +26,7 @@ def nvnmd_args():
     doc_quantize_fitting_net = "enable the quantizatioin of fitting_net"
     args = [
         Argument("version", int, optional=False, default=0, doc=doc_version),
+        Argument("max_nnei", int, optional=False, default=128, doc=doc_max_nnei),
         Argument("net_size", int, optional=False, default=128, doc=doc_net_size_file),
         Argument("map_file", str, optional=False, default="none", doc=doc_map_file),
         Argument(
diff --git a/deepmd/nvnmd/utils/config.py b/deepmd/nvnmd/utils/config.py
index 96ca74c4c9..5bfd9ea54f 100644
--- a/deepmd/nvnmd/utils/config.py
+++ b/deepmd/nvnmd/utils/config.py
@@ -7,9 +7,15 @@
     NVNMD_CITATION,
     NVNMD_WELCOME,
     jdata_config_v0,
-    jdata_config_v1,
+    jdata_config_v0_ni128,
+    jdata_config_v0_ni256,
+    jdata_config_v1_ni128,
+    jdata_config_v1_ni256,
     jdata_deepmd_input_v0,
-    jdata_deepmd_input_v1,
+    jdata_deepmd_input_v0_ni128,
+    jdata_deepmd_input_v0_ni256,
+    jdata_deepmd_input_v1_ni128,
+    jdata_deepmd_input_v1_ni256,
 )
 from deepmd.nvnmd.utils.fio import (
     FioDic,
 )
@@ -50,6 +56,7 @@ def init_from_jdata(self, jdata: dict = {}):
             return None
 
         self.version = jdata["version"]
+        self.max_nnei = jdata["max_nnei"]
         self.net_size = jdata["net_size"]
         self.map_file = jdata["map_file"]
         self.config_file = jdata["config_file"]
@@ -65,7 +72,7 @@ def init_from_jdata(self, jdata: dict = {}):
             self.map = FioDic().load(self.map_file, {})
             self.weight = FioDic().load(self.weight_file, {})
 
-            self.init_config_by_version(self.version)
+            self.init_config_by_version(self.version, self.max_nnei)
             load_config = FioDic().load(self.config_file, self.config)
             self.init_from_config(load_config)
         # if load the file, set net_size
@@ -106,7 +113,11 @@ def init_from_config(self, jdata):
         r"""Initialize member element one by one."""
         if "ctrl" in jdata.keys():
             if "VERSION" in jdata["ctrl"].keys():
-                self.init_config_by_version(jdata["ctrl"]["VERSION"])
+                if "MAX_NNEI" not in jdata["ctrl"].keys():
+                    jdata["ctrl"]["MAX_NNEI"] = 128
+                self.init_config_by_version(
+                    jdata["ctrl"]["VERSION"], jdata["ctrl"]["MAX_NNEI"]
+                )
         #
         self.config = FioDic().update(jdata, self.config)
         self.config["dscp"] = self.init_dscp(self.config["dscp"], self.config)
@@ -117,16 +128,29 @@ def init_from_config(self, jdata):
         self.config["nbit"] = self.init_nbit(self.config["nbit"], self.config)
         self.init_value()
 
-    def init_config_by_version(self, version):
+    def init_config_by_version(self, version, max_nnei):
         r"""Initialize version-dependent parameters."""
         self.version = version
+        self.max_nnei = max_nnei
         log.debug("#Set nvnmd version as %d " % self.version)
         if self.version == 0:
-            self.jdata_deepmd_input = jdata_deepmd_input_v0.copy()
-            self.config = jdata_config_v0.copy()
+            if self.max_nnei == 128:
+                self.jdata_deepmd_input = jdata_deepmd_input_v0_ni128.copy()
+                self.config = jdata_config_v0_ni128.copy()
+            elif self.max_nnei == 256:
+                self.jdata_deepmd_input = jdata_deepmd_input_v0_ni256.copy()
+                self.config = jdata_config_v0_ni256.copy()
+            else:
+                log.error("The max_nnei only can be set as 128|256 for version 0")
         if self.version == 1:
-            self.jdata_deepmd_input = jdata_deepmd_input_v1.copy()
-            self.config = jdata_config_v1.copy()
+            if self.max_nnei == 128:
+                self.jdata_deepmd_input = jdata_deepmd_input_v1_ni128.copy()
+                self.config = jdata_config_v1_ni128.copy()
+            elif self.max_nnei == 256:
+                self.jdata_deepmd_input = jdata_deepmd_input_v1_ni256.copy()
+                self.config = jdata_config_v1_ni256.copy()
+            else:
+                log.error("The max_nnei only can be set as 128|256 for version 1")
 
     def init_net_size(self):
         r"""Initialize net_size."""
@@ -154,10 +178,15 @@ def init_dscp(self, jdata: dict, jdata_parent: dict = {}) -> dict:
             jdata["M1"] = jdata["neuron"][-1]
             jdata["M2"] = jdata["axis_neuron"]
             jdata["SEL"] = (jdata["sel"] + [0, 0, 0, 0])[0:4]
+            for s in jdata["sel"]:
+                if s > self.max_nnei:
+                    log.error("The sel cannot be greater than the max_nnei")
+                    exit(1)
             jdata["NNODE_FEAS"] = [1] + jdata["neuron"]
             jdata["nlayer_fea"] = len(jdata["neuron"])
             jdata["same_net"] = 1 if jdata["type_one_side"] else 0
             # neighbor
+            jdata["NI"] = self.max_nnei
             jdata["NIDP"] = int(np.sum(jdata["sel"]))
             jdata["NIX"] = 2 ** int(np.ceil(np.log2(jdata["NIDP"] / 1.5)))
             # type
@@ -168,10 +197,14 @@ def init_dscp(self, jdata: dict, jdata_parent: dict = {}) -> dict:
             jdata["M1"] = jdata["neuron"][-1]
             jdata["M2"] = jdata["axis_neuron"]
             jdata["SEL"] = jdata["sel"]
+            if jdata["sel"] > self.max_nnei:
+                log.error("The sel cannot be greater than the max_nnei")
+                exit(1)
             jdata["NNODE_FEAS"] = [1] + jdata["neuron"]
             jdata["nlayer_fea"] = len(jdata["neuron"])
             jdata["same_net"] = 1 if jdata["type_one_side"] else 0
             # neighbor
+            jdata["NI"] = self.max_nnei
             jdata["NIDP"] = int(jdata["sel"])
             jdata["NIX"] = 2 ** int(np.ceil(np.log2(jdata["NIDP"] / 1.5)))
             # type
@@ -306,6 +339,7 @@ def get_nvnmd_jdata(self):
         r"""Generate `nvnmd` in input script."""
         jdata = self.jdata_deepmd_input["nvnmd"]
         jdata["net_size"] = self.net_size
+        jdata["max_nnei"] = self.max_nnei
         jdata["config_file"] = self.config_file
         jdata["weight_file"] = self.weight_file
         jdata["map_file"] = self.map_file
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index bbcb305404..3b81740a93 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -943,6 +943,7 @@ def print_header(fp, train_results, valid_results, multi_task_mode=False):
             for k in train_results[fitting_key].keys():
                 print_str += prop_fmt % (k + "_trn")
             print_str += " %8s\n" % (fitting_key + "_lr")
+    print_str += "# If there is no available reference data, rmse_*_{val,trn} will print nan\n"
     fp.write(print_str)
     fp.flush()
diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py
index 2b3117d849..fe876a65a5 100644
--- a/deepmd/utils/batch_size.py
+++ b/deepmd/utils/batch_size.py
@@ -7,8 +7,12 @@
 )
 
 import numpy as np
+from packaging.version import (
+    Version,
+)
 
 from deepmd.env import (
+    TF_VERSION,
     tf,
 )
 from deepmd.utils.errors import (
@@ -59,7 +63,10 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
             self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1
         else:
             self.maximum_working_batch_size = initial_batch_size
-            if tf.test.is_gpu_available():
+            if (
+                Version(TF_VERSION) >= Version("1.14")
+                and tf.config.experimental.get_visible_devices("GPU")
+            ) or tf.test.is_gpu_available():
                 self.minimal_not_working_batch_size = 2**31
             else:
                 self.minimal_not_working_batch_size = (
diff --git a/deepmd_utils/main.py b/deepmd_utils/main.py
index c36b09e7e5..3433f0bff9 100644
--- a/deepmd_utils/main.py
+++ b/deepmd_utils/main.py
@@ -552,10 +552,26 @@ def main_parser() -> argparse.ArgumentParser:
         parents=[parser_log],
         help="train nvnmd model",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        epilog=textwrap.dedent(
+            """\
+            examples:
+                dp train-nvnmd input_cnn.json -s s1
+                dp train-nvnmd input_qnn.json -s s2
+                dp train-nvnmd input_cnn.json -s s1 --restart model.ckpt
+                dp train-nvnmd input_cnn.json -s s2 --init-model model.ckpt
+            """
+        ),
     )
     parser_train_nvnmd.add_argument(
         "INPUT", help="the input parameter file in json format"
     )
+    parser_train_nvnmd.add_argument(
+        "-i",
+        "--init-model",
+        type=str,
+        default=None,
+        help="Initialize the model by the provided path prefix of checkpoint files.",
+    )
     parser_train_nvnmd.add_argument(
         "-r",
         "--restart",
diff --git a/deepmd_utils/model_format.py b/deepmd_utils/model_format.py
index 2dfdf278ee..68a6d4045b 100644
--- a/deepmd_utils/model_format.py
+++ b/deepmd_utils/model_format.py
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Native DP model format for multiple backends."""
+"""Native DP model format for multiple backends.
+
+See issue #2982 for more information.
+""" import json from typing import ( List, @@ -114,7 +117,6 @@ def load_dp_model(filename: str) -> dict: """ with h5py.File(filename, "r") as f: model_dict = json.loads(f.attrs["json"]) - print(model_dict) model_dict = traverse_model_dict(model_dict, lambda x: f[x][()].copy()) return model_dict @@ -179,6 +181,16 @@ def __setitem__(self, key, value): else: raise KeyError(key) + def __getitem__(self, key): + if key in ("w", "matrix"): + return self.w + elif key in ("b", "bias"): + return self.b + elif key == "idt": + return self.idt + else: + raise KeyError(key) + class NativeNet: """Native representation of a neural network. diff --git a/doc/inference/python.md b/doc/inference/python.md index 48eb1d7df0..b5d3ca1efc 100644 --- a/doc/inference/python.md +++ b/doc/inference/python.md @@ -27,3 +27,20 @@ model_devi = calc_model_devi(coord, cell, atype, graphs) ``` Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times. Otherwise, tensorFlow will never release the memory and this may lead to an out-of-memory (OOM) error. + +## External neighbor list algorithm + +The native neighbor list algorithm of the DeePMD-kit is in $O(N^2)$ complexity ($N$ is the number of atoms). +While this is not a problem for small systems that quantum methods can afford, the large systems for molecular dynamics have slow performance. +In this case, one may pass an external neighbor list that has lower complexity to {class}`DeepPot `, once it is compatible with {class}`ase.neighborlist.NewPrimitiveNeighborList`. + +```py +import ase.neighborlist + +neighbor_list = ase.neighborlist.NewPrimitiveNeighborList( + cutoffs=6, bothways=True, self_interaction=False +) +dp = DeepPot("graph.pb", neighbor_list=neighbor_list) +``` + +The `update` and `build` methods will be called by {class}`DeepPot `, and `first_neigh`, `pair_second`, and `offset_vec` properties will be used. 
diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md
index 7a11e3170e..c11fee0bc9 100644
--- a/doc/nvnmd/nvnmd.md
+++ b/doc/nvnmd/nvnmd.md
@@ -60,6 +60,7 @@ The "nvnmd" section is defined as
 ```json
 {
     "version": 0,
+    "max_nnei":128,
     "net_size":128,
     "sel":[60, 60],
     "rcut":6.0,
@@ -73,6 +74,7 @@ where items are defined as:
 | Item      | Mean                        | Optional Value                                |
 | --------- | --------------------------- | --------------------------------------------- |
 | version   | the version of network structure | 0 or 1 |
+| max_nnei  | the maximum number of neighbors that do not distinguish element types | 128 or 256 |
 | net_size  | the size of nueral network  | 128 |
 | sel       | the number of neighbors     | version 0: integer list of lengths 1 to 4 are acceptable; version 1: integer |
 | rcut      | the cutoff radial           | (0, 8.0] |
@@ -187,6 +189,15 @@ You can also restart the CNN training from the path prefix of checkpoint files (
 dp train-nvnmd train_cnn.json -r nvnmd_cnn/model.ckpt -s s1
 ```
 
+You can also initialize the CNN model and train it by
+
+``` bash
+mv nvnmd_cnn nvnmd_cnn_bck
+cp train_cnn.json train_cnn2.json
+# please edit train_cnn2.json
+dp train-nvnmd train_cnn2.json -s s1 -i nvnmd_cnn_bck/model.ckpt
+```
+
 # Testing
 
diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md
index 023345d638..150d755795 100644
--- a/doc/third-party/lammps-command.md
+++ b/doc/third-party/lammps-command.md
@@ -82,6 +82,7 @@ Evaluate the interaction of the system by using [Deep Potential][DP] or [Deep Po
 This pair style takes the deep potential defined in a model file that usually has the .pb extension. The model can be trained and frozen by package [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit), which can have either double or single float precision interface.
 
 The model deviation evalulates the consistency of the force predictions from multiple models. By default, only the maximal, minimal and average model deviations are output. If the key `atomic` is set, then the model deviation of force prediction of each atom will be output.
+The unit follows [LAMMPS units](#units) and the [scale factor](https://docs.lammps.org/pair_hybrid.html) is not applied.
 
 By default, the model deviation is output in absolute value. If the keyword `relative` is set, then the relative model deviation of the force will be output, including values output by the keyword `atomic`.
 The relative model deviation of the force on atom $i$ is defined by
diff --git a/examples/nvnmd/train/train_cnn.json b/examples/nvnmd/train/train_cnn.json
index c89c8b13d6..1865106909 100644
--- a/examples/nvnmd/train/train_cnn.json
+++ b/examples/nvnmd/train/train_cnn.json
@@ -1,6 +1,7 @@
 {
   "nvnmd": {
     "version": 0,
+    "max_nnei": 128,
     "net_size": 128,
     "sel": [
       60,
diff --git a/examples/nvnmd/train/train_qnn.json b/examples/nvnmd/train/train_qnn.json
index 0235575f52..72b299f70d 100644
--- a/examples/nvnmd/train/train_qnn.json
+++ b/examples/nvnmd/train/train_qnn.json
@@ -1,6 +1,7 @@
 {
   "nvnmd": {
     "version": 0,
+    "max_nnei": 128,
     "net_size": 128,
     "sel": [
       60,
diff --git a/pyproject.toml b/pyproject.toml
index 04bcc69f75..fdd4904eb5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 requires = [
     # dynamic metadata API is still unstable
     # TODO: unpin the upper bound when it is stable
-    "scikit-build-core>=0.5,<0.7,!=0.6.0",
+    "scikit-build-core>=0.5,<0.8,!=0.6.0",
    "packaging",
 ]
 build-backend = "backend.dp_backend"
diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h
index b0c030962a..d05f790bf9 100644
--- a/source/api_c/include/c_api.h
+++ b/source/api_c/include/c_api.h
@@ -1271,6 +1271,13 @@ void DP_SelectMapInt(const int* in,
                      const int nall2,
                      int* out);
 
+/**
+ * @brief Destroy a char array.
+ *
+ * @param c_str The char array.
+ */
+void DP_DeleteChar(const char* c_str);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 90c1c1c918..4a376e0bec 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -35,10 +35,14 @@ struct deepmd_exception : public std::runtime_error {
 /**
  * @brief Check if any exceptions throw in the C++ API. Throw if possible.
*/ -#define DP_CHECK_OK(check_func, dp) \ - const char *err_msg = check_func(dp); \ - if (std::strlen(err_msg)) \ - throw deepmd::hpp::deepmd_exception(std::string(err_msg)); +#define DP_CHECK_OK(check_func, dp) \ + const char *err_msg = check_func(dp); \ + if (std::strlen(err_msg)) { \ + std::string err_msg_str = std::string(err_msg); \ + DP_DeleteChar(err_msg); \ + throw deepmd::hpp::deepmd_exception(err_msg_str); \ + } \ + DP_DeleteChar(err_msg); template inline void _DP_DeepPotCompute(DP_DeepPot *dp, @@ -1019,7 +1023,7 @@ class DeepPot { void get_type_map(std::string &type_map) { const char *type_map_c = DP_DeepPotGetTypeMap(dp); type_map.assign(type_map_c); - delete[] type_map_c; + DP_DeleteChar(type_map_c); }; /** * @brief Print the summary of DeePMD-kit, including the version and the build @@ -1864,7 +1868,7 @@ class DeepTensor { void get_type_map(std::string &type_map) { const char *type_map_c = DP_DeepTensorGetTypeMap(dt); type_map.assign(type_map_c); - delete[] type_map_c; + DP_DeleteChar(type_map_c); }; private: @@ -2009,9 +2013,11 @@ void inline read_file_to_string(std::string model, std::string &file_content) { if (size < 0) { // negtive size indicates error std::string error_message = std::string(c_file_content, -size); + DP_DeleteChar(c_file_content); throw deepmd::hpp::deepmd_exception(error_message); } file_content = std::string(c_file_content, size); + DP_DeleteChar(c_file_content); }; /** diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc index 9d1ed7d323..935e812cf0 100644 --- a/source/api_c/src/c_api.cc +++ b/source/api_c/src/c_api.cc @@ -1421,4 +1421,6 @@ void DP_SelectMapInt(const int* in, } } +void DP_DeleteChar(const char* c_str) { delete[] c_str; } + } // extern "C" diff --git a/source/api_c/tests/test_deeppot_a.cc b/source/api_c/tests/test_deeppot_a.cc index 50e8131cc0..63f53e16e9 100644 --- a/source/api_c/tests/test_deeppot_a.cc +++ b/source/api_c/tests/test_deeppot_a.cc @@ -172,6 +172,7 @@ TEST_F(TestInferDeepPotA, type_map) { const char* type_map = DP_DeepPotGetTypeMap(dp); char expected_type_map[] = "O H"; EXPECT_EQ(strcmp(type_map, expected_type_map), 0); + DP_DeleteChar(type_map); } class TestInferDeepPotANoPBC : public ::testing::Test { diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc index 658ec68442..d687c02e75 100644 --- a/source/api_cc/src/DataModifier.cc +++ b/source/api_cc/src/DataModifier.cc @@ -11,7 +11,13 @@ DipoleChargeModifier::DipoleChargeModifier(const std::string& model, const int& gpu_rank, const std::string& name_scope_) : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) { - init(model, gpu_rank, name_scope_); + try { + init(model, gpu_rank, name_scope_); + } catch (...) { + // Clean up and rethrow, as the destructor will not be called + delete graph_def; + throw; + } } DipoleChargeModifier::~DipoleChargeModifier() { delete graph_def; }; diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc index 23a0a7e663..018c3aca09 100644 --- a/source/api_cc/src/DeepPot.cc +++ b/source/api_cc/src/DeepPot.cc @@ -404,7 +404,13 @@ DeepPot::DeepPot(const std::string& model, const int& gpu_rank, const std::string& file_content) : inited(false), init_nbor(false), graph_def(new GraphDef()) { - init(model, gpu_rank, file_content); + try { + init(model, gpu_rank, file_content); + } catch (...) 
{ + // Clean up and rethrow, as the destructor will not be called + delete graph_def; + throw; + } } DeepPot::~DeepPot() { delete graph_def; } @@ -1236,7 +1242,15 @@ DeepPotModelDevi::DeepPotModelDevi( const int& gpu_rank, const std::vector& file_contents) : inited(false), init_nbor(false), numb_models(0) { - init(models, gpu_rank, file_contents); + try { + init(models, gpu_rank, file_contents); + } catch (...) { + // Clean up and rethrow, as the destructor will not be called + for (unsigned ii = 0; ii < numb_models; ++ii) { + delete graph_defs[ii]; + } + throw; + } } DeepPotModelDevi::~DeepPotModelDevi() { diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc index 30ff99497c..655819e086 100644 --- a/source/api_cc/src/DeepTensor.cc +++ b/source/api_cc/src/DeepTensor.cc @@ -10,7 +10,13 @@ DeepTensor::DeepTensor(const std::string &model, const int &gpu_rank, const std::string &name_scope_) : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) { - init(model, gpu_rank, name_scope_); + try { + init(model, gpu_rank, name_scope_); + } catch (...) { + // Clean up and rethrow, as the destructor will not be called + delete graph_def; + throw; + } } DeepTensor::~DeepTensor() { delete graph_def; } diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 5994e9446f..33c433a90a 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -1277,10 +1277,10 @@ void deepmd::print_summary(const std::string& pre) { deepmd::get_env_nthreads(num_intra_nthreads, num_inter_nthreads); std::cout << pre << "installed to: " + global_install_prefix << "\n"; std::cout << pre << "source: " + global_git_summ << "\n"; - std::cout << pre << "source branch: " + global_git_branch << "\n"; + std::cout << pre << "source branch: " + global_git_branch << "\n"; std::cout << pre << "source commit: " + global_git_hash << "\n"; std::cout << pre << "source commit at: " + global_git_date << "\n"; - std::cout << pre << "surpport model ver.:" + global_model_version << "\n"; + std::cout << pre << "support model ver.: " + global_model_version << "\n"; #if defined(GOOGLE_CUDA) std::cout << pre << "build variant: cuda" << "\n"; diff --git a/source/api_cc/tests/test_deepmd_exception.cc b/source/api_cc/tests/test_deepmd_exception.cc index 1cbec270b5..dd97f2786f 100644 --- a/source/api_cc/tests/test_deepmd_exception.cc +++ b/source/api_cc/tests/test_deepmd_exception.cc @@ -10,7 +10,9 @@ #include #include +#include "DataModifier.h" #include "DeepPot.h" +#include "DeepTensor.h" #include "errors.h" TEST(TestDeepmdException, deepmdexception) { std::string expected_error_message = "DeePMD-kit Error: unittest"; @@ -21,6 +23,22 @@ TEST(TestDeepmdException, deepmdexception) { } } -TEST(TestDeepmdException, deepmdexception_nofile) { +TEST(TestDeepmdException, deepmdexception_nofile_deeppot) { ASSERT_THROW(deepmd::DeepPot("_no_such_file.pb"), deepmd::deepmd_exception); } + +TEST(TestDeepmdException, deepmdexception_nofile_deeppotmodeldevi) { + ASSERT_THROW( + deepmd::DeepPotModelDevi({"_no_such_file.pb", "_no_such_file.pb"}), + deepmd::deepmd_exception); +} + +TEST(TestDeepmdException, deepmdexception_nofile_deeptensor) { + ASSERT_THROW(deepmd::DeepTensor("_no_such_file.pb"), + deepmd::deepmd_exception); +} + +TEST(TestDeepmdException, deepmdexception_nofile_dipolechargemodifier) { + ASSERT_THROW(deepmd::DipoleChargeModifier("_no_such_file.pb"), + deepmd::deepmd_exception); +} diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp index 600c4cae29..533e3538fe 
100644 --- a/source/lmp/pair_deepmd.cpp +++ b/source/lmp/pair_deepmd.cpp @@ -394,6 +394,7 @@ PairDeepMD::PairDeepMD(LAMMPS *lmp) out_each = 0; out_rel = 0; out_rel_v = 0; + stdf_comm_buff_size = 0; eps = 0.; eps_v = 0.; scale = NULL; @@ -720,13 +721,11 @@ void PairDeepMD::compute(int eflag, int vflag) { } double min = numeric_limits::max(), max = 0, avg = 0; ana_st(max, min, avg, std_f, nlocal); - int all_nlocal = 0; - MPI_Reduce(&nlocal, &all_nlocal, 1, MPI_INT, MPI_SUM, 0, world); double all_f_min = 0, all_f_max = 0, all_f_avg = 0; MPI_Reduce(&min, &all_f_min, 1, MPI_DOUBLE, MPI_MIN, 0, world); MPI_Reduce(&max, &all_f_max, 1, MPI_DOUBLE, MPI_MAX, 0, world); MPI_Reduce(&avg, &all_f_avg, 1, MPI_DOUBLE, MPI_SUM, 0, world); - all_f_avg /= double(all_nlocal); + all_f_avg /= double(atom->natoms); // std v std::vector send_v(9 * numb_models); std::vector recv_v(9 * numb_models); @@ -767,22 +766,34 @@ void PairDeepMD::compute(int eflag, int vflag) { all_v_avg = sqrt(all_v_avg / 9); } if (rank == 0) { - all_v_max *= scale[1][1] * ener_unit_cvt_factor; - all_v_min *= scale[1][1] * ener_unit_cvt_factor; - all_v_avg *= scale[1][1] * ener_unit_cvt_factor; - all_f_max *= scale[1][1] * force_unit_cvt_factor; - all_f_min *= scale[1][1] * force_unit_cvt_factor; - all_f_avg *= scale[1][1] * force_unit_cvt_factor; + all_v_max *= ener_unit_cvt_factor; + all_v_min *= ener_unit_cvt_factor; + all_v_avg *= ener_unit_cvt_factor; + all_f_max *= force_unit_cvt_factor; + all_f_min *= force_unit_cvt_factor; + all_f_avg *= force_unit_cvt_factor; fp << setw(12) << update->ntimestep << " " << setw(18) << all_v_max << " " << setw(18) << all_v_min << " " << setw(18) << all_v_avg << " " << setw(18) << all_f_max << " " << setw(18) << all_f_min << " " << setw(18) << all_f_avg; } if (out_each == 1) { - vector std_f_all(all_nlocal); + vector std_f_all(atom->natoms); // Gather std_f and tags tagint *tag = atom->tag; int nprocs = comm->nprocs; + // Grow arrays if necessary + if (atom->natoms > stdf_comm_buff_size) { + stdf_comm_buff_size = atom->natoms; + memory->destroy(stdfsend); + memory->destroy(stdfrecv); + memory->destroy(tagsend); + memory->destroy(tagrecv); + memory->create(stdfsend, stdf_comm_buff_size, "deepmd:stdfsendall"); + memory->create(stdfrecv, stdf_comm_buff_size, "deepmd:stdfrecvall"); + memory->create(tagsend, stdf_comm_buff_size, "deepmd:tagsendall"); + memory->create(tagrecv, stdf_comm_buff_size, "deepmd:tagrecvall"); + } for (int ii = 0; ii < nlocal; ii++) { tagsend[ii] = tag[ii]; stdfsend[ii] = std_f[ii]; @@ -797,11 +808,10 @@ void PairDeepMD::compute(int eflag, int vflag) { MPI_Gatherv(stdfsend, nlocal, MPI_DOUBLE, stdfrecv, counts, displacements, MPI_DOUBLE, 0, world); if (rank == 0) { - for (int dd = 0; dd < all_nlocal; ++dd) { - std_f_all[tagrecv[dd] - 1] = - stdfrecv[dd] * scale[1][1] * force_unit_cvt_factor; + for (int dd = 0; dd < atom->natoms; ++dd) { + std_f_all[tagrecv[dd] - 1] = stdfrecv[dd] * force_unit_cvt_factor; } - for (int dd = 0; dd < all_nlocal; ++dd) { + for (int dd = 0; dd < atom->natoms; ++dd) { fp << " " << setw(18) << std_f_all[dd]; } } @@ -1279,6 +1289,9 @@ void PairDeepMD::init_style() { if (out_each == 1) { int ntotal = atom->natoms; int nprocs = comm->nprocs; + if (ntotal > stdf_comm_buff_size) { + stdf_comm_buff_size = ntotal; + } memory->create(counts, nprocs, "deepmd:counts"); memory->create(displacements, nprocs, "deepmd:displacements"); memory->create(stdfsend, ntotal, "deepmd:stdfsendall"); diff --git a/source/lmp/pair_deepmd.h b/source/lmp/pair_deepmd.h index 
0f704ab45c..cd72dc7b2a 100644 --- a/source/lmp/pair_deepmd.h +++ b/source/lmp/pair_deepmd.h @@ -98,6 +98,7 @@ class PairDeepMD : public Pair { int out_each; int out_rel; int out_rel_v; + int stdf_comm_buff_size; bool single_model; bool multi_models_mod_devi; bool multi_models_no_mod_devi; diff --git a/source/op/prod_env_mat_multi_device_nvnmd.cc b/source/op/prod_env_mat_multi_device_nvnmd.cc index abca947f0a..1cbfb968f1 100644 --- a/source/op/prod_env_mat_multi_device_nvnmd.cc +++ b/source/op/prod_env_mat_multi_device_nvnmd.cc @@ -411,6 +411,9 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel { // no pbc assert(nloc == nall); nei_mode = -1; + } else if (mesh_tensor.shape().dim_size(0) > 16) { + // pass neighbor list inside the tensor + nei_mode = 4; } else if (mesh_tensor.shape().dim_size(0) == 7 || mesh_tensor.shape().dim_size(0) == 1) { throw deepmd::deepmd_exception( @@ -422,16 +425,16 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel { // Create output tensors TensorShape descrpt_shape; descrpt_shape.AddDim(nsamples); - descrpt_shape.AddDim(nloc * ndescrpt); + descrpt_shape.AddDim(int_64(nloc) * ndescrpt); TensorShape descrpt_deriv_shape; descrpt_deriv_shape.AddDim(nsamples); - descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3); + descrpt_deriv_shape.AddDim(int_64(nloc) * ndescrpt * 3); TensorShape rij_shape; rij_shape.AddDim(nsamples); - rij_shape.AddDim(nloc * nnei * 3); + rij_shape.AddDim(int_64(nloc) * nnei * 3); TensorShape nlist_shape; nlist_shape.AddDim(nsamples); - nlist_shape.AddDim(nloc * nnei); + nlist_shape.AddDim(int_64(nloc) * nnei); // define output tensor int context_output_index = 0; Tensor* descrpt_tensor = NULL; @@ -460,8 +463,16 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel { const FPTYPE* std = std_tensor.flat().data(); const int* p_type = type_tensor.flat().data(); + // must be declared outside the if block, otherwise the memory will be destroyed!
+ Tensor int_temp; + Tensor uint64_temp; + std::vector tensor_list(7); + if (device == "GPU") { + // UNDEFINE + } + // loop over samples - for (int ff = 0; ff < nsamples; ++ff) { + for (int_64 ff = 0; ff < nsamples; ++ff) { FPTYPE* em = p_em + ff * nloc * ndescrpt; FPTYPE* em_deriv = p_em_deriv + ff * nloc * ndescrpt * 3; FPTYPE* rij = p_rij + ff * nloc * nnei * 3; @@ -633,15 +644,18 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel { if (mesh_tensor.shape().dim_size(0) == 16) { // lammps neighbor list nei_mode = 3; - } else if (mesh_tensor.shape().dim_size(0) == 6) { + } else if (mesh_tensor.shape().dim_size(0) == 6 || + mesh_tensor.shape().dim_size(0) == 7) { // manual copied pbc - assert(nloc == nall); nei_mode = 1; b_nlist_map = true; - } else if (mesh_tensor.shape().dim_size(0) == 0) { + } else if (mesh_tensor.shape().dim_size(0) == 0 || + mesh_tensor.shape().dim_size(0) == 1) { // no pbc - assert(nloc == nall); nei_mode = -1; + } else if (mesh_tensor.shape().dim_size(0) > 16) { + // pass neighbor list inside the tensor + nei_mode = 4; } else { throw deepmd::deepmd_exception("invalid mesh tensor"); } @@ -691,6 +705,12 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel { context->allocate_output(context_output_index++, nmask_shape, &nmask_tensor)); + Tensor fake_type_tensor; // all zeros + TensorShape fake_type_shape; + fake_type_shape.AddDim(nsamples * nall); + OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, fake_type_shape, + &fake_type_tensor)); + FPTYPE* p_em = descrpt_tensor->flat().data(); FPTYPE* p_em_deriv = descrpt_deriv_tensor->flat().data(); FPTYPE* p_rij = rij_tensor->flat().data(); @@ -702,7 +722,25 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel { const FPTYPE* avg = avg_tensor.flat().data(); const FPTYPE* std = std_tensor.flat().data(); const int* p_type = type_tensor.flat().data(); + int* p_f_type = fake_type_tensor.flat().data(); + + if (device == "GPU") { +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +// UNDEFINE +#endif + } else if (device == "CPU") { + for (int ii = 0; ii < nsamples * nall; ii++) { + p_f_type[ii] = (p_type[ii] < 0) ? -1 : 0; + } + } + // must be declared outside the if block, otherwise the memory will be destroyed!
+ Tensor int_temp; + Tensor uint64_temp; + std::vector tensor_list(7); + if (device == "GPU") { + // UNDEFINE + } // loop over samples for (int_64 ff = 0; ff < nsamples; ++ff) { FPTYPE* em = p_em + ff * nloc * ndescrpt; @@ -714,6 +752,7 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel { const FPTYPE* coord = p_coord + ff * nall * 3; const FPTYPE* box = p_box + ff * 9; const int* type = p_type + ff * nall; + const int* f_type = p_f_type + ff * nall; if (device == "GPU") { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -729,13 +768,6 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel { std::vector coord_cpy; std::vector type_cpy; int frame_nall = nall; - std::vector fake_type(nall, 0); - for (int ii = 0; ii < nall; ii++) { - if (type[ii] < 0) { - fake_type[ii] = -1; - } - } - const int* f_type = &fake_type[0]; // prepare coord and nlist _prepare_coord_nlist_cpu( context, &coord, coord_cpy, &f_type, type_cpy, idx_mapping, inlist, diff --git a/source/tests/test_deepdipole.py b/source/tests/test_deepdipole.py index e26ad84a55..1d06b5fe92 100644 --- a/source/tests/test_deepdipole.py +++ b/source/tests/test_deepdipole.py @@ -2,6 +2,7 @@ import os import unittest +import ase.neighborlist import numpy as np from common import ( finite_difference, @@ -964,10 +965,6 @@ def test_1frame_full_atm(self): gt, ff, vv, at, av = self.dp.eval_full( self.coords, self.box, self.atype, atomic=True ) - for dd in at, ff, av: - print("\n\n") - print(", ".join(f"{ii:.18e}" for ii in dd.reshape(-1))) - print("\n\n") # check shape of the returns nframes = 1 natoms = len(self.atype) @@ -1035,3 +1032,30 @@ def test_1frame_full_atm_shuffle(self): np.testing.assert_almost_equal( vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places ) + + +@unittest.skipIf( + parse_version(tf.__version__) < parse_version("1.15"), + f"The current tf version {tf.__version__} is too low to run the new testing model.", +) +class TestDeepDipoleNewPBCNeighborList(TestDeepDipoleNewPBC): + @classmethod + def setUpClass(cls): + convert_pbtxt_to_pb( + str(tests_path / os.path.join("infer", "deepdipole_new.pbtxt")), + "deepdipole_new.pb", + ) + cls.dp = DeepDipole( + "deepdipole_new.pb", + neighbor_list=ase.neighborlist.NewPrimitiveNeighborList( + cutoffs=6, bothways=True + ), + ) + + @unittest.skip("multiple frames not supported") + def test_2frame_full_atm(self): + pass + + @unittest.skip("multiple frames not supported") + def test_2frame_old_atm(self): + pass diff --git a/source/tests/test_deeppolar.py b/source/tests/test_deeppolar.py index 271d1650c0..9627851de4 100644 --- a/source/tests/test_deeppolar.py +++ b/source/tests/test_deeppolar.py @@ -2,6 +2,7 @@ import os import unittest +import ase.neighborlist import numpy as np from common import ( tests_path, @@ -980,12 +981,6 @@ def test_1frame_full_atm(self): self.coords, self.box, self.atype, atomic=True ) - # print the values - for dd in (at, ff, av): - print("\n\n") - print(", ".join(f"{i:.18e}" for i in dd.reshape(-1))) - print("\n\n") - # check shape of the returns nframes = 1 natoms = len(self.atype) @@ -1088,3 +1083,30 @@ def test_2frame_full_atm(self): np.testing.assert_almost_equal( vv.reshape([-1]), expected_gv.reshape([-1]), decimal=default_places ) + + +@unittest.skipIf( + parse_version(tf.__version__) < parse_version("1.15"), + f"The current tf version {tf.__version__} is too low to run the new testing model.", +) +class TestDeepPolarNewPBCNeighborList(unittest.TestCase): + @classmethod + def setUpClass(cls): + convert_pbtxt_to_pb( + str(tests_path / 
os.path.join("infer", "deeppolar_new.pbtxt")), + "deeppolar_new.pb", + ) + cls.dp = DeepPolar( + "deeppolar_new.pb", + neighbor_list=ase.neighborlist.NewPrimitiveNeighborList( + cutoffs=6, bothways=True + ), + ) + + @unittest.skip("multiple frames not supported") + def test_2frame_full_atm(self): + pass + + @unittest.skip("multiple frames not supported") + def test_2frame_old_atm(self): + pass diff --git a/source/tests/test_deeppot_a.py b/source/tests/test_deeppot_a.py index 1c6cdc4afc..c229b4302c 100644 --- a/source/tests/test_deeppot_a.py +++ b/source/tests/test_deeppot_a.py @@ -3,6 +3,7 @@ import shutil import unittest +import ase.neighborlist import numpy as np from common import ( run_dp, @@ -1096,3 +1097,25 @@ def test_2frame_atm_all_param(self): np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places) expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1) np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places) + + +class TestDeepPotAPBCNeighborList(TestDeepPotAPBC): + @classmethod + def setUpClass(cls): + convert_pbtxt_to_pb( + str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb" + ) + cls.dp = DeepPot( + "deeppot.pb", + neighbor_list=ase.neighborlist.NewPrimitiveNeighborList( + cutoffs=6, bothways=True + ), + ) + + @unittest.skip("multiple frames not supported") + def test_2frame_atm(self): + pass + + @unittest.skip("Zero atoms not supported") + def test_zero_input(self): + pass diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py new file mode 100644 index 0000000000..b959ace3f6 --- /dev/null +++ b/source/tests/test_model_format_utils.py @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import os +import unittest +from copy import ( + deepcopy, +) + +import numpy as np + +from deepmd_utils.model_format import ( + NativeNet, + load_dp_model, + save_dp_model, +) + + +class TestNativeNet(unittest.TestCase): + def setUp(self) -> None: + self.w = np.full((3, 2), 3.0) + self.b = np.full((3,), 4.0) + + def test_serialize(self): + network = NativeNet() + network[1]["w"] = self.w + network[1]["b"] = self.b + network[0]["w"] = self.w + network[0]["b"] = self.b + jdata = network.serialize() + np.testing.assert_array_equal(jdata["layers"][0]["w"], self.w) + np.testing.assert_array_equal(jdata["layers"][0]["b"], self.b) + np.testing.assert_array_equal(jdata["layers"][1]["w"], self.w) + np.testing.assert_array_equal(jdata["layers"][1]["b"], self.b) + + def test_deserialize(self): + network = NativeNet.deserialize( + { + "layers": [ + {"w": self.w, "b": self.b}, + {"w": self.w, "b": self.b}, + ] + } + ) + np.testing.assert_array_equal(network[0]["w"], self.w) + np.testing.assert_array_equal(network[0]["b"], self.b) + np.testing.assert_array_equal(network[1]["w"], self.w) + np.testing.assert_array_equal(network[1]["b"], self.b) + + +class TestDPModel(unittest.TestCase): + def setUp(self) -> None: + self.w = np.full((3, 2), 3.0) + self.b = np.full((3,), 4.0) + self.model_dict = { + "type": "some_type", + "@variables": { + "layers": [ + {"w": self.w, "b": self.b}, + {"w": self.w, "b": self.b}, + ] + }, + } + self.filename = "test_dp_model_format.dp" + + def test_save_load_model(self): + save_dp_model(self.filename, deepcopy(self.model_dict)) + model = load_dp_model(self.filename) + np.testing.assert_equal(model["model"], self.model_dict) + assert "software" in model + assert "version" in model + + def tearDown(self) -> None: + if os.path.exists(self.filename): + 
os.remove(self.filename) diff --git a/source/tests/test_nvnmd_entrypoints.py b/source/tests/test_nvnmd_entrypoints.py index 3e721516f1..d82c905024 100644 --- a/source/tests/test_nvnmd_entrypoints.py +++ b/source/tests/test_nvnmd_entrypoints.py @@ -521,6 +521,7 @@ def test_mapt_cnn_v1(self): map_file = str(tests_path / "nvnmd" / "out" / "map_v1_cnn.npy") # mapt mapObj = MapTable(config_file, weight_file, map_file) + mapObj.Gs_Gt_mode = 0 mapt = mapObj.build_map() # N = 32 @@ -859,8 +860,9 @@ def test_wrap_qnn_v1(self): # test data = FioBin().load(jdata["nvnmd_model"]) idx = [0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384] + idx = [i + 128 * 4 for i in idx] pred = [data[i] for i in idx] - red_dout = [1, 0, 0, 128, 0, 0, 0, 8, 249, 0, 0, 0, 91, 252, 183, 254] + red_dout = [249, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 95, 24, 176] np.testing.assert_equal(pred, red_dout) # close nvnmd_cfg.enable = False
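
The Python test additions above (in test_deeppot_a.py, test_deepdipole.py, and test_deeppolar.py) all exercise the new `neighbor_list` argument by handing an ASE neighbor list to the inference classes instead of letting them build the native one. Below is a minimal usage sketch; only the `neighbor_list` keyword and the `NewPrimitiveNeighborList(cutoffs=6, bothways=True)` construction are taken from this patch, while the model path `graph.pb`, the coordinates, the cell, and the atom types are hypothetical placeholders.

```python
# Minimal sketch of the neighbor_list argument exercised by the added tests.
# Assumptions (not from the patch): a frozen model "graph.pb" exists, and the
# toy coordinates, cell, and atom types below are arbitrary placeholders.
import ase.neighborlist
import numpy as np

from deepmd.infer import DeepPot

# Build an external ASE neighbor list, mirroring the added test setup.
nlist = ase.neighborlist.NewPrimitiveNeighborList(cutoffs=6.0, bothways=True)
dp = DeepPot("graph.pb", neighbor_list=nlist)

coords = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]])  # 1 frame, 2 atoms, flattened
cells = 10.0 * np.eye(3).reshape([1, 9])  # 1 frame, 3x3 cell, flattened
atom_types = [0, 1]  # per-atom type indices
e, f, v = dp.eval(coords, cells, atom_types)  # energy, force, virial
```

As in the added tests, only single-frame evaluation is expected to work when an external neighbor list is supplied; the multi-frame tests are explicitly skipped.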