From 9a4a4dd1339c900d40deed0420187241fc09c4d9 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 30 Oct 2023 02:37:19 -0400
Subject: [PATCH 01/97] support compressing gelu_tf (#2957)

`gelu_tf` is equivalent to `gelu`.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd/utils/tabulate.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py
index d0a167f1dc..ade4d973ce 100644
--- a/deepmd/utils/tabulate.py
+++ b/deepmd/utils/tabulate.py
@@ -85,7 +85,10 @@ def __init__(
         # functype
         if activation_fn == ACTIVATION_FN_DICT["tanh"]:
             self.functype = 1
-        elif activation_fn == ACTIVATION_FN_DICT["gelu"]:
+        elif activation_fn in (
+            ACTIVATION_FN_DICT["gelu"],
+            ACTIVATION_FN_DICT["gelu_tf"],
+        ):
             self.functype = 2
         elif activation_fn == ACTIVATION_FN_DICT["relu"]:
             self.functype = 3

From 1429e462c3a8f72328988365412f527001459ad2 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 30 Oct 2023 02:57:35 -0400
Subject: [PATCH 02/97] fix SpecifierSet behavior with prereleases (#2959)

By default, SpecifierSet doesn't allow prereleases, which is not our
expected behavior.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 backend/find_tensorflow.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py
index aa75d5ecb4..6d7ce5087d 100644
--- a/backend/find_tensorflow.py
+++ b/backend/find_tensorflow.py
@@ -114,9 +114,9 @@ def get_tf_requirement(tf_version: str = "") -> dict:
 
     extra_requires = []
     extra_select = {}
-    if not (tf_version == "" or tf_version in SpecifierSet(">=2.12")):
+    if not (tf_version == "" or tf_version in SpecifierSet(">=2.12", prereleases=True)):
         extra_requires.append("protobuf<3.20")
-    if tf_version == "" or tf_version in SpecifierSet(">=1.15"):
+    if tf_version == "" or tf_version in SpecifierSet(">=1.15", prereleases=True):
         extra_select["mpi"] = [
             "horovod",
             "mpi4py",
@@ -138,9 +138,9 @@ def get_tf_requirement(tf_version: str = "") -> dict:
             ],
             **extra_select,
         }
-    elif tf_version in SpecifierSet("<1.15") or tf_version in SpecifierSet(
-        ">=2.0,<2.1"
-    ):
+    elif tf_version in SpecifierSet(
+        "<1.15", prereleases=True
+    ) or tf_version in SpecifierSet(">=2.0,<2.1", prereleases=True):
         return {
             "cpu": [
                 f"tensorflow=={tf_version}",

From 5be8fc930d6f4b0ff4085cf4c1fd3472f036a3ed Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 31 Oct 2023 08:58:03 +0800
Subject: [PATCH 03/97] [pre-commit.ci] pre-commit autoupdate (#2961)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.1 →
v0.1.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.1...v0.1.3)
- [github.com/psf/black-pre-commit-mirror: 23.10.0 →
23.10.1](https://github.com/psf/black-pre-commit-mirror/compare/23.10.0...23.10.1)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e168af2c8d..9ccb86f959 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,13 +30,13 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.1
+    rev: v0.1.3
     hooks:
     - id: ruff
       args: ["--fix"]
       exclude: ^source/3rdparty
 -   repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 23.10.0
+    rev: 23.10.1
     hooks:
     -   id: black-jupyter
         exclude: ^source/3rdparty

From 389d403ef97ba374fb4af5f4256c30c2299cdc95 Mon Sep 17 00:00:00 2001
From: Lysithea <52808607+CaRoLZhangxy@users.noreply.github.com>
Date: Tue, 31 Oct 2023 14:55:13 +0800
Subject: [PATCH 04/97] merge prob_sys_size with prob_sys_size;0:nsys:1.0
 (#2963)

to be consistent with Pytorch version

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd/utils/data_system.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
index 69a6cbe112..09dcac2d8d 100644
--- a/deepmd/utils/data_system.py
+++ b/deepmd/utils/data_system.py
@@ -195,8 +195,7 @@ def __init__(
         assert isinstance(self.test_size, (list, np.ndarray))
         assert len(self.test_size) == self.nsystems
 
-        # prob of batch, init pick idx
-        self.prob_nbatches = [float(i) for i in self.nbatches] / np.sum(self.nbatches)
+        # init pick idx
         self.pick_idx = 0
 
         # derive system probabilities
@@ -350,11 +349,13 @@ def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"):
             if auto_prob_style == "prob_uniform":
                 prob_v = 1.0 / float(self.nsystems)
                 probs = [prob_v for ii in range(self.nsystems)]
-            elif auto_prob_style == "prob_sys_size":
-                probs = self.prob_nbatches
-            elif auto_prob_style[:14] == "prob_sys_size;":
+            elif auto_prob_style[:13] == "prob_sys_size":
+                if auto_prob_style == "prob_sys_size":
+                    prob_style = f"prob_sys_size;0:{self.get_nsystems()}:1.0"
+                else:
+                    prob_style = auto_prob_style
                 probs = prob_sys_size_ext(
-                    auto_prob_style, self.get_nsystems(), self.nbatches
+                    prob_style, self.get_nsystems(), self.nbatches
                 )
             else:
                 raise RuntimeError("Unknown auto prob style: " + auto_prob_style)

From 1529e72a171eaeaadc4a556706b2eceb50db7cb9 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 31 Oct 2023 20:38:32 -0400
Subject: [PATCH 05/97] move to ruff formatter (#2951)

See https://astral.sh/blog/the-ruff-formatter

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml               |  9 ++++-----
 deepmd/entrypoints/train.py           |  4 +---
 deepmd/fit/polar.py                   |  5 +++--
 deepmd/utils/argcheck.py              | 16 ++++------------
 deepmd/utils/finetune.py              | 14 ++++++++------
 deepmd/utils/tabulate.py              |  3 +--
 doc/getting-started/quick_start.ipynb |  3 ++-
 7 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9ccb86f959..ce5cf54d33 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -35,11 +35,10 @@ repos:
     - id: ruff
       args: ["--fix"]
       exclude: ^source/3rdparty
--   repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 23.10.1
-    hooks:
-    -   id: black-jupyter
-        exclude: ^source/3rdparty
+      types_or: [python, pyi, jupyter]
+    - id: ruff-format
+      exclude: ^source/3rdparty
+      types_or: [python, pyi, jupyter]
 # numpydoc
 -   repo: https://github.com/Carreau/velin
     rev: 0.0.12
diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py
index 9469b7df90..227aa13644 100755
--- a/deepmd/entrypoints/train.py
+++ b/deepmd/entrypoints/train.py
@@ -404,9 +404,7 @@ def get_nbor_stat(jdata, rcut, one_type: bool = False):
                 None,
             )
             tmp_data.get_batch()
-            assert (
-                tmp_data.get_type_map()
-            ), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! "
+            assert tmp_data.get_type_map(), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! "
             if train_data is None:
                 train_data = tmp_data
             else:
diff --git a/deepmd/fit/polar.py b/deepmd/fit/polar.py
index 0a6f7d4242..8f6631866c 100644
--- a/deepmd/fit/polar.py
+++ b/deepmd/fit/polar.py
@@ -213,8 +213,9 @@ def compute_input_stats(self, all_stat, protection=1e-2):
                     # add polar_bias
                     polar_bias.append(all_stat["polarizability"][ss].reshape((1, 9)))
 
-            matrix, bias = np.concatenate(sys_matrix, axis=0), np.concatenate(
-                polar_bias, axis=0
+            matrix, bias = (
+                np.concatenate(sys_matrix, axis=0),
+                np.concatenate(polar_bias, axis=0),
             )
             atom_polar, _, _, _ = np.linalg.lstsq(matrix, bias, rcond=None)
             for itype in range(len(self.sel_type)):
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 7104eb1de4..8d09d25577 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1857,12 +1857,8 @@ def normalize_multi_task(data):
             fitting_weight=fitting_weight,
         )
     else:
-        assert (
-            not multi_loss
-        ), "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! "
-        assert (
-            not multi_learning_rate
-        ), "In single-task mode, please use 'model/learning_rate' in stead of 'model/learning_rate_dict'! "
+        assert not multi_loss, "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! "
+        assert not multi_learning_rate, "In single-task mode, please use 'model/learning_rate' in stead of 'model/learning_rate_dict'! "
     return data
 
 
@@ -1912,9 +1908,7 @@ def normalize_learning_rate_dict(fitting_keys, learning_rate_dict):
     failed_learning_rate_keys = [
         item for item in learning_rate_dict if item not in fitting_keys
     ]
-    assert (
-        not failed_learning_rate_keys
-    ), "Learning rate dict key(s) {} not have corresponding fitting keys in {}! ".format(
+    assert not failed_learning_rate_keys, "Learning rate dict key(s) {} not have corresponding fitting keys in {}! ".format(
         str(failed_learning_rate_keys), str(list(fitting_keys))
     )
     new_dict = {}
@@ -1970,9 +1964,7 @@ def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None):
         failed_weight_keys = [
             item for item in fitting_weight if item not in fitting_keys
         ]
-        assert (
-            not failed_weight_keys
-        ), "Fitting weight key(s) {} not have corresponding fitting keys in {}! ".format(
+        assert not failed_weight_keys, "Fitting weight key(s) {} not have corresponding fitting keys in {}! ".format(
             str(failed_weight_keys), str(list(fitting_keys))
         )
         sum_prob = 0.0
diff --git a/deepmd/utils/finetune.py b/deepmd/utils/finetune.py
index 4e597b1e05..cc6c0224de 100644
--- a/deepmd/utils/finetune.py
+++ b/deepmd/utils/finetune.py
@@ -41,12 +41,14 @@ def replace_model_params_with_pretrained_model(
     pretrained_jdata = json.loads(t_jdata)
 
     # Check the model type
-    assert pretrained_jdata["model"]["descriptor"]["type"] in [
-        "se_atten",
-        "se_atten_v2",
-    ] and pretrained_jdata["model"]["fitting_net"]["type"] in [
-        "ener"
-    ], "The finetune process only supports models pretrained with 'se_atten' or 'se_atten_v2' descriptor and 'ener' fitting_net!"
+    assert (
+        pretrained_jdata["model"]["descriptor"]["type"]
+        in [
+            "se_atten",
+            "se_atten_v2",
+        ]
+        and pretrained_jdata["model"]["fitting_net"]["type"] in ["ener"]
+    ), "The finetune process only supports models pretrained with 'se_atten' or 'se_atten_v2' descriptor and 'ener' fitting_net!"
 
     # Check the type map
     pretrained_type_map = pretrained_jdata["model"]["type_map"]
diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py
index ade4d973ce..2b270b1dbc 100644
--- a/deepmd/utils/tabulate.py
+++ b/deepmd/utils/tabulate.py
@@ -333,8 +333,7 @@ def _build_lower(
         elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
             tt = np.full((nspline, self.last_layer_size), stride1)
             tt[
-                int((lower - extrapolate * lower) / stride1)
-                + 1 : (
+                int((lower - extrapolate * lower) / stride1) + 1 : (
                     int((lower - extrapolate * lower) / stride1)
                     + int((upper - lower) / stride0)
                 ),
diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb
index 31209ae381..028e56dc6f 100644
--- a/doc/getting-started/quick_start.ipynb
+++ b/doc/getting-started/quick_start.ipynb
@@ -208,7 +208,8 @@
     "print(\"# the data contains %d frames\" % len(data))\n",
     "\n",
     "# random choose 40 index for validation_data\n",
-    "index_validation = np.random.choice(201, size=40, replace=False)\n",
+    "rng = np.random.default_rng()\n",
+    "index_validation = rng.choice(201, size=40, replace=False)\n",
     "\n",
     "# other indexes are training_data\n",
     "index_training = list(set(range(201)) - set(index_validation))\n",

From 1cbe63da1fb0a440691f3785524f65b5ff0c47e5 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 1 Nov 2023 21:23:57 -0400
Subject: [PATCH 06/97] docs: add theory from v2 paper (#2715)

Add theory to documentation.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Signed-off-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
---
 README.md                         |  1 +
 backend/dynamic_metadata.py       |  3 +-
 doc/conf.py                       |  1 +
 doc/freeze/compress.md            | 49 +++++++++++++++++++++++++++
 doc/model/dplr.md                 | 22 ++++++++++++
 doc/model/dprc.md                 | 38 +++++++++++++++++++--
 doc/model/index.md                |  1 +
 doc/model/index.rst               |  1 +
 doc/model/overall.md              | 26 ++++++++++++++
 doc/model/pairtab.md              | 35 +++++++++++++++++++
 doc/model/train-energy.md         | 56 +++++++++++++++++++++++++++++++
 doc/model/train-fitting-tensor.md | 34 +++++++++++++++++++
 doc/model/train-hybrid.md         | 17 ++++++++++
 doc/model/train-se-atten.md       | 45 +++++++++++++++++++++++--
 doc/model/train-se-e2-a-tebd.md   | 53 ++++++++++++++++++++++++++++-
 doc/model/train-se-e2-a.md        | 54 +++++++++++++++++++++++++++++
 doc/model/train-se-e2-r.md        | 40 ++++++++++++++++++++++
 doc/model/train-se-e3.md          | 34 ++++++++++++++++++-
 doc/nvnmd/nvnmd.md                |  2 +-
 doc/test/model-deviation.md       | 45 +++++++++++++++++++++++++
 doc/train/multi-task-training.md  | 17 ++++++++++
 doc/train/training-advanced.md    | 21 +++++++++---
 22 files changed, 582 insertions(+), 13 deletions(-)
 create mode 100644 doc/model/pairtab.md

diff --git a/README.md b/README.md
index 5914abe607..680e187012 100644
--- a/README.md
+++ b/README.md
@@ -114,6 +114,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp
     - [Deep potential long-range](doc/model/dplr.md)
     - [Deep Potential - Range Correction (DPRc)](doc/model/dprc.md)
     - [Linear model](doc/model/linear.md)
+    - [Interpolation with a pairwise potential](doc/model/pairtab.md)
 - [Training](doc/train/index.md)
     - [Training a model](doc/train/training.md)
     - [Advanced options](doc/train/training-advanced.md)
diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py
index 0502684f47..59df7dce81 100644
--- a/backend/dynamic_metadata.py
+++ b/backend/dynamic_metadata.py
@@ -44,7 +44,8 @@ def dynamic_metadata(
                 "sphinx>=3.1.1",
                 "sphinx_rtd_theme>=1.0.0rc1",
                 "sphinx_markdown_tables",
-                "myst-nb",
+                "myst-nb>=1.0.0rc0",
+                "myst-parser>=0.19.2",
                 "breathe",
                 "exhale",
                 "numpydoc",
diff --git a/doc/conf.py b/doc/conf.py
index 4aa513d1a7..b17ca82fda 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -298,6 +298,7 @@ def setup(app):
     "dollarmath",
     "colon_fence",
 ]
+myst_fence_as_directive = ("math",)
 # fix emoji issue in pdf
 latex_engine = "xelatex"
 latex_elements = {
diff --git a/doc/freeze/compress.md b/doc/freeze/compress.md
index 696d1377bf..7394f77143 100644
--- a/doc/freeze/compress.md
+++ b/doc/freeze/compress.md
@@ -1,5 +1,54 @@
 # Compress a model
 
+## Theory
+
+The compression of the DP model uses three techniques, tabulated inference, operator merging, and precise neighbor indexing, to improve the performance of model training and inference when the model parameters are properly trained.
+
+For better performance, the NN inference can be replaced by tabulated function evaluations if the input of the NN is of dimension one.
+The idea is to approximate the output of the NN by a piece-wise polynomial fitting.
+The input domain (a compact domain in $\mathbb R$) is divided into $L_c$ equally spaced intervals, in which we apply a fifth-order polynomial $g^l_m(x)$ approximation of the $m$-th output component of the NN function:
+```math
+    g^l_m(x) = a^l_m x^5 + b^l_m x^4 + c^l_m x^3 + d^l_m x^2 + e^l_m x + f^l_m,\quad
+    x \in [x_l, x_{l+1}),
+```
+where $l=1,2,\dots,L_c$ is the index of the intervals, $x_1, \dots, x_{L_c}, x_{L_c+1}$ are the endpoints of the intervals, and $a^l_m$, $b^l_m$, $c^l_m$, $d^l_m$, $e^l_m$, and $f^l_m$ are the fitting parameters.
+The fitting parameters can be computed by the equations below:
+```math
+    a^l_m = \frac{1}{2\Delta x_l^5}[12h_{m,l}-6(y'_{m,l+1}+y'_{m,l})\Delta x_l + (y''_{m,l+1}-y''_{m,l})\Delta x_l^2],
+```
+```math
+    b^l_m = \frac{1}{2\Delta x_l^4}[-30h_{m,l} +(14y'_{m,l+1}+16y'_{m,l})\Delta x_l + (-2y''_{m,l+1}+3y''_{m,l})\Delta x_l^2],
+```
+```math
+    c^l_m = \frac{1}{2\Delta x_l^3}[20h_{m,l}-(8y'_{m,l+1}+12y'_{m,l})\Delta x_l + (y''_{m,l+1}-3y''_{m,l})\Delta x_l^2],
+```
+```math
+    d^l_m = \frac{1}{2}y''_{m,l},
+```
+```math
+    e^l_m = y_{m,l}',
+```
+```math
+    f^l_m = y_{m,l},
+```
+where $\Delta x_l=x_{l+1}-x_l$ denotes the size of the interval. $h_{m,l}=y_{m,l+1}-y_{m,l}$. $y_{m,l} = y_m(x_l)$, $y'_{m,l} = y'_m(x_l)$ and $y''_{m,l} = y''_m(x_l)$ are the value, the first-order derivative, and the second-order derivative of the $m$-th component of the target NN function at the interval point $x_l$, respectively.
+The first and second-order derivatives are easily calculated by the back-propagation of the NN functions.
+
+In the standard DP model inference, taking the [two-body embedding descriptor](../model/train-se-e2-a.md) as an example, the matrix product $(\mathcal G^i)^T \mathcal R$ requires the transfer of the tensor $\mathcal G^i$ between the register and the host/device memories, which usually becomes the bottleneck of the computation due to the relatively small memory bandwidth of the GPUs.
+The compressed DP model merges the matrix multiplication $(\mathcal G^i)^T \mathcal R$ with the tabulated inference step.
+More specifically, once one column of the $(\mathcal G^i)^T$ is evaluated, it is immediately multiplied with one row of the environment matrix in the register, and the outer product is deposited to the result of $(\mathcal G^i)^T \mathcal R$.
+By the operator merging technique, the allocation of $\mathcal G^i$ and the memory movement between register and host/device memories are avoided.
+The operator merging of the three-body embedding can be derived analogously.
+
+The first dimension, $N_c$, of the environment ($\mathcal R^i$) and embedding ($\mathcal G^i$) matrices is the expected maximum number of neighbors.
+If the number of neighbors of an atom is smaller than $N_c$, the corresponding positions of the matrices are padded with zeros.
+In practice, if the real number of neighbors is significantly smaller than $N_c$, a notable amount of computation is spent on the multiplication of padding zeros.
+In the compressed DP model, the number of neighbors is precisely indexed at the tabulated inference stage, further saving computational costs.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
+
 Once the frozen model is obtained from DeePMD-kit, we can get the neural network structure and its parameters (weights, biases, etc.) from the trained model, and compress it in the following way:
 ```bash
 dp compress -i graph.pb -o graph-compress.pb
diff --git a/doc/model/dplr.md b/doc/model/dplr.md
index 035c27ee14..feea84e562 100644
--- a/doc/model/dplr.md
+++ b/doc/model/dplr.md
@@ -6,6 +6,28 @@ The method of DPLR is described in [this paper][1]. One is recommended to read t
 
 In the following, we take the DPLR model for example to introduce the training and LAMMPS simulation with the DPLR model. The DPLR model is trained in two steps.
 
+## Theory
+
+The Deep Potential Long Range (DPLR) model adds the electrostatic energy to the total energy:
+```math
+    E=E_{\text{DP}} + E_{\text{ele}},
+```
+where $E_{\text{DP}}$ is the short-range contribution constructed as the [standard energy model](./train-energy.md) that is fitted against $(E^\ast-E_{\text{ele}})$.
+$E_{\text{ele}}$ is the electrostatic energy
+introduced by a group of Gaussian distributions that is an approximation of the electronic structure of the system, and is calculated in Fourier space by
+```math
+    E_{\text{ele}} = \frac{1}{2\pi V}\sum_{m \neq 0, \|m\|\leq L} \frac{\exp({-\pi ^2 m^2/\beta ^2})}{m^2}S^2(m),
+```
+where $\beta$ is a freely tunable parameter that controls the spread of the Gaussians.
+$L$ is the cutoff in Fourier space and $S(m)$, the structure factor, is given by
+```math
+    S(m)=\sum_i q_i e^{-2\pi \imath m \boldsymbol r_i} + \sum_n q_n e^{-2\pi \imath m \boldsymbol W_n},
+```
+where $\imath = \sqrt{-1}$ denotes the imaginary unit, $\boldsymbol r_i$ indicates ion coordinates, $q_i$ is the charge of the ion $i$, and $\boldsymbol W_n$ is the $n$-th Wannier centroid (WC) which can be obtained from a separate [dipole model](./train-fitting-tensor.md).
+It can be proved that the error in the electrostatic energy introduced by the Gaussian approximations is dominated by a summation of dipole-quadrupole interactions that decay as $r^{-4}$, where $r$ is the distance between the dipole and quadrupole.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
 ## Train a deep Wannier model for Wannier centroids
 
 We use the deep Wannier model (DW) to represent the relative position of the Wannier centroid (WC) with the atom with which it is associated. One may consult the introduction of the [dipole model](train-fitting-tensor.md) for a detailed introduction. An example input `wc.json` and a small dataset `data` for tutorial purposes can be found in
diff --git a/doc/model/dprc.md b/doc/model/dprc.md
index 719421108a..c7547a769f 100644
--- a/doc/model/dprc.md
+++ b/doc/model/dprc.md
@@ -2,7 +2,39 @@
 
 Deep Potential - Range Correction (DPRc) is designed to combine with QM/MM method, and corrects energies from a low-level QM/MM method to a high-level QM/MM method:
 
-$$ E=E_\text{QM}(\mathbf R; \mathbf P)  + E_\text{QM/MM}(\mathbf R; \mathbf P) + E_\text{MM}(\mathbf R) + E_\text{DPRc}(\mathbf R) $$
+```math
+E=E_\text{QM}(\mathbf R; \mathbf P)  + E_\text{QM/MM}(\mathbf R; \mathbf P) + E_\text{MM}(\mathbf R) + E_\text{DPRc}(\mathbf R)
+```
+
+## Theory
+
+Deep Potential - Range Correction (DPRc) was initially designed to correct the potential energy from a fast, linear-scaling low-level semiempirical QM/MM theory to a high-level _ab initio_ QM/MM theory in a range-correction way to quantitatively correct short and mid-range non-bonded interactions leveraging the non-bonded lists routinely used in molecular dynamics simulations using molecular mechanical force fields such as AMBER.
+In this way, long-ranged electrostatic interactions can be modeled efficiently using the particle mesh Ewald method or its extensions for multipolar and QM/MM potentials.
+In a DPRc model, the switch function is modified to disable MM-MM interaction:
+```math
+  s_\text{DPRc}(r_{ij}) =
+  \begin{cases}
+  0, &\text{if $i \in \text{MM} \land j \in \text{MM}$}, \\
+  s(r_{ij}), &\text{otherwise},
+  \end{cases}
+```
+where $s_\text{DPRc}(r_{ij})$ is the new switch function and $s(r_{ij})$ is the old one.
+This ensures the forces between MM atoms are zero, i.e.
+```math
+{\boldsymbol F}_{ij} = - \frac{\partial E}{\partial \boldsymbol r_{ij}} = 0, \quad i \in \text{MM} \land j \in \text{MM}.
+```
+The fitting network is revised to remove energy bias from MM atoms:
+```math
+  E_i=
+  \begin{cases}
+  \mathcal{F}_0(\mathcal{D}^i),  &\text{if $i \in \text{QM}$}, \\
+  \mathcal{F}_0(\mathcal{D}^i) - \mathcal{F}_0(\mathbf{0}), &\text{if $i \in \text{MM}$},
+  \end{cases}
+```
+where $\mathbf{0}$ is a zero matrix.
+It is worth mentioning that usage of DPRc is not limited to its initial design for QM/MM correction and can be expanded to any similar interaction.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 See the [JCTC paper](https://doi.org/10.1021/acs.jctc.1c00201) for details.
 
@@ -10,7 +42,9 @@ See the [JCTC paper](https://doi.org/10.1021/acs.jctc.1c00201) for details.
 
 Instead the normal _ab initio_ data, one needs to provide the correction from a low-level QM/MM method to a high-level QM/MM method:
 
-$$ E = E_\text{high-level QM/MM} - E_\text{low-level QM/MM} $$
+```math
+E = E_\text{high-level QM/MM} - E_\text{low-level QM/MM}
+```
 
 Two levels of data use the same MM method, so $E_\text{MM}$ is eliminated.
 
diff --git a/doc/model/index.md b/doc/model/index.md
index 4ef508ec1b..6c128028a6 100644
--- a/doc/model/index.md
+++ b/doc/model/index.md
@@ -17,3 +17,4 @@
 - [Deep potential long-range](dplr.md)
 - [Deep Potential - Range Correction (DPRc)](dprc.md)
 - [Linear model](linear.md)
+- [Interpolation with a pairwise potential](pairtab.md)
diff --git a/doc/model/index.rst b/doc/model/index.rst
index 6597ce1d21..1e850cac67 100644
--- a/doc/model/index.rst
+++ b/doc/model/index.rst
@@ -20,3 +20,4 @@ Model
    dplr
    dprc
    linear
+   pairtab
diff --git a/doc/model/overall.md b/doc/model/overall.md
index 3d4052e464..f8fb2fa151 100644
--- a/doc/model/overall.md
+++ b/doc/model/overall.md
@@ -1,5 +1,31 @@
 # Overall
 
+## Theory
+
+A Deep Potential (DP) model, denoted by $\mathcal{M}$, can be generally represented as
+
+```math
+\boldsymbol y_i = \mathcal M (\boldsymbol x_i, \{\boldsymbol x_j\}_{j\in n(i)}; \boldsymbol \theta)
+= \mathcal{F} \big( \mathcal{D} (\boldsymbol x_i, \{\boldsymbol x_j\}_{j\in n(i)}; \boldsymbol \theta_d) ; \boldsymbol \theta_f \big),
+```
+
+where $\boldsymbol{y}_i$ denotes the fitting properties, $\mathcal{F}$ is the fitting network, and $\mathcal{D}$ is the descriptor.
+$\boldsymbol{x}_i = (\boldsymbol r_i, \alpha_i)$, with $\boldsymbol r_i$ being the Cartesian coordinates and $\alpha_i$ being the chemical species, denotes the degrees of freedom of the atom $i$.
+
+The indices of the neighboring atoms (i.e. atoms within a certain cutoff radius) of atom $i$ are given by the notation $n(i)$.
+Note that the Cartesian coordinates can be either under the periodic boundary condition (PBC) or in vacuum (under the open boundary condition).
+The network parameters are denoted by $\boldsymbol \theta = \{\boldsymbol \theta_d, \boldsymbol \theta_f\}$, where $\boldsymbol \theta_d$ and $\boldsymbol\theta_f$ yield the network parameters of the descriptor (if any) and those of the fitting network, respectively.
+From the above equation, one may compute the global property of the system by
+```math
+    \boldsymbol y = \sum_{i=1}^N \boldsymbol y_i,
+```
+where $N$ is the number of atoms in a frame.
+For example, if $y_i$ represents the potential energy contribution of atom $i$, then $y$ gives the total potential energy of the frame.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
+
 A model has two parts, a descriptor that maps atomic configuration to a set of symmetry invariant features, and a fitting net that takes descriptor as input and predicts the atomic contribution to the target physical property. It's defined in the {ref}`model <model>` section of the `input.json`, for example,
 ```json
     "model": {
diff --git a/doc/model/pairtab.md b/doc/model/pairtab.md
new file mode 100644
index 0000000000..e3f0118f2c
--- /dev/null
+++ b/doc/model/pairtab.md
@@ -0,0 +1,35 @@
+# Interpolation with a pairwise potential
+
+## Theory
+In applications like the radiation damage simulation, the interatomic distance may become too close, so that the DFT calculations fail.
+In such cases, the DP model that is an approximation of the DFT potential energy surface is usually replaced by an empirical potential, like the Ziegler-Biersack-Littmark (ZBL) screened nuclear repulsion potential in the radiation damage simulations.
+The DeePMD-kit package supports the interpolation between DP and an empirical pairwise potential
+```math
+  E_i = (1-w_i) E_i^{\mathrm{DP}} + w_i (E_i^0 + E_i^{\mathrm{pair}}),
+```
+where $w_i$ is the interpolation weight and $E_i^{\mathrm{pair}}$ is the atomic contribution due to the pairwise potential $u^{\mathrm{pair}}(r)$, i.e.
+```math
+  E_i^{\mathrm{pair}} = \sum_{j\in n(i)} u^{\mathrm{pair}}(r_{ij}).
+```
+The interpolation weight $w_i$ is defined by
+```math
+    w_i =
+    \begin{cases}
+    1, & \sigma_i \lt r_a, \\
+    u_i^3 (-6 u_i^2 +15 u_i -10) +1, & r_a \leq \sigma_i \lt r_b, \\
+    0, & \sigma_i \geq r_b,
+    \end{cases}
+```
+where $u_i = (\sigma_i - r_a ) / (r_b - r_a)$.
+$E_i^0$ is the atom energy bias.
+In the range $[r_a, r_b]$, the DP model is smoothly switched off and the pairwise potential is smoothly switched on from $r_b$ to $r_a$. The $\sigma_i$ is the softmin of the distance between atom $i$ and its neighbors,
+```math
+  \sigma_i =
+  \dfrac
+  {\sum\limits_{j\in n(i)} r_{ij} e^{-r_{ij} / \alpha_s}}
+  {\sum\limits_{j\in n(i)} e^{-r_{ij} / \alpha_s}},
+```
+where the scale $\alpha_s$ is a tunable scale of the interatomic distance $r_{ij}$.
+The pairwise potential $u^{\textrm{pair}}(r)$ is defined by a user-defined table that provides the value of $u^{\textrm{pair}}$ on an evenly discretized grid from 0 to the cutoff distance.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
diff --git a/doc/model/train-energy.md b/doc/model/train-energy.md
index af3e4969b3..90e027d7a0 100644
--- a/doc/model/train-energy.md
+++ b/doc/model/train-energy.md
@@ -2,6 +2,62 @@
 
 In this section, we will take `$deepmd_source_dir/examples/water/se_e2_a/input.json` as an example of the input file.
 
+## Theory
+
+In the DP model, we let the fitting network $\mathcal{F}_ 0$ map the descriptor $\mathcal{D}^i$ to a scalar, where the subscript $0$ means that the output is a zero-order tensor (i.e. scalar).  The model can then be used to predict the total potential energy of the system by
+```math
+    E  =  \sum_i E_i = \sum_i \mathcal F_0 (\mathcal D^i),
+```
+where the output of the fitting network is treated as the atomic potential energy contribution, i.e. $E_i$.
+The output scalar can also be treated as other scalar properties defined on an atom, for example, the partial charge of atom $i$.
+
+In some cases, atom-specific or frame-specific parameters, such as electron temperature, may be treated as extra input to the fitting network.
+We denote the atomic and frame-specific parameters by $\boldsymbol{P}^i\in \mathbb{R}^{N_p}$ (with $N_p$ being the dimension) and $\boldsymbol{Q}\in \mathbb{R}^{N_q}$ (with $N_q$ being the dimension), respectively. The atomic energy is then given by
+```math
+    E_i=\mathcal{F}_0(\{\mathcal{D}^i, \boldsymbol{P}^i, \boldsymbol Q\}).
+```
+
+The atomic force $\boldsymbol{F}_ {i}$ and the virial tensor $\boldsymbol{\Xi} = (\Xi_{\alpha\beta})$ (if PBC is applied) can be derived from the potential energy $E$:
+```math
+    F_{i,\alpha}=-\frac{\partial E}{\partial r_{i,\alpha}},
+```
+```math
+    \Xi_{\alpha\beta}=-\sum_{\gamma} \frac{\partial E}{\partial h_{\gamma\alpha}} h_{\gamma\beta},
+```
+where $r_{i,\alpha}$ and $F_{i,\alpha}$ denote the $\alpha$-th component of the coordinate and force of atom $i$. $h_{\alpha\beta}$ is the $\beta$-th component of the $\alpha$-th basis vector of the simulation region.
+
+The properties $\eta$ of the energy loss function could be energy $E$, force $\boldsymbol{F}$, virial $\boldsymbol{\Xi}$, relative energy $\Delta E$, or any combination among them, and the loss functions of them are
+```math
+    L_E(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{N}(E(\boldsymbol{x};\boldsymbol{\theta})-E^*)^2,
+```
+```math
+    L_F(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N}\sum_{\alpha=1}^3(F_{k,\alpha}(\boldsymbol{x};\boldsymbol{\theta})-F_{k,\alpha}^*)^2,
+```
+```math
+    L_\Xi(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{9N}\sum_{\alpha,\beta=1}^{3}(\Xi_{\alpha\beta}(\boldsymbol{x};\boldsymbol{\theta})-\Xi_{\alpha\beta}^*)^2,
+```
+```math
+    L_{\Delta E}(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{N}({\Delta E}(\boldsymbol{x};\boldsymbol{\theta})-{\Delta E}^*)^2,
+```
+where $F_{k,\alpha}$ is the $\alpha$-th component of the force on atom $k$, and the superscript $\ast$ indicates the label of the property that should be provided in advance.
+Using $N$ ensures that each loss of fitting property is averaged over atomic contributions before they contribute to the total loss by weight.
+
+If some atoms are more important than others, for example, certain atoms play an essential role when calculating free energy profiles or kinetic isotope effects, the MSE of atomic forces with prefactors $q_{k}$ can also be used as the loss function:
+```math
+    L_F^p(\mathbf{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N} \sum_{\alpha} q_{k} (F_{k,\alpha}(\mathbf{x};\boldsymbol{\theta})-F_{k,\alpha}^*)^2.
+```
+The atomic forces with larger prefactors will be fitted more accurately than those on other atoms.
+
+If some forces are quite large, for example, forces can be greater than 60 eV/Å in high-temperature reactive simulations, one may also prefer that the force loss be relative to the magnitude:
+```math
+    L^r_F(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N}\sum_\alpha \left(\frac{F_{k,\alpha}(\boldsymbol{x};\boldsymbol{\theta})-F_{k,\alpha}^*}{\lvert\boldsymbol{F}^\ast_k\rvert + \nu}\right)^2,
+```
+where $\nu$ is a small constant used to protect
+an atom where the magnitude of $\boldsymbol{F}^\ast_k$ is small from having a large $L^r_F$.
+Benefiting from the relative force loss, small forces can be fitted more accurately.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
 ## The fitting network
 
 The construction of the fitting net is given by section {ref}`fitting_net <model/fitting_net>`
diff --git a/doc/model/train-fitting-tensor.md b/doc/model/train-fitting-tensor.md
index d7c06a25ed..90370adfcf 100644
--- a/doc/model/train-fitting-tensor.md
+++ b/doc/model/train-fitting-tensor.md
@@ -11,6 +11,40 @@ The training and validation data are also provided our examples. But note that *
 
 Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit a tensor, one needs to modify {ref}`model/fitting_net <model/fitting_net>` and {ref}`loss <loss>`.
 
+## Theory
+
+To represent the first-order tensorial properties (i.e. vector properties), we let the fitting network, denoted by $\mathcal F_{1}$, output an $M$-dimensional vector; then we have the representation,
+
+```math
+(T_i^{(1)})_\alpha =
+\frac{1}{N_c}
+\sum_{j=1}^{N_c}\sum_{m=1}^M (\mathcal G^i)_{jm} (\mathcal R^i)_{j,\alpha+1}
+(\mathcal F_{1}(\mathcal D^i))_m, \ \alpha=1,2,3.
+```
+We let the fitting network $\mathcal F_{2}$ output an $M$-dimensional vector, and the second-order tensorial properties (matrix properties) are formulated as
+```math
+(T_i^{(2)})_{\alpha\beta} =
+\frac{1}{N_c^2}
+\sum_{j=1}^{N_c}\sum_{k=1}^{N_c}\sum_{m=1}^M
+(\mathcal G^i)_{jm}
+(\mathcal R^i)_{j,\alpha+1}
+(\mathcal R^i)_{k,\beta+1}
+(\mathcal G^i)_{km}
+(\mathcal F_{2}(\mathcal D^i))_m,
+\ \alpha,\beta=1,2,3,
+```
+
+where $\mathcal{G}^i$ and $\mathcal{R}^i$ can be found in [`se_e2_a`](./train-se-e2-a.md).
+Thus, the tensor fitting network requires the descriptor to have the same or similar form as the DeepPot-SE descriptor.
+$\mathcal{F}_1$ and $\mathcal F_2$ are the neural network functions.
+The total tensor $\boldsymbol{T}$ (total dipole $\boldsymbol{T}^{(1)}$ or total polarizability $\boldsymbol{T}^{(2)}$) is the sum of the atomic tensors:
+```math
+    \boldsymbol{T} = \sum_i \boldsymbol{T}_i.
+```
+The tensorial models can be used to calculate IR spectrum and Raman spectrum.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
 ## The fitting Network
 
 The {ref}`fitting_net <model/fitting_net>` section tells DP which fitting net to use.
diff --git a/doc/model/train-hybrid.md b/doc/model/train-hybrid.md
index 37666668c7..58b66f25e0 100644
--- a/doc/model/train-hybrid.md
+++ b/doc/model/train-hybrid.md
@@ -2,6 +2,23 @@
 
 This descriptor hybridizes multiple descriptors to form a new descriptor. For example, we have a list of descriptors denoted by $\mathcal D_1$, $\mathcal D_2$, ..., $\mathcal D_N$, the hybrid descriptor this the concatenation of the list, i.e. $\mathcal D = (\mathcal D_1, \mathcal D_2, \cdots, \mathcal D_N)$.
 
+## Theory
+
+A hybrid descriptor $\mathcal{D}^i_\text{hyb}$ concatenates multiple kinds of descriptors into one descriptor:
+```math
+    \mathcal{D}^{i}_\text{hyb} = \{
+    \begin{array}{cccc}
+        \mathcal{D}^{i}_1 & \mathcal{D}^{i}_2 & \cdots & \mathcal{D}^{i}_n
+    \end{array}
+    \}.
+```
+The list of descriptors can be different types or the same descriptors with different parameters.
+This way, one can set different cutoff radii for different descriptors.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
+
 To use the descriptor in DeePMD-kit, one firstly set the {ref}`type <model/descriptor/type>` to {ref}`hybrid <model/descriptor[hybrid]>`, then provide the definitions of the descriptors by the items in the `list`,
 ```json
         "descriptor" :{
diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md
index 55bb0458f7..7480ddbc12 100644
--- a/doc/model/train-se-atten.md
+++ b/doc/model/train-se-atten.md
@@ -8,9 +8,48 @@ Here we propose DPA-1, a Deep Potential model with a novel attention mechanism,
 
 See [this paper](https://arxiv.org/abs/2208.08236) for more information. DPA-1 is implemented as a new descriptor `"se_atten"` for model training, which can be used after simply editing the input.json.
 
-## Installation
-Follow the [standard installation](../install/install-from-source.md#install-the-python-interface) of Python interface in the DeePMD-kit.
-After that, you can smoothly use the DPA-1 model with the following instructions.
+## Theory
+
+The attention-based descriptor $\mathcal{D}^i \in \mathbb{R}^{M \times M_{<}}$, which is proposed in the pretrainable DPA-1 model, is given by
+
+```math
+    \mathcal{D}^i = \frac{1}{N_c^2}(\hat{\mathcal{G}}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \hat{\mathcal{G}}^i_<,
+```
+where $\hat{\mathcal{G}}^i$ represents the embedding matrix $\mathcal{G}^i$ after additional self-attention mechanism and $\mathcal{R}^i$ is defined by the full case in the [`se_e2_a`](./train-se-e2-a.md).
+Note that we obtain $\mathcal{G}^i$ using the type embedding method by default in this descriptor.
+
+To perform the self-attention mechanism, the queries $\mathcal{Q}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, keys $\mathcal{K}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, and values $\mathcal{V}^{i,l} \in \mathbb{R}^{N_c\times d_v}$ are first obtained:
+```math
+    \left(\mathcal{Q}^{i,l}\right)_{j}=Q_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right),
+```
+```math
+    \left(\mathcal{K}^{i,l}\right)_{j}=K_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right),
+```
+```math
+    \left(\mathcal{V}^{i,l}\right)_{j}=V_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right),
+```
+where $Q_{l}$, $K_{l}$, $V_{l}$ represent three trainable linear transformations that output the queries and keys of dimension $d_k$ and values of dimension $d_v$, and $l$ is the index of the attention layer.
+The input embedding matrix to the attention layers,  denoted by $\mathcal{G}^{i,0}$, is chosen as the two-body embedding matrix.
+
+Then the scaled dot-product attention method is adopted:
+```math
+A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})=\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right)\mathcal{V}^{i,l},
+```
+where $\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) \in \mathbb{R}^{N_c\times N_c}$ denotes the attention weights.
+In the original attention method, one typically has $\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}\right)=\mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right)$, with $\sqrt{d_{k}}$ being the normalization temperature.
+This is slightly modified to incorporate the angular information:
+```math
+\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) = \mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right) \odot \hat{\mathcal{R}}^{i}(\hat{\mathcal{R}}^{i})^{T},
+```
+where $\hat{\mathcal{R}}^{i} \in \mathbb{R}^{N_c\times 3}$ denotes the normalized relative coordinates, $\hat{\mathcal{R}}^{i}_{j} = \frac{\boldsymbol{r}_{ij}}{\lVert \boldsymbol{r}_{ij} \rVert}$, and $\odot$ means element-wise multiplication.
+
+Then layer normalization is added in a residual way to finally obtain the self-attention local embedding matrix $\hat{\mathcal{G}}^{i} = \mathcal{G}^{i,L_a}$ after $L_a$ attention layers:[^1]
+```math
+\mathcal{G}^{i,l} = \mathcal{G}^{i,l-1} + \mathrm{LayerNorm}(A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})).
+```
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
 
 ## Introduction to new features of DPA-1
 Next, we will list the detailed settings in input.json and the data format, especially for large systems with dozens of elements. An example of DPA-1 input can be found [here](../../examples/water/se_atten/input.json).
diff --git a/doc/model/train-se-e2-a-tebd.md b/doc/model/train-se-e2-a-tebd.md
index 7528202ff2..cb6ce6674f 100644
--- a/doc/model/train-se-e2-a-tebd.md
+++ b/doc/model/train-se-e2-a-tebd.md
@@ -4,7 +4,58 @@ We generate specific a type embedding vector for each atom type so that we can s
 
 The training input script is similar to that of [`se_e2_a`](train-se-e2-a.md), but different by adding the {ref}`type_embedding <model/type_embedding>` section.
 
-## Type embedding net
+## Theory
+
+Usually, when the type embedding approach is not enabled, for a system with multiple chemical species ($|\{\alpha_i\}| > 1$), the parameters of the embedding network $\mathcal{N}_{e,\{2,3\}}$ are chemical-species-wise, as follows:
+
+```math
+    (\mathcal{G}^i)_j = \mathcal{N}^{\alpha_i, \alpha_j}_{e,2}(s(r_{ij})) \quad \mathrm{or}\quad
+    (\mathcal{G}^i)_j = \mathcal{N}^{ \alpha_j}_{e,2}(s(r_{ij})),
+```
+```math
+    (\mathcal{G}^i)_{jk} =\mathcal{N}^{\alpha_j, \alpha_k}_{e,3}((\theta_i)_{jk}).
+```
+
+Thus, there will be $N_t^2$ or $N_t$ embedding networks where $N_t$ is the number of chemical species.
+To improve the performance of matrix operations, $n(i)$ is divided into blocks of different chemical species.
+Each matrix with a dimension of $N_c$ is divided into corresponding blocks, and each block is padded to $N_c^{\alpha_j}$ separately.
+The limitation of this approach is that when there are large numbers of chemical species, the number of embedding networks will increase, requiring large memory and decreasing computing efficiency.
+
+Similar to the embedding networks, if the type embedding approach is not used, the fitting network parameters are chemical-species-wise, and there are $N_t$ sets of fitting network parameters.
+For performance, atoms are sorted by their chemical species $\alpha_i$ in advance.
+For example, the atomic energy $E_i$ is represented as follows:
+```math
+E_i=\mathcal{F}_0^{\alpha_i}(\mathcal{D}^i).
+```
+
+To reduce the number of NN parameters and improve computing efficiency when there are large numbers of chemical species,
+the type embedding $\mathcal{A}$ is introduced, represented as an NN function $\mathcal{N}_t$ of the atomic type $\alpha$:
+
+```math
+    \mathcal{A}^i = \mathcal{N}_t\big( \text{one hot}(\alpha_i) \big),
+```
+
+where $\alpha_i$ is converted to a one-hot vector representing the chemical species before feeding to the NN.
+The type embeddings of central and neighboring atoms $\mathcal{A}^i$ and $\mathcal{A}^j$ are added as an extra input of the embedding network $\mathcal{N}_{e,\{2,3\}}$:
+
+```math
+    (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^i, \mathcal{A}^j\})  \quad \mathrm{or}\quad
+    (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^j\}) ,
+```
+```math
+    (\mathcal{G}^i)_{jk} =\mathcal{N}_{e,3}(\{(\theta_i)_{jk}, \mathcal{A}^j, \mathcal{A}^k\}).
+```
+
+In fitting networks, the type embedding is inserted into the input of the fitting networks:
+```math
+E_i=\mathcal{F}_0(\{\mathcal{D}^i, \mathcal{A}^i\}).
+```
+
+In this way, all chemical species share the same network parameters through the type embedding.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
 The {ref}`model <model>` defines how the model is constructed, adding a section of type embedding net:
 ```json
     "model": {
diff --git a/doc/model/train-se-e2-a.md b/doc/model/train-se-e2-a.md
index a043f64716..537253a6d9 100644
--- a/doc/model/train-se-e2-a.md
+++ b/doc/model/train-se-e2-a.md
@@ -4,6 +4,60 @@ The notation of `se_e2_a` is short for the Deep Potential Smooth Edition (DeepPo
 
 Note that it is sometimes called a "two-atom embedding descriptor" which means the input of the embedding net is atomic distances. The descriptor **does** encode multi-body information (both angular and radial information of neighboring atoms).
 
+## Theory
+
+The two-body embedding smooth edition of the DP descriptor $\mathcal{D}^i \in \mathbb{R}^{M \times M_{<}}$ is usually named the DeepPot-SE descriptor.
+It is noted that the descriptor is a multi-body representation of the local environment of the atom $i$.
+We call it two-body embedding because the embedding network takes only the distance between atoms $i$ and $j$ (see below), but it is not implied that the descriptor takes only the pairwise information between $i$ and its neighbors.
+The descriptor, using full information, is given by
+
+```math
+    \mathcal{D}^i = \frac{1}{N_c^2} (\mathcal{G}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \mathcal{G}^i_<,
+```
+
+where
+$N_c$ is the expected maximum number of neighboring atoms, which is the same constant for all atoms over all frames.
+A matrix with a dimension of $N_c$ will be padded if the number of neighboring atoms is less than $N_c$. $\mathcal{R}^i \in \mathbb{R}^{N_c \times 4}$ is the coordinate matrix, and each row of $\mathcal{R}^i$ can be constructed as
+
+```math
+    (\mathcal{R}^i)_j =
+    \{
+    \begin{array}{cccc}
+    s(r_{ij}) & \frac{s(r_{ij})x_{ij}}{r_{ij}} & \frac{s(r_{ij})y_{ij}}{r_{ij}} & \frac{s(r_{ij})z_{ij}}{r_{ij}}
+    \end{array}
+    \},
+```
+
+where $\boldsymbol{r}_{ij}=\boldsymbol{r}_j-\boldsymbol{r}_i = (x_{ij}, y_{ij}, z_{ij})$ is the relative coordinate and $r_{ij}=\lVert \boldsymbol{r}_{ij} \rVert$ is its norm. The switching function $s(r)$ is defined as
+
+```math
+    s(r)=
+    \begin{cases}
+    \frac{1}{r}, & r \lt r_s, \\
+    \frac{1}{r} \big[ x^3 (-6 x^2 +15 x -10) +1 \big], & r_s \leq r \lt r_c, \\
+    0, & r \geq r_c,
+    \end{cases}
+```
+
+where $x=\frac{r - r_s}{ r_c - r_s}$, so that the polynomial in the square brackets switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$.
+The switching function $s(r)$ is smooth in the sense that the second-order derivative is continuous.
+
+Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {e,2}$ of $s(r_{ij})$:
+
+```math
+    (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})),
+```
+
+where the subscript $e,2$ is used to distinguish the NN from other NNs used in the DP model.
+In the above equation, the network parameters are not explicitly written.
+$\mathcal{G}^i_< \in \mathbb{R}^{N_c \times M_<}$ takes only the first $M_<$ columns of $\mathcal{G}^i$ to reduce the size of $\mathcal D^i$.
+$r_s$, $r_c$, $M$ and $M_<$ are hyperparameters provided by the user.
+The DeepPot-SE is continuous up to the second-order derivative in its domain.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
+
 In this example, we will train a DeepPot-SE model for a water system.  A complete training input script of this example can be found in the directory.
 ```bash
 $deepmd_source_dir/examples/water/se_e2_a/input.json
diff --git a/doc/model/train-se-e2-r.md b/doc/model/train-se-e2-r.md
index f48e10c17b..f2f990b16a 100644
--- a/doc/model/train-se-e2-r.md
+++ b/doc/model/train-se-e2-r.md
@@ -2,6 +2,46 @@
 
 The notation of `se_e2_r` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from the radial information of atomic configurations. The `e2` stands for the embedding with two-atom information.
 
+## Theory
+
+The descriptor, using only radial information, is given by
+
+```math
+    \mathcal{D}^i = \frac{1}{N_c} \sum_j (\mathcal{G}^i)_{jk},
+```
+
+where
+$N_c$ is the expected maximum number of neighboring atoms, which is the same constant for all atoms over all frames.
+A matrix with a dimension of $N_c$ will be padded if the number of neighboring atoms is less than $N_c$.
+
+Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {e,2}$ of $s(r_{ij})$:
+
+```math
+    (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})),
+```
+
+where $\boldsymbol{r}_ {ij}=\boldsymbol{r}_ j-\boldsymbol{r}_ i = (x_{ij}, y_{ij}, z_{ij})$ is the relative coordinate and $r_{ij}=\lVert \boldsymbol{r}_{ij} \rVert$ is its norm. The switching function $s(r)$ is defined as
+
+```math
+    s(r)=
+    \begin{cases}
+    \frac{1}{r}, & r \lt r_s, \\
+    \frac{1}{r} \big[ x^3 (-6 x^2 +15 x -10) +1 \big], & r_s \leq r \lt r_c, \\
+    0, & r \geq r_c,
+    \end{cases}
+```
+
+where $x=\frac{r - r_s}{ r_c - r_s}$, so that the polynomial in the square brackets switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$.
+The switching function $s(r)$ is smooth in the sense that the second-order derivative is continuous.
+
+In the above equations, the network parameters are not explicitly written.
+$r_s$, $r_c$ and $M$ are hyperparameters provided by the user.
+The DeepPot-SE is continuous up to the second-order derivative in its domain.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
+
 A complete training input script of this example can be found in the directory
 ```bash
 $deepmd_source_dir/examples/water/se_e2_r/input.json
diff --git a/doc/model/train-se-e3.md b/doc/model/train-se-e3.md
index d59f11b264..5b0710a389 100644
--- a/doc/model/train-se-e3.md
+++ b/doc/model/train-se-e3.md
@@ -1,6 +1,38 @@
 # Descriptor `"se_e3"`
 
-The notation of `se_e3` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from all information (both angular and radial) of atomic configurations. The embedding takes angles between two neighboring atoms as input (denoted by `e3`).
+The notation of `se_e3` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from all information (both angular and radial) of atomic configurations. The embedding takes bond angles between a central atom and its two neighboring atoms as input (denoted by `e3`).
+
+## Theory
+
+The three-body embedding DeepPot-SE descriptor incorporates bond-angle information, making the model more accurate. The descriptor $\mathcal{D}^i$ can be represented as
+```math
+    \mathcal{D}^i = \frac{1}{N_c^2}(\mathcal{R}^i(\mathcal{R}^i)^T):\mathcal{G}^i,
+```
+where
+$N_c$ is the expected maximum number of neighboring atoms, which is the same constant for all atoms over all frames.
+$\mathcal{R}^i$ is constructed as
+
+```math
+    (\mathcal{R}^i)_j =
+    \{
+    \begin{array}{cccc}
+    s(r_{ij}) & \frac{s(r_{ij})x_{ij}}{r_{ij}} & \frac{s(r_{ij})y_{ij}}{r_{ij}} & \frac{s(r_{ij})z_{ij}}{r_{ij}}
+    \end{array}
+    \},
+```
+Currently, only the full information case of $\mathcal{R}^i$ is supported by the three-body embedding.
+Each element of $\mathcal{G}^i \in \mathbb{R}^{N_c \times N_c \times M}$ comes from $M$ nodes from the output layer of an NN function $\mathcal{N}_{e,3}$:
+
+```math
+    (\mathcal{G}^i)_{jk}=\mathcal{N}_{e,3}((\theta_i)_{jk}),
+```
+
+where $(\theta_i)_ {jk} = (\mathcal{R}^i)_ {j,\\{2,3,4\\}}\cdot (\mathcal{R}^i)_ {k,\\{2,3,4\\}}$ considers the angle form of two neighbours ($j$ and $k$).
+The notation $:$ in the equation indicates the contraction between matrix $\mathcal{R}^i(\mathcal{R}^i)^T$ and the first two dimensions of tensor $\mathcal{G}^i$.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
 
 A complete training input script of this example can be found in the directory
 ```bash
diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md
index d89afd09e5..7a11e3170e 100644
--- a/doc/nvnmd/nvnmd.md
+++ b/doc/nvnmd/nvnmd.md
@@ -6,7 +6,7 @@ This is the training code we used to generate the results in our paper entitled
 
 Any user can follow two consecutive steps to run molecular dynamics (MD) on the proposed NVNMD computer, which has been released online: (i) to train a machine learning (ML) model that can decently reproduce the potential energy surface (PES); and (ii) to deploy the trained ML model on the proposed NVNMD computer, then run MD there to obtain the atomistic trajectories.
 
-# Training
+## Training
 
 Our training procedure consists of not only continuous neural network (CNN) training but also quantized neural network (QNN) training which uses the results of CNN as inputs. It is performed on CPU or GPU by using the training codes we open-sourced online.
 
diff --git a/doc/test/model-deviation.md b/doc/test/model-deviation.md
index 6a89d7c2f4..a59696c5ee 100644
--- a/doc/test/model-deviation.md
+++ b/doc/test/model-deviation.md
@@ -1,5 +1,50 @@
 # Calculate Model Deviation
 
+## Theory
+
+Model deviation $\epsilon_y$ is the standard deviation of properties $\boldsymbol y$ inferred by an ensemble of models $\mathcal{M}_ 1, \dots, \mathcal{M}_{n_m}$ that are trained by the same dataset(s) with the model parameters initialized independently.
+The DeePMD-kit supports $\boldsymbol y$ to be the atomic force $\boldsymbol F_i$ and the virial tensor $\boldsymbol \Xi$.
+The model deviation is used to estimate the error of a model at a certain data frame, denoted by $\boldsymbol x$, containing the coordinates and chemical species of all atoms.
+We present the model deviation of the atomic force and the virial tensor
+```math
+    \epsilon_{\boldsymbol{F},i} (\boldsymbol x)=
+    \sqrt{\langle \lVert \boldsymbol F_i(\boldsymbol x; \boldsymbol \theta_k)-\langle \boldsymbol F_i(\boldsymbol x; \boldsymbol \theta_k) \rangle \rVert^2 \rangle},
+```
+```math
+    \epsilon_{\boldsymbol{\Xi},{\alpha \beta}} (\boldsymbol x)=
+    \frac{1}{N} \sqrt{\langle ( {\Xi}_{\alpha \beta}(\boldsymbol x; \boldsymbol \theta_k)-\langle {\Xi}_{\alpha \beta}(\boldsymbol x; \boldsymbol \theta_k) \rangle )^2 \rangle},
+```
+where $\boldsymbol \theta_k$ denotes the parameters of the model $\mathcal M_k$, and the ensemble average $\langle\cdot\rangle$ is estimated by
+```math
+    \langle \boldsymbol y(\boldsymbol x; \boldsymbol \theta_k) \rangle
+    =
+    \frac{1}{n_m} \sum_{k=1}^{n_m} \boldsymbol y(\boldsymbol x; \boldsymbol \theta_k).
+```
+A small $\epsilon_{\boldsymbol{F},i}$ means that the model has learned the given data; otherwise, the data frame is not covered by the model, and the training data needs to be expanded.
+If the magnitude of $\boldsymbol F_i$ or $\boldsymbol \Xi$ is quite large,
+a relative model deviation $\epsilon_{\boldsymbol{F},i,\text{rel}}$ or $\epsilon_{\boldsymbol{\Xi},\alpha\beta,\text{rel}}$ can be used instead of the absolute model deviation:
+```math
+    \epsilon_{\boldsymbol{F},i,\text{rel}}  (\boldsymbol x)
+    =
+    \frac{\lvert \epsilon_{\boldsymbol{F},i} (\boldsymbol x) \rvert}
+    {\lvert \langle \boldsymbol F_i (\boldsymbol x; \boldsymbol \theta_k) \rangle \rvert + \nu},
+```
+```math
+    \epsilon_{\boldsymbol{\Xi},\alpha\beta,\text{rel}}  (\boldsymbol x)
+    =
+    \frac{ \epsilon_{\boldsymbol{\Xi},\alpha\beta} (\boldsymbol x) }
+    {\lvert \langle \boldsymbol \Xi (\boldsymbol x; \boldsymbol \theta_k) \rangle \rvert + \nu},
+```
+where $\nu$ is a small constant used to protect
+an atom where the magnitude of $\boldsymbol{F}_i$ or $\boldsymbol{\Xi}$ is small from having a large model deviation.
+
+Statistics of $\epsilon_{\boldsymbol{F},i}$ and $\epsilon_{\boldsymbol{\Xi},{\alpha \beta}}$ can be provided, including the maximum, average, and minimum model deviation over the atom index $i$ and over the component index $\alpha,\beta$, respectively.
+The maximum model deviation of forces $\epsilon_{\boldsymbol F,\text{max}}$ in a frame was found to be the best error indicator in a concurrent or active learning algorithm.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## Instructions
+
 One can also use a subcommand to calculate the deviation of predicted forces or virials for a bunch of models in the following way:
 ```bash
 dp model-devi -m graph.000.pb graph.001.pb graph.002.pb graph.003.pb -s ./data -o model_devi.out
diff --git a/doc/train/multi-task-training.md b/doc/train/multi-task-training.md
index c3cbe98c83..c647e6905e 100644
--- a/doc/train/multi-task-training.md
+++ b/doc/train/multi-task-training.md
@@ -1,5 +1,22 @@
 # Multi-task training
 
+## Theory
+
+The multi-task training process can simultaneously handle different datasets with properties that cannot be fitted in one network (e.g. properties from DFT calculations under different exchange-correlation functionals or different basis sets).
+These datasets are denoted by $\boldsymbol x^{(1)}, \dots, \boldsymbol x^{(n_t)}$.
+For each dataset, a training task is defined as
+```math
+    \min_{\boldsymbol \theta}   L^{(t)} (\boldsymbol x^{(t)}; \boldsymbol  \theta^{(t)}, \tau), \quad t=1, \dots, n_t.
+```
+
+During the multi-task training process, all tasks share one descriptor with trainable parameters $\boldsymbol{\theta}_ {d}$, while each of them has its own fitting network with trainable parameters $\boldsymbol{\theta}_ f^{(t)}$, thus
+$\boldsymbol{\theta}^{(t)} = \{ \boldsymbol{\theta}_ {d} , \boldsymbol{\theta}_ {f}^{(t)} \}$.
+At each training step, a task is randomly picked from $\{1, \dots, n_t\}$, and the Adam optimizer is executed to minimize $L^{(t)}$ for one step to update the parameter $\boldsymbol \theta^{(t)}$.
+If different fitting networks have the same architecture, they can share the parameters of some layers
+to improve training efficiency.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
 ## Perform the multi-task training
 Training on multiple data sets (each data set contains several data systems) can be performed in multi-task mode,
 with one common descriptor and multiple specific fitting nets for each data set.
diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index b0194e3471..4940b77fa7 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -4,6 +4,23 @@ In this section, we will take `$deepmd_source_dir/examples/water/se_e2_a/input.j
 
 ## Learning rate
 
+### Theory
+
+The learning rate $\gamma$ decays exponentially:
+```math
+    \gamma(\tau) = \gamma^0 r ^ {\lfloor  \tau/s \rfloor},
+```
+where $\tau \in \mathbb{N}$ is the index of the training step, $\gamma^0  \in \mathbb{R}$ is the learning rate at the first step, and the decay rate $r$ is given by
+```math
+    r = {\left(\frac{\gamma^{\text{stop}}}{\gamma^0}\right )} ^{\frac{s}{\tau^{\text{stop}}}},
+```
+where $\tau^{\text{stop}} \in \mathbb{N}$, $\gamma^{\text{stop}} \in \mathbb{R}$, and $s \in \mathbb{N}$ are the stopping step, the stopping learning rate, and the decay steps, respectively, all of which are hyperparameters provided in advance.
+[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+### Instructions
+
 The {ref}`learning_rate <learning_rate>` section in `input.json` is given as follows
 ```json
     "learning_rate" :{
@@ -18,10 +35,6 @@ The {ref}`learning_rate <learning_rate>` section in `input.json` is given as fol
 * {ref}`stop_lr <learning_rate[exp]/stop_lr>` gives the learning rate at the end of the training. It should be small enough to ensure that the network parameters satisfactorily converge.
 * During the training, the learning rate decays exponentially from {ref}`start_lr <learning_rate[exp]/start_lr>` to {ref}`stop_lr <learning_rate[exp]/stop_lr>` following the formula:
 
-$$ \alpha(t) = \alpha_0 \lambda ^ { t / \tau } $$
-
-where $t$ is the training step, $\alpha$ is the learning rate, $\alpha_0$ is the starting learning rate (set by {ref}`start_lr <learning_rate[exp]/start_lr>`), $\lambda$ is the decay rate, and $\tau$ is the decay steps, i.e.
-
     ```
     lr(t) = start_lr * decay_rate ^ ( t / decay_steps )
     ```

From 9154da26013ecce6297f744919e5771952b2f1b1 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 5 Nov 2023 19:53:05 -0500
Subject: [PATCH 07/97] docs: configuring automatically generated release notes
 (#2975)

See
https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes.

We shall add proper labels when submitting and merging PRs, so release
notes can be more automatic.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/release.yml | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/release.yml

diff --git a/.github/release.yml b/.github/release.yml
new file mode 100644
index 0000000000..382e5db00e
--- /dev/null
+++ b/.github/release.yml
@@ -0,0 +1,34 @@
+changelog:
+  exclude:
+    authors:
+      - app/pre-commit-ci
+      - app/dependabot
+  categories:
+    - title: Breaking Changes
+      labels:
+        - "breaking change"
+    - title: New Features
+      labels:
+        - "new feature"
+    - title: Enhancement
+      labels:
+        - enhancement
+    - title: Documentation
+      labels:
+        # automatically added
+        - Docs
+        # for docs outside the doc directory
+        - "other docs"
+      exclude:
+        labels:
+          - build
+          - bug
+    - title: Build and release
+      labels:
+        - build
+    - title: Bug fixes
+      labels:
+        - bug
+    - title: Other Changes
+      labels:
+        - "*"

From 574068caadf0acd3a4971a22d1457c8d81d01dd1 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 5 Nov 2023 22:35:13 -0500
Subject: [PATCH 08/97] docs: use relative links (#2976)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/getting-started/quick_start.ipynb | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb
index 028e56dc6f..ec939265fd 100644
--- a/doc/getting-started/quick_start.ipynb
+++ b/doc/getting-started/quick_start.ipynb
@@ -102,8 +102,7 @@
    "metadata": {},
    "source": [
     "## General Introduction\n",
-    "This tutorial will introduce you to the basic usage of the DeePMD-kit, taking a gas phase methane molecule as an example. [DeePMD-kit's documentation](https://docs.deepmodeling.org/projects/deepmd/ \n",
-    ") is recommended as the complete reference.\n",
+    "This tutorial will introduce you to the basic usage of the DeePMD-kit, taking a gas phase methane molecule as an example. [DeePMD-kit's documentation](../index.rst) is recommended as the complete reference.\n",
     "\n",
     "The DP model is generated using the DeePMD-kit package (v2.1.5). The training data is converted into the format of DeePMD-kit using a tool named dpdata (v0.2.14). \n",
     "\n",
@@ -129,7 +128,7 @@
    "id": "209c5dd7-983a-468e-9406-652ade04be91",
    "metadata": {},
    "source": [
-    "Folder `abacus_md` is obtained by performing ab-initio molecular dynamics with ABACUS. Detailed instructions on ABACUS can be found in its [document](https://abacus.deepmodeling.com/en/latest/). "
+    "Folder `abacus_md` is obtained by performing ab-initio molecular dynamics with ABACUS. Detailed instructions on ABACUS can be found in its [document](https://abacus.deepmodeling.com/). "
    ]
   },
   {
@@ -456,7 +455,7 @@
    "id": "7b0edb0f-df47-4e6c-8c37-5f32c4bd6b39",
    "metadata": {},
    "source": [
-    "More detailed docs about Data conversion can be found [here](https://docs.deepmodeling.org/projects/deepmd/en/master/data/data-conv.html)."
+    "More detailed docs about Data conversion can be found [here](../data/data-conv.md)."
    ]
   },
   {

From 7fb94ef06154b77ec0a082ec10b2c7baaf617d74 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 7 Nov 2023 12:05:36 +0800
Subject: [PATCH 09/97] [pre-commit.ci] pre-commit autoupdate (#2978)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.3 →
v0.1.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.3...v0.1.4)
- [github.com/pre-commit/mirrors-clang-format: v17.0.3 →
v17.0.4](https://github.com/pre-commit/mirrors-clang-format/compare/v17.0.3...v17.0.4)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ce5cf54d33..245f691ce3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,7 +30,7 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.3
+    rev: v0.1.4
     hooks:
     - id: ruff
       args: ["--fix"]
@@ -53,7 +53,7 @@ repos:
     -   id: blacken-docs
 # C++
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v17.0.3
+    rev: v17.0.4
     hooks:
     -   id: clang-format
         exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc

From ffbdcf89626c0a82701040cfe135fdd8311af2da Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 7 Nov 2023 00:27:39 -0500
Subject: [PATCH 10/97] respect user defined CUDAARCHS (#2979)

Although we set it to `all` by default, one may manually set it via
[CMAKE_CUDA_ARCHITECTURES](https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_ARCHITECTURES.html)
or
[CUDAARCHS](https://cmake.org/cmake/help/latest/envvar/CUDAARCHS.html#envvar:CUDAARCHS).

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/lib/src/gpu/CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt
index c78da978a2..5eb833e24d 100644
--- a/source/lib/src/gpu/CMakeLists.txt
+++ b/source/lib/src/gpu/CMakeLists.txt
@@ -5,7 +5,9 @@ if(USE_CUDA_TOOLKIT)
   project(deepmd_op_cuda)
   set(GPU_LIB_NAME deepmd_op_cuda)
 
-  set(CMAKE_CUDA_ARCHITECTURES all)
+  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES all)
+  endif()
   enable_language(CUDA)
   set(CMAKE_CUDA_STANDARD 11)
   add_compile_definitions(

From a40dcaa8c5e42f97f7f6976c18a8cb76337b2ef3 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 7 Nov 2023 00:30:02 -0500
Subject: [PATCH 11/97] lmp: refactor ixnode (#2971)

The current implementation used a complex method to ensure `ixnode`
(int) within `[0, nxnodes-1]`, but why not use mod ("%") to get the same
behavior?

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/lmp/pair_deepmd.cpp | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 432077de5b..600c4cae29 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -339,24 +339,10 @@ void PairDeepMD::make_ttm_aparam(vector<double> &daparam) {
       int ixnode = static_cast<int>(xscale * nxnodes);
       int iynode = static_cast<int>(yscale * nynodes);
       int iznode = static_cast<int>(zscale * nznodes);
-      while (ixnode > nxnodes - 1) {
-        ixnode -= nxnodes;
-      }
-      while (iynode > nynodes - 1) {
-        iynode -= nynodes;
-      }
-      while (iznode > nznodes - 1) {
-        iznode -= nznodes;
-      }
-      while (ixnode < 0) {
-        ixnode += nxnodes;
-      }
-      while (iynode < 0) {
-        iynode += nynodes;
-      }
-      while (iznode < 0) {
-        iznode += nznodes;
-      }
+      // https://stackoverflow.com/a/1907585/9567349
+      ixnode = ((ixnode % nxnodes) + nxnodes) % nxnodes;
+      iynode = ((iynode % nynodes) + nynodes) % nynodes;
+      iznode = ((iznode % nznodes) + nznodes) % nznodes;
       daparam[ii] = T_electron[ixnode][iynode][iznode];
     }
   }

From 283e8fe2764d32519d34de8557b7d85181b5ee6c Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 8 Nov 2023 00:19:29 -0500
Subject: [PATCH 12/97] bump scikit-build-core to 0.6 (#2981)

Bump scikit-build-core from `>=0.5,<0.6` to `>=0.5,<0.7,!=0.6.0`. (by
default it will install 0.6.1)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 35a11d2163..f8bfbbee79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 requires = [
     # dynamic metadata API is still unstable
     # TODO: unpin the upper bound when it is stable
-    "scikit-build-core>=0.5,<0.6",
+    "scikit-build-core>=0.5,<0.7,!=0.6.0",
     "packaging",
 ]
 build-backend = "backend.dp_backend"

From 43fc073a49932e55ab7a40804336c1632fdeacd5 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 8 Nov 2023 20:04:26 -0500
Subject: [PATCH 13/97] rename `deepmd_cli` to `deepmd_utils` (#2983)

Preparation for more methods in the `deepmd_utils` package.
(`deepmd_cli` is not a good name for this usage)

Enable documentation for the `deepmd_utils` package.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/labeler.yml                  | 2 +-
 .github/workflows/test_cuda.yml      | 2 +-
 .github/workflows/test_python.yml    | 2 +-
 backend/dynamic_metadata.py          | 2 +-
 codecov.yml                          | 2 +-
 deepmd/__init__.py                   | 2 +-
 deepmd/entrypoints/main.py           | 2 +-
 deepmd_cli/__init__.py               | 6 ------
 deepmd_utils/__init__.py             | 6 ++++++
 {deepmd_cli => deepmd_utils}/main.py | 7 ++++++-
 doc/conf.py                          | 3 ++-
 pyproject.toml                       | 4 ++--
 12 files changed, 23 insertions(+), 17 deletions(-)
 delete mode 100644 deepmd_cli/__init__.py
 create mode 100644 deepmd_utils/__init__.py
 rename {deepmd_cli => deepmd_utils}/main.py (98%)

diff --git a/.github/labeler.yml b/.github/labeler.yml
index 195d2cd217..049c9badff 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,6 +1,6 @@
 Python:
 - deepmd/**/*
-- deepmd_cli/**/*
+- deepmd_utils/**/*
 - source/tests/**/*
 Docs: doc/**/*
 Examples: examples/**/*
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index ca72a32277..5e754226ae 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -42,7 +42,7 @@ jobs:
         DP_VARIANT: cuda
         CUDA_PATH: /usr/local/cuda-11.8
     - run: dp --version
-    - run: python -m pytest -s --cov=deepmd --cov=deepmd_cli source/tests --durations=0
+    - run: python -m pytest -s --cov=deepmd --cov=deepmd_utils source/tests --durations=0
     - run: source/install/test_cc_local.sh
       env:
         OMP_NUM_THREADS: 1
diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
index 0ac29a7d9b..b6011cb523 100644
--- a/.github/workflows/test_python.yml
+++ b/.github/workflows/test_python.yml
@@ -38,7 +38,7 @@ jobs:
         HOROVOD_WITH_TENSORFLOW: 1
         HOROVOD_WITHOUT_GLOO: 1
     - run: dp --version
-    - run: pytest --cov=deepmd --cov=deepmd_cli source/tests --durations=0
+    - run: pytest --cov=deepmd --cov=deepmd_utils source/tests --durations=0
     - uses: codecov/codecov-action@v3
       with:
         gcov: true
diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py
index 59df7dce81..dad9c5b597 100644
--- a/backend/dynamic_metadata.py
+++ b/backend/dynamic_metadata.py
@@ -27,7 +27,7 @@ def dynamic_metadata(
     _, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env()
     if field == "scripts":
         return {
-            "dp": "deepmd_cli.main:main",
+            "dp": "deepmd_utils.main:main",
             **extra_scripts,
         }
     elif field == "optional-dependencies":
diff --git a/codecov.yml b/codecov.yml
index 24dd9e3a23..3654859423 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -20,7 +20,7 @@ component_management:
       name: Python
       paths:
         - deepmd/**
-        - deepmd_cli/**
+        - deepmd_utils/**
     - component_id: module_op
       name: OP
       paths:
diff --git a/deepmd/__init__.py b/deepmd/__init__.py
index b02817b6fc..0190bbc124 100644
--- a/deepmd/__init__.py
+++ b/deepmd/__init__.py
@@ -32,7 +32,7 @@
 set_mkl()
 
 try:
-    from deepmd_cli._version import version as __version__
+    from deepmd_utils._version import version as __version__
 except ImportError:
     from .__about__ import (
         __version__,
diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
index 782136b542..2c6ac26a7f 100644
--- a/deepmd/entrypoints/main.py
+++ b/deepmd/entrypoints/main.py
@@ -32,7 +32,7 @@
 from deepmd.nvnmd.entrypoints.train import (
     train_nvnmd,
 )
-from deepmd_cli.main import (
+from deepmd_utils.main import (
     get_ll,
     main_parser,
     parse_args,
diff --git a/deepmd_cli/__init__.py b/deepmd_cli/__init__.py
deleted file mode 100644
index d295053965..0000000000
--- a/deepmd_cli/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""This module contains the entry points for DeePMD-kit.
-
-If only printing the help message, this module does not call
-the main DeePMD-kit module to avoid the slow import of TensorFlow.
-"""
diff --git a/deepmd_utils/__init__.py b/deepmd_utils/__init__.py
new file mode 100644
index 0000000000..1c5314bb7e
--- /dev/null
+++ b/deepmd_utils/__init__.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Utility methods for DeePMD-kit.
+
+The __init__ module should not import any modules
+for performance.
+"""
diff --git a/deepmd_cli/main.py b/deepmd_utils/main.py
similarity index 98%
rename from deepmd_cli/main.py
rename to deepmd_utils/main.py
index bffc1c6911..3dc54db052 100644
--- a/deepmd_cli/main.py
+++ b/deepmd_utils/main.py
@@ -1,4 +1,9 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+"""The entry points for DeePMD-kit.
+
+If only printing the help message, this module does not call
+the main DeePMD-kit module to avoid the slow import of TensorFlow.
+"""
 import argparse
 import logging
 import textwrap
@@ -8,7 +13,7 @@
 )
 
 try:
-    from deepmd_cli._version import version as __version__
+    from deepmd_utils._version import version as __version__
 except ImportError:
     __version__ = "unknown"
 
diff --git a/doc/conf.py b/doc/conf.py
index b17ca82fda..63af974a86 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -134,7 +134,7 @@ def run_apidoc(_):
 
     sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
     cur_dir = os.path.abspath(os.path.dirname(__file__))
-    module = os.path.join(cur_dir, "..", "deepmd")
+    module = os.path.join(cur_dir, "..")
     main(
         [
             "-M",
@@ -145,6 +145,7 @@ def run_apidoc(_):
             "-o",
             os.path.join(cur_dir, "api_py"),
             module,
+            "source/*",
             "--force",
         ]
     )
diff --git a/pyproject.toml b/pyproject.toml
index f8bfbbee79..4ba3bb81e1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -82,7 +82,7 @@ sdist.exclude = [
 ]
 wheel.packages = [
     "deepmd",
-    "deepmd_cli",
+    "deepmd_utils",
 ]
 wheel.py-api = "py37"
 build-dir = "build/{wheel_tag}"
@@ -102,7 +102,7 @@ provider-path = "backend"
 provider = "scikit_build_core.metadata.fancy_pypi_readme"
 
 [[tool.scikit-build.generate]]
-path = "deepmd_cli/_version.py"
+path = "deepmd_utils/_version.py"
 template = '''
 version = "${version}"
 '''

From 4c888d8605910790a508f005202aae293bb0bb53 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 12 Nov 2023 19:41:49 -0500
Subject: [PATCH 14/97] docs: remove lammps.md (#2986)

It has been covered by `quick_start.ipynb` or `lammps-command.md`.
---
 README.md                         | 3 +--
 doc/third-party/index.md          | 3 +--
 doc/third-party/index.rst         | 1 -
 doc/third-party/lammps-command.md | 2 +-
 doc/third-party/lammps.md         | 9 ---------
 5 files changed, 3 insertions(+), 15 deletions(-)
 delete mode 100644 doc/third-party/lammps.md

diff --git a/README.md b/README.md
index 680e187012..a1e9c9484a 100644
--- a/README.md
+++ b/README.md
@@ -135,8 +135,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp
     - [Node.js interface](doc/inference/nodejs.md)
 - [Integrate with third-party packages](doc/third-party/index.rst)
     - [Use deep potential with ASE](doc/third-party/ase.md)
-    - [Run MD with LAMMPS](doc/third-party/lammps.md)
-    - [LAMMPS commands](doc/third-party/lammps-command.md)
+    - [Run MD with LAMMPS](doc/third-party/lammps-command.md)
     - [Run path-integral MD with i-PI](doc/third-party/ipi.md)
     - [Run MD with GROMACS](doc/third-party/gromacs.md)
     - [Interfaces out of DeePMD-kit](doc/third-party/out-of-deepmd-kit.md)
diff --git a/doc/third-party/index.md b/doc/third-party/index.md
index 3de01d6944..235337974c 100644
--- a/doc/third-party/index.md
+++ b/doc/third-party/index.md
@@ -3,8 +3,7 @@
 Note that the model for inference is required to be compatible with the DeePMD-kit package. See [Model compatibility](../troubleshooting/model-compatability.html) for details.
 
 - [Use deep potential with ASE](ase.md)
-- [Run MD with LAMMPS](lammps.md)
-- [LAMMPS commands](lammps-command.md)
+- [Run MD with LAMMPS](lammps-command.md)
 - [Run path-integral MD with i-PI](ipi.md)
 - [Run MD with GROMACS](gromacs.md)
 - [Interfaces out of DeePMD-kit](out-of-deepmd-kit.md)
diff --git a/doc/third-party/index.rst b/doc/third-party/index.rst
index 678dfc9315..f88a477fc7 100644
--- a/doc/third-party/index.rst
+++ b/doc/third-party/index.rst
@@ -7,7 +7,6 @@ Note that the model for inference is required to be compatible with the DeePMD-k
    :maxdepth: 1
 
    ase
-   lammps
    lammps-command
    ipi
    gromacs
diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md
index cdfa4b87d6..023345d638 100644
--- a/doc/third-party/lammps-command.md
+++ b/doc/third-party/lammps-command.md
@@ -1,4 +1,4 @@
-# LAMMPS commands
+# Run MD with LAMMPS
 
 ## units
 All units in LAMMPS except `lj` are supported. `lj` is not supported.
diff --git a/doc/third-party/lammps.md b/doc/third-party/lammps.md
deleted file mode 100644
index 0020db01c5..0000000000
--- a/doc/third-party/lammps.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Run MD with LAMMPS
-
-Running an MD simulation with LAMMPS is simpler. In the LAMMPS input file, one needs to specify the pair style as follows
-
-```lammps
-pair_style     deepmd graph.pb
-pair_coeff     * * O H
-```
-where `graph.pb` is the file name of the frozen model. `pair_coeff` maps atom names (`O H`) with LAMMPS atom types (integers from 1 to Ntypes, i.e. `1 2`).

From 55d678dcf34f78ab2f1eacbc39d1f4ae78b9c316 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 16 Nov 2023 00:30:55 -0500
Subject: [PATCH 15/97] bump CUDA version to 12.2 for pre-built packages
 (#2960)

TensorFlow 2.15 bumps the CUDA version to 12.2. See
https://github.com/tensorflow/tensorflow/commit/3de44168950a5972ba4cfa7e3c6cbf4cffa67fe6.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/workflows/build_cc.yml        |  2 +-
 .github/workflows/test_cuda.yml       | 13 +++++++------
 backend/find_tensorflow.py            |  9 +++++++++
 doc/install/easy-install-dev.md       |  4 ++--
 doc/install/easy-install.md           |  8 ++++----
 doc/install/install-from-c-library.md |  2 +-
 pyproject.toml                        |  9 ++++-----
 source/install/docker/Dockerfile      |  2 +-
 source/install/docker_package_c.sh    |  2 +-
 9 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml
index 964a11ce37..e6377f4fab 100644
--- a/.github/workflows/build_cc.yml
+++ b/.github/workflows/build_cc.yml
@@ -37,7 +37,7 @@ jobs:
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
          && sudo apt-get update \
-         && sudo apt-get -y install cuda-cudart-dev-12-0 cuda-nvcc-12-0
+         && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2
       if: matrix.variant == 'cuda120'
       env:
         DEBIAN_FRONTEND: noninteractive
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index 5e754226ae..d8eddaa44f 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: nvidia
     # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845
     container:
-      image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+      image: nvidia/cuda:12.2.0-devel-ubuntu22.04
       options: --gpus all
     if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch'
     steps:
@@ -31,16 +31,17 @@ jobs:
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
          && sudo apt-get update \
-         && sudo apt-get -y install cuda-11-8 libcudnn8=8.9.5.*-1+cuda11.8
+         && sudo apt-get -y install cuda-12-2 libcudnn8=8.9.5.*-1+cuda12.2
       if: false  # skip as we use nvidia image
     - name: Set PyPI mirror for Aliyun cloud machine
       run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/
     - run: python -m pip install -U "pip>=21.3.1,!=23.0.0"
-    - run: python -m pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
+    - run: python -m pip install "tensorflow>=2.15.0rc0"
+    - run: python -m pip install -v -e .[gpu,test,lmp,cu12] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
       env:
         DP_BUILD_TESTING: 1
         DP_VARIANT: cuda
-        CUDA_PATH: /usr/local/cuda-11.8
+        CUDA_PATH: /usr/local/cuda-12.2
     - run: dp --version
     - run: python -m pytest -s --cov=deepmd --cov=deepmd_utils source/tests --durations=0
     - run: source/install/test_cc_local.sh
@@ -52,7 +53,7 @@ jobs:
         CMAKE_GENERATOR: Ninja
         DP_VARIANT: cuda
         DP_USE_MPICH2: 1
-        CUDA_PATH: /usr/local/cuda-11.8
+        CUDA_PATH: /usr/local/cuda-12.2
     - run: |
         export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH
         export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH
@@ -63,7 +64,7 @@ jobs:
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
-        CUDA_PATH: /usr/local/cuda-11.8
+        CUDA_PATH: /usr/local/cuda-12.2
     - uses: codecov/codecov-action@v3
       with:
         gcov: true
diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py
index 6d7ce5087d..fbbe0e56c0 100644
--- a/backend/find_tensorflow.py
+++ b/backend/find_tensorflow.py
@@ -87,6 +87,13 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]:
         # TypeError if submodule_search_locations are None
         # IndexError if submodule_search_locations is an empty list
     except (AttributeError, TypeError, IndexError):
+        if os.environ.get("CIBUILDWHEEL", "0") == "1":
+            # CUDA 12.2 builds need TensorFlow >= 2.15
+            requires.extend(
+                [
+                    "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'",
+                ]
+            )
         requires.extend(get_tf_requirement()["cpu"])
         # setuptools will re-find tensorflow after installing setup_requires
         tf_install_dir = None
@@ -129,6 +136,8 @@ def get_tf_requirement(tf_version: str = "") -> dict:
             "cpu": [
                 "tensorflow-cpu; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')",
                 "tensorflow; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')",
+                # https://github.com/tensorflow/tensorflow/issues/61830
+                "tensorflow-cpu<2.15; platform_system=='Windows'",
                 *extra_requires,
             ],
             "gpu": [
diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md
index dd943c37af..f3d4fa1a32 100644
--- a/doc/install/easy-install-dev.md
+++ b/doc/install/easy-install-dev.md
@@ -17,10 +17,10 @@ docker pull ghcr.io/deepmodeling/deepmd-kit:devel
 Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`:
 
 ```sh
-pip install -U --pre deepmd-kit[gpu,cu11,lmp] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple
+pip install -U --pre deepmd-kit[gpu,cu12,lmp] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple
 ```
 
-`cu11` and `lmp` are optional, which is the same as the stable version.
+`cu12` and `lmp` are optional, which is the same as the stable version.
 
 ## Download pre-compiled C Library
 
diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md
index f033310f8f..7bd632694b 100644
--- a/doc/install/easy-install.md
+++ b/doc/install/easy-install.md
@@ -84,13 +84,13 @@ docker pull deepmodeling/dpmdkit-rocm:dp2.0.3-rocm4.5.2-tf2.6-lmp29Sep2021
 
 ## Install Python interface with pip
 
-If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 11 supported:
+If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 12 supported:
 
 ```bash
-pip install deepmd-kit[gpu,cu11]
+pip install deepmd-kit[gpu,cu12]
 ```
 
-`cu11` is required only when CUDA Toolkit and cuDNN were not installed.
+`cu12` is required only when CUDA Toolkit and cuDNN were not installed.
 
 Or install the CPU version without CUDA supported:
 ```bash
@@ -99,7 +99,7 @@ pip install deepmd-kit[cpu]
 
 [The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras:
 ```bash
-pip install deepmd-kit[gpu,cu11,lmp,ipi]
+pip install deepmd-kit[gpu,cu12,lmp,ipi]
 ```
 MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.)
 
diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md
index 343446888c..04b71234db 100644
--- a/doc/install/install-from-c-library.md
+++ b/doc/install/install-from-c-library.md
@@ -2,7 +2,7 @@
 
 DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own.
 
-The library is built in Linux (GLIBC 2.17) with CUDA 11.8. It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website.
+The library is built in Linux (GLIBC 2.17) with CUDA 12.2. It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website.
 
 ## Use Pre-compiled C Library to build the LAMMPS plugin and GROMACS patch
 
diff --git a/pyproject.toml b/pyproject.toml
index 4ba3bb81e1..e9ee563960 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ classifiers = [
     "Programming Language :: C",
     "Programming Language :: C++",
     "Programming Language :: Python :: 3 :: Only",
-    "Environment :: GPU :: NVIDIA CUDA :: 11.8",
+    "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.2",
     "Intended Audience :: Science/Research",
     "Programming Language :: Python :: 3.7",
     "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)",
@@ -133,9 +133,8 @@ test-command = [
 test-extras = ["cpu", "test", "lmp", "ipi"]
 build = ["cp310-*"]
 skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"]
-# TODO: bump to "latest" tag when CUDA supports GCC 12
-manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81"
-manylinux-aarch64-image = "quay.io/pypa/manylinux_2_28_aarch64:2022-11-19-1b19e81"
+manylinux-x86_64-image = "manylinux_2_28"
+manylinux-aarch64-image = "manylinux_2_28"
 
 [tool.cibuildwheel.macos]
 environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1" }
@@ -152,7 +151,7 @@ repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2
 environment-pass = ["CIBW_BUILD", "DP_VARIANT"]
 environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
 before-all = [
-    """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-11-8 cuda-cudart-devel-11-8; fi }""",
+    """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-12-2 cuda-cudart-devel-12-2; fi }""",
     "yum install -y mpich-devel",
 ]
 
diff --git a/source/install/docker/Dockerfile b/source/install/docker/Dockerfile
index c5fa878e2a..9ac905dcd0 100644
--- a/source/install/docker/Dockerfile
+++ b/source/install/docker/Dockerfile
@@ -4,7 +4,7 @@ RUN python -m venv /opt/deepmd-kit
 ENV PATH="/opt/deepmd-kit/bin:$PATH"
 # Install package
 COPY dist /dist
-RUN pip install "$(ls /dist/deepmd_kit-*manylinux*_x86_64.whl)[gpu,cu11,lmp,ipi]" \
+RUN pip install "$(ls /dist/deepmd_kit-*manylinux*_x86_64.whl)[gpu,cu12,lmp,ipi]" \
     && dp -h \
     && lmp -h \
     && dp_ipi \
diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh
index d6fb269acd..75f2d1138b 100755
--- a/source/install/docker_package_c.sh
+++ b/source/install/docker_package_c.sh
@@ -3,7 +3,7 @@ set -e
 SCRIPT_PATH=$(dirname $(realpath -s $0))
 
 docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \
-	tensorflow/build:2.13-python3.11 \
+	tensorflow/build:2.15-python3.11 \
 	/bin/sh -c "pip install tensorflow cmake \
             && cd /root/deepmd-kit/source/install \
             && CC=/dt9/usr/bin/gcc \

From 6e9a57548b2949cb04542be94059cfc8c992116f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 17 Nov 2023 17:32:49 -0500
Subject: [PATCH 16/97] [pre-commit.ci] pre-commit autoupdate (#2990)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.4 →
v0.1.5](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.4...v0.1.5)
- [github.com/scop/pre-commit-shfmt: v3.7.0-1 →
v3.7.0-3](https://github.com/scop/pre-commit-shfmt/compare/v3.7.0-1...v3.7.0-3)
<!--pre-commit.ci end-->

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 245f691ce3..8a8e98cac7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,7 +30,7 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.4
+    rev: v0.1.5
     hooks:
     - id: ruff
       args: ["--fix"]
@@ -64,9 +64,9 @@ repos:
     -   id: csslint
 # Shell
 - repo: https://github.com/scop/pre-commit-shfmt
-  rev: v3.7.0-1
+  rev: v3.7.0-3
   hooks:
-    - id: shfmt
+    - id: shfmt-src
 # CMake
 - repo: https://github.com/cheshirekow/cmake-format-precommit
   rev: v0.6.13

From 87d9ffbc2d80c48119b54d05d3c33be87bd77505 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 19 Nov 2023 19:50:10 -0500
Subject: [PATCH 17/97] add unit tests for LAMMPS fparam/aparam keywords
 (#2998)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 source/lmp/tests/test_lammps_faparam.py | 223 ++++++++++++++++++++++++
 1 file changed, 223 insertions(+)
 create mode 100644 source/lmp/tests/test_lammps_faparam.py

diff --git a/source/lmp/tests/test_lammps_faparam.py b/source/lmp/tests/test_lammps_faparam.py
new file mode 100644
index 0000000000..064928eeb1
--- /dev/null
+++ b/source/lmp/tests/test_lammps_faparam.py
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Test LAMMPS fparam and aparam input."""
+import os
+import subprocess as sp
+import sys
+from pathlib import (
+    Path,
+)
+
+import constants
+import numpy as np
+import pytest
+from lammps import (
+    PyLammps,
+)
+from write_lmp_data import (
+    write_lmp_data,
+)
+
+pbtxt_file = (
+    Path(__file__).parent.parent.parent / "tests" / "infer" / "fparam_aparam.pbtxt"
+)
+pb_file = Path(__file__).parent / "fparam_aparam.pb"
+system_file = Path(__file__).parent.parent.parent / "tests"
+data_file = Path(__file__).parent / "data.lmp"
+md_file = Path(__file__).parent / "md.out"
+
+# reference values taken from api_cc/tests/test_deeppot_a_fparam_aparam.cc
+expected_ae = np.array(
+    [
+        -1.038271183039953804e-01,
+        -7.285433575272914908e-02,
+        -9.467600174099155552e-02,
+        -1.467050086239614082e-01,
+        -7.660561620618722145e-02,
+        -7.277295998502930630e-02,
+    ]
+)
+expected_e = np.sum(expected_ae)
+expected_f = np.array(
+    [
+        6.622266817497907132e-02,
+        5.278739055693523058e-02,
+        2.265727495541422845e-02,
+        -2.606047850915838363e-02,
+        -4.538811686410718776e-02,
+        1.058247569147072187e-02,
+        1.679392490937766935e-01,
+        -2.257828022687320690e-03,
+        -4.490145670355452645e-02,
+        -1.148364103573685929e-01,
+        -1.169790466695089237e-02,
+        6.140402504113953025e-02,
+        -8.078778132132799494e-02,
+        -5.838878056243369807e-02,
+        6.773639989682191109e-02,
+        -1.247724708090079161e-02,
+        6.494523955924384750e-02,
+        -1.174787188812918687e-01,
+    ]
+).reshape(6, 3)
+
+expected_v = -np.array(
+    [
+        -1.589185553287162656e-01,
+        2.586163333170100279e-03,
+        -1.575127933809472624e-04,
+        -1.855360380105876630e-02,
+        1.949822090859933826e-02,
+        -1.006552056166355388e-02,
+        3.177029853276916449e-02,
+        1.714349636720383010e-03,
+        -1.290389175187874483e-03,
+        -8.553510339477603253e-02,
+        -5.654637257232508415e-03,
+        -1.286954833787038420e-02,
+        2.464156457499515687e-02,
+        -2.398202886026797043e-02,
+        -1.957110465239037672e-02,
+        2.233492928605742764e-02,
+        6.107843207824020099e-03,
+        1.707078295947736047e-03,
+        -1.653994088976195043e-01,
+        3.894358678172111371e-02,
+        -2.169595969759342477e-02,
+        6.819704294738503786e-03,
+        -5.018242039618424008e-03,
+        2.640664428663210429e-03,
+        -1.985298275686078057e-03,
+        -3.638421609610945767e-02,
+        2.342932331075030239e-02,
+        -8.501331914753691710e-02,
+        -2.181253413538992297e-03,
+        4.311300069651782287e-03,
+        -1.910329328333908129e-03,
+        -1.808810159508548836e-03,
+        -1.540075281450827612e-03,
+        -1.173703213175551763e-02,
+        -2.596306629910121507e-03,
+        6.705025662372287101e-03,
+        -9.038455005073858795e-02,
+        3.011717773578577451e-02,
+        -5.083054073419784880e-02,
+        -2.951210292616929069e-03,
+        2.342445652898489383e-02,
+        -4.091207474993674431e-02,
+        -1.648470649301832236e-02,
+        -2.872261885460645689e-02,
+        4.763924972552112391e-02,
+        -8.300036532764677732e-02,
+        1.020429228955421243e-03,
+        -1.026734151199098881e-03,
+        5.678534096113684732e-02,
+        1.273635718045938205e-02,
+        -1.530143225195957322e-02,
+        -1.061671865629566225e-01,
+        -2.486859433265622629e-02,
+        2.875323131744185121e-02,
+    ]
+).reshape(6, 9)
+
+box = np.array([0, 13, 0, 13, 0, 13, 0, 0, 0])
+coord = np.array(
+    [
+        [12.83, 2.56, 2.18],
+        [12.09, 2.87, 2.74],
+        [0.25, 3.32, 1.68],
+        [3.36, 3.00, 1.81],
+        [3.51, 2.51, 2.60],
+        [4.27, 3.22, 1.56],
+    ]
+)
+type_OH = np.array([1, 1, 1, 1, 1, 1])
+
+
+sp.check_output(
+    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
+        sys.executable,
+        pbtxt_file.resolve(),
+        pb_file.resolve(),
+    ).split()
+)
+
+
+def setup_module():
+    write_lmp_data(box, coord, type_OH, data_file)
+
+
+def teardown_module():
+    os.remove(data_file)
+
+
+def _lammps(data_file, units="metal") -> PyLammps:
+    lammps = PyLammps()
+    lammps.units(units)
+    lammps.boundary("p p p")
+    lammps.atom_style("atomic")
+    if units == "metal" or units == "real":
+        lammps.neighbor("2.0 bin")
+    elif units == "si":
+        lammps.neighbor("2.0e-10 bin")
+    else:
+        raise ValueError("units should be metal, real, or si")
+    lammps.neigh_modify("every 10 delay 0 check no")
+    lammps.read_data(data_file.resolve())
+    if units == "metal" or units == "real":
+        lammps.mass("1 16")
+    elif units == "si":
+        lammps.mass("1 %.10e" % (16 * constants.mass_metal2si))
+    else:
+        raise ValueError("units should be metal, real, or si")
+    if units == "metal":
+        lammps.timestep(0.0005)
+    elif units == "real":
+        lammps.timestep(0.5)
+    elif units == "si":
+        lammps.timestep(5e-16)
+    else:
+        raise ValueError("units should be metal, real, or si")
+    lammps.fix("1 all nve")
+    return lammps
+
+
+@pytest.fixture
+def lammps():
+    lmp = _lammps(data_file=data_file)
+    yield lmp
+    lmp.close()
+
+
+def test_pair_deepmd(lammps):
+    lammps.pair_style(f"deepmd {pb_file.resolve()} fparam 0.25852028 aparam 0.25852028")
+    lammps.pair_coeff("* *")
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    lammps.run(1)
+
+
+def test_pair_deepmd_virial(lammps):
+    lammps.pair_style(f"deepmd {pb_file.resolve()} fparam 0.25852028 aparam 0.25852028")
+    lammps.pair_coeff("* *")
+    lammps.compute("virial all centroid/stress/atom NULL pair")
+    for ii in range(9):
+        jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii]
+        lammps.variable(f"virial{jj} atom c_virial[{ii+1}]")
+    lammps.dump(
+        "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)])
+    )
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    idx_map = lammps.lmp.numpy.extract_atom("id") - 1
+    for ii in range(9):
+        assert np.array(
+            lammps.variables[f"virial{ii}"].value
+        ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])

From a342e94b4f71824e0a66b220646eec0643ad2b67 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 19 Nov 2023 19:51:02 -0500
Subject: [PATCH 18/97] fix restarting from compressed training with type
 embedding (#2996)

Fix #2989.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/model/dos.py                      |  4 +-
 deepmd/model/ener.py                     |  4 +-
 deepmd/model/model.py                    | 54 ++++++++++++++++++++
 deepmd/model/multi.py                    |  4 +-
 deepmd/model/pairwise_dprc.py            |  4 +-
 deepmd/model/tensor.py                   |  4 +-
 deepmd/utils/type_embed.py               |  8 +--
 source/tests/test_compressed_training.py | 63 ++++++++++++++++++++++++
 8 files changed, 133 insertions(+), 12 deletions(-)
 create mode 100644 source/tests/test_compressed_training.py

diff --git a/deepmd/model/dos.py b/deepmd/model/dos.py
index 697fad9a9e..22e291a0f0 100644
--- a/deepmd/model/dos.py
+++ b/deepmd/model/dos.py
@@ -155,10 +155,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                frz_model=frz_model,
+                ckpt_meta=ckpt_meta,
             )
             input_dict["type_embedding"] = type_embedding
         input_dict["atype"] = atype_
diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py
index 1976c1ad51..0d8d66b305 100644
--- a/deepmd/model/ener.py
+++ b/deepmd/model/ener.py
@@ -203,10 +203,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None and "type_embedding" not in input_dict:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                ckpt_meta=ckpt_meta,
+                frz_model=frz_model,
             )
             input_dict["type_embedding"] = type_embedding
         # spin if any
diff --git a/deepmd/model/model.py b/deepmd/model/model.py
index 3f24e42aec..dd439056b4 100644
--- a/deepmd/model/model.py
+++ b/deepmd/model/model.py
@@ -331,6 +331,60 @@ def build_descrpt(
             self.descrpt.pass_tensors_from_frz_model(*imported_tensors[:-1])
         return dout
 
+    def build_type_embedding(
+        self,
+        ntypes: int,
+        frz_model: Optional[str] = None,
+        ckpt_meta: Optional[str] = None,
+        suffix: str = "",
+        reuse: Optional[Union[bool, Enum]] = None,
+    ) -> tf.Tensor:
+        """Build the type embedding part of the model.
+
+        Parameters
+        ----------
+        ntypes : int
+            The number of types
+        frz_model : str, optional
+            The path to the frozen model
+        ckpt_meta : str, optional
+            The path prefix of the checkpoint and meta files
+        suffix : str, optional
+            The suffix of the scope
+        reuse : bool or tf.AUTO_REUSE, optional
+            Whether to reuse the variables
+
+        Returns
+        -------
+        tf.Tensor
+            The type embedding tensor
+        """
+        assert self.typeebd is not None
+        if frz_model is None and ckpt_meta is None:
+            dout = self.typeebd.build(
+                ntypes,
+                reuse=reuse,
+                suffix=suffix,
+            )
+        else:
+            # the type embedding has no input tensors, so nothing needs to be fed
+            feed_dict = {}
+            return_elements = [
+                f"t_typeebd{suffix}:0",
+            ]
+            if frz_model is not None:
+                imported_tensors = self._import_graph_def_from_frz_model(
+                    frz_model, feed_dict, return_elements
+                )
+            elif ckpt_meta is not None:
+                imported_tensors = self._import_graph_def_from_ckpt_meta(
+                    ckpt_meta, feed_dict, return_elements
+                )
+            else:
+                raise RuntimeError("should not reach here")  # pragma: no cover
+            dout = imported_tensors[-1]
+        return dout
+
     def _import_graph_def_from_frz_model(
         self, frz_model: str, feed_dict: dict, return_elements: List[str]
     ):
diff --git a/deepmd/model/multi.py b/deepmd/model/multi.py
index bfc67b9792..83b231c0e8 100644
--- a/deepmd/model/multi.py
+++ b/deepmd/model/multi.py
@@ -317,10 +317,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                frz_model=frz_model,
+                ckpt_meta=ckpt_meta,
             )
             input_dict["type_embedding"] = type_embedding
         input_dict["atype"] = atype_
diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py
index 6983a31cfd..f74571febb 100644
--- a/deepmd/model/pairwise_dprc.py
+++ b/deepmd/model/pairwise_dprc.py
@@ -173,10 +173,12 @@ def build(
         atype_qmmm = gather_placeholder(atype_qmmm, forward_qmmm_map, placeholder=-1)
         box_qm = box
 
-        type_embedding = self.typeebd.build(
+        type_embedding = self.build_type_embedding(
             self.ntypes,
             reuse=reuse,
             suffix=suffix,
+            frz_model=frz_model,
+            ckpt_meta=ckpt_meta,
         )
         input_dict_qm["type_embedding"] = type_embedding
         input_dict_qmmm["type_embedding"] = type_embedding
diff --git a/deepmd/model/tensor.py b/deepmd/model/tensor.py
index 9099b753a4..6a21e085f3 100644
--- a/deepmd/model/tensor.py
+++ b/deepmd/model/tensor.py
@@ -135,10 +135,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                ckpt_meta=ckpt_meta,
+                frz_model=frz_model,
             )
             input_dict["type_embedding"] = type_embedding
             input_dict["atype"] = atype_
diff --git a/deepmd/utils/type_embed.py b/deepmd/utils/type_embed.py
index aadbb3c6e0..c8ab01f7f5 100644
--- a/deepmd/utils/type_embed.py
+++ b/deepmd/utils/type_embed.py
@@ -16,7 +16,6 @@
     nvnmd_cfg,
 )
 from deepmd.utils.graph import (
-    get_tensor_by_name_from_graph,
     get_type_embedding_net_variables_from_graph_def,
 )
 from deepmd.utils.network import (
@@ -109,7 +108,6 @@ def __init__(
         self.trainable = trainable
         self.uniform_seed = uniform_seed
         self.type_embedding_net_variables = None
-        self.type_embedding_from_graph = None
         self.padding = padding
         self.model_type = None
 
@@ -135,8 +133,6 @@ def build(
         embedded_types
             The computational graph for embedded types
         """
-        if self.model_type is not None and self.model_type == "compressed_model":
-            return self.type_embedding_from_graph
         types = tf.convert_to_tensor(list(range(ntypes)), dtype=tf.int32)
         ebd_type = tf.cast(
             tf.one_hot(tf.cast(types, dtype=tf.int32), int(ntypes)),
@@ -166,7 +162,7 @@ def build(
         if self.padding:
             last_type = tf.cast(tf.zeros([1, self.neuron[-1]]), self.filter_precision)
             ebd_type = tf.concat([ebd_type, last_type], 0)  # (ntypes + 1) * neuron[-1]
-        self.ebd_type = tf.identity(ebd_type, name="t_typeebd")
+        self.ebd_type = tf.identity(ebd_type, name="t_typeebd" + suffix)
         return self.ebd_type
 
     def init_variables(
@@ -193,5 +189,3 @@ def init_variables(
         self.type_embedding_net_variables = (
             get_type_embedding_net_variables_from_graph_def(graph_def, suffix=suffix)
         )
-        type_embedding = get_tensor_by_name_from_graph(graph, "t_typeebd")
-        self.type_embedding_from_graph = tf.convert_to_tensor(type_embedding)
diff --git a/source/tests/test_compressed_training.py b/source/tests/test_compressed_training.py
new file mode 100644
index 0000000000..0a0bbeaadf
--- /dev/null
+++ b/source/tests/test_compressed_training.py
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+
+# from deepmd.entrypoints.compress import compress
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+from packaging.version import parse as parse_version
+
+from deepmd.env import (
+    tf,
+)
+
+
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("2"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
+class TestCompressedTrainingSeAtten(unittest.TestCase):
+    def setUp(self) -> None:
+        data_file = str(tests_path / os.path.join("model_compression", "data"))
+        self.input_file = str(tests_path / "input.json")
+        self.frozen_model = str(tests_path / "dp-compress-training-original.pb")
+        self.compressed_model = str(tests_path / "dp-compress-training-compressed.pb")
+        self.frozen_compress_training_model = str(
+            tests_path / "dp-compress-training-compress-training.pb"
+        )
+        self.ckpt_file = str(tests_path / "dp-compress-training.ckpt")
+        self.checkpoint_dir = str(tests_path)
+        jdata = j_loader(
+            str(tests_path / os.path.join("model_compression", "input.json"))
+        )
+        jdata["model"]["descriptor"] = {}
+        jdata["model"]["descriptor"]["type"] = "se_atten_v2"
+        jdata["model"]["descriptor"]["sel"] = 20
+        jdata["model"]["descriptor"]["attn_layer"] = 0
+        jdata["training"]["training_data"]["systems"] = data_file
+        jdata["training"]["validation_data"]["systems"] = data_file
+        jdata["training"]["save_ckpt"] = self.ckpt_file
+        with open(self.input_file, "w") as fp:
+            json.dump(jdata, fp, indent=4)
+
+    def test_compressed_training(self):
+        run_dp(f"dp train {self.input_file}")
+        run_dp(f"dp freeze -c {self.checkpoint_dir} -o {self.frozen_model}")
+        run_dp(f"dp compress -i {self.frozen_model} -o {self.compressed_model}")
+        # compress training
+        run_dp(f"dp train {self.input_file} -f {self.compressed_model}")
+        # restart compress training
+        run_dp(f"dp train {self.input_file} -r {self.ckpt_file}")
+        # freeze compress training
+        run_dp(
+            f"dp freeze -c {self.checkpoint_dir} -o {self.frozen_compress_training_model}"
+        )
+        # a model trained from a compressed model cannot be compressed again
+        with self.assertRaises(RuntimeError):
+            run_dp(
+                f"dp compress -i {self.frozen_compress_training_model} -o {self.compressed_model}"
+            )

From af6eb43053655397116ab4e14c35dd1334501576 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 21 Nov 2023 09:54:03 +0800
Subject: [PATCH 19/97] Bump docker/build-push-action from 5.0.0 to 5.1.0
 (#2999)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps
[docker/build-push-action](https://github.com/docker/build-push-action)
from 5.0.0 to 5.1.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/docker/build-push-action/releases">docker/build-push-action's
releases</a>.</em></p>
<blockquote>
<h2>v5.1.0</h2>
<ul>
<li>Add <code>annotations</code> input by <a
href="https://github.com/crazy-max"><code>@​crazy-max</code></a> in <a
href="https://redirect.github.com/docker/build-push-action/pull/992">docker/build-push-action#992</a></li>
<li>Add <code>secret-envs</code> input by <a
href="https://github.com/elias-lundgren"><code>@​elias-lundgren</code></a>
in <a
href="https://redirect.github.com/docker/build-push-action/pull/980">docker/build-push-action#980</a></li>
<li>Bump <code>@​babel/traverse</code> from 7.17.3 to 7.23.2 in <a
href="https://redirect.github.com/docker/build-push-action/pull/991">docker/build-push-action#991</a></li>
<li>Bump <code>@​docker/actions-toolkit</code> from 0.13.0-rc.1 to
0.14.0 in <a
href="https://redirect.github.com/docker/build-push-action/pull/990">docker/build-push-action#990</a>
<a
href="https://redirect.github.com/docker/build-push-action/pull/1006">docker/build-push-action#1006</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/docker/build-push-action/compare/v5.0.0...v5.1.0">https://github.com/docker/build-push-action/compare/v5.0.0...v5.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/docker/build-push-action/commit/4a13e500e55cf31b7a5d59a38ab2040ab0f42f56"><code>4a13e50</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/build-push-action/issues/1006">#1006</a>
from docker/dependabot/npm_and_yarn/docker/actions-t...</li>
<li><a
href="https://github.com/docker/build-push-action/commit/74166686865cdc289a02f214871fb53447b73447"><code>7416668</code></a>
chore: update generated content</li>
<li><a
href="https://github.com/docker/build-push-action/commit/b4f76a5dc6a67282180eddc6d460f23bc97bfcbc"><code>b4f76a5</code></a>
chore(deps): Bump <code>@​docker/actions-toolkit</code> from 0.13.0 to
0.14.0</li>
<li><a
href="https://github.com/docker/build-push-action/commit/b7feb766fae338d85274c87a9d0f24c09690dbe2"><code>b7feb76</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/build-push-action/issues/1005">#1005</a>
from crazy-max/ci-inspect</li>
<li><a
href="https://github.com/docker/build-push-action/commit/fae8018297c67066fff64a6e9c319c86f89b8982"><code>fae8018</code></a>
ci: inspect sbom and provenance</li>
<li><a
href="https://github.com/docker/build-push-action/commit/b625868b13c3feb675cabbf9bfeb52ae94166606"><code>b625868</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/build-push-action/issues/1004">#1004</a>
from crazy-max/ci-update-buildx</li>
<li><a
href="https://github.com/docker/build-push-action/commit/5193ef1da6ea0d66de97d22817c258b203ade96a"><code>5193ef1</code></a>
ci: update buildx to latest</li>
<li><a
href="https://github.com/docker/build-push-action/commit/d3afd779e409ac26db5374fb27fe4aae9f6adb42"><code>d3afd77</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/build-push-action/issues/991">#991</a>
from docker/dependabot/npm_and_yarn/babel/traverse-7....</li>
<li><a
href="https://github.com/docker/build-push-action/commit/7a786bb2b9408f7f997564f677248fabd4b886d5"><code>7a786bb</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/build-push-action/issues/992">#992</a>
from crazy-max/annotations</li>
<li><a
href="https://github.com/docker/build-push-action/commit/c66ae3adcfbf698ecd851c6bb782654a0c6ffcae"><code>c66ae3a</code></a>
chore: update generated content</li>
<li>Additional commits viewable in <a
href="https://github.com/docker/build-push-action/compare/0565240e2d4ab88bba5387d719585280857ece09...4a13e500e55cf31b7a5d59a38ab2040ab0f42f56">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/build-push-action&package-manager=github_actions&previous-version=5.0.0&new-version=5.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 84c8ac4b74..2ff3ade215 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -129,7 +129,7 @@ jobs:
           images: ghcr.io/deepmodeling/deepmd-kit
 
       - name: Build and push Docker image
-        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09
+        uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56
         with:
           context: source/install/docker
           push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }}

From 66259b18437715f12e58967e43226236e69f0bbc Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 20 Nov 2023 21:07:07 -0500
Subject: [PATCH 20/97] docs: document horovod on Conda-Forge (#3001)

Recently I contributed Horovod to conda-forge at
https://github.com/conda-forge/staged-recipes/pull/24472. I have tested
it with DeePMD-kit. Now the Conda-forge channel has all the features of
our DeepModeling channel and supports even more platforms.

I wonder if we should recommend Conda-forge over our DeepModeling
channel and use it for the offline installer. In 2019, Conda-forge
lacked support for TensorFlow, so we hosted and built our own channel
against the Anaconda channel. But now Conda-forge has become a mature
channel and has better support for TensorFlow than the Anaconda channel.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 doc/install/easy-install.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md
index 7bd632694b..741ef632a8 100644
--- a/doc/install/easy-install.md
+++ b/doc/install/easy-install.md
@@ -58,7 +58,7 @@ conda activate deepmd
 DeePMD-kit is also available on the [conda-forge](https://conda-forge.org/) channel:
 
 ```bash
-conda create -n deepmd deepmd-kit lammps -c conda-forge
+conda create -n deepmd deepmd-kit lammps horovod -c conda-forge
 ```
 
 The supported platform includes Linux x86-64, macOS x86-64, and macOS arm64.

From 4d1c5fd4d11d0efe5fa645477e4970d050fda3bd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 21 Nov 2023 10:07:28 +0800
Subject: [PATCH 21/97] [pre-commit.ci] pre-commit autoupdate (#3000)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.5 →
v0.1.6](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.5...v0.1.6)
- [github.com/pre-commit/mirrors-clang-format: v17.0.4 →
v17.0.5](https://github.com/pre-commit/mirrors-clang-format/compare/v17.0.4...v17.0.5)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8a8e98cac7..fd218813f8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,7 +30,7 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.5
+    rev: v0.1.6
     hooks:
     - id: ruff
       args: ["--fix"]
@@ -53,7 +53,7 @@ repos:
     -   id: blacken-docs
 # C++
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v17.0.4
+    rev: v17.0.5
     hooks:
     -   id: clang-format
         exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc

From f32104675e188891d4c1ab137d6ca6401b709f72 Mon Sep 17 00:00:00 2001
From: nahso <47053538+nahso@users.noreply.github.com>
Date: Fri, 24 Nov 2023 09:20:51 +0800
Subject: [PATCH 22/97] Add the missing initializations for extra embedding
 variables (#3005)

---
 deepmd/descriptor/se_a.py                     |  41 ++--
 deepmd/descriptor/se_atten.py                 |  49 ++---
 deepmd/utils/compress.py                      |   6 +-
 deepmd/utils/graph.py                         |  85 ++++++++
 source/tests/test_init_frz_model_se_a_tebd.py | 196 ++++++++++++++++++
 5 files changed, 331 insertions(+), 46 deletions(-)
 create mode 100644 source/tests/test_init_frz_model_se_a_tebd.py

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 2de0b63245..6b0a7f7ab1 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -41,6 +41,8 @@
     GraphWithoutTensorError,
 )
 from deepmd.utils.graph import (
+    get_extra_embedding_net_suffix,
+    get_extra_embedding_net_variables_from_graph_def,
     get_pattern_nodes_from_graph_def,
     get_tensor_by_name_from_graph,
 )
@@ -204,7 +206,7 @@ def __init__(
         self.type_one_side = type_one_side
         self.spin = spin
         self.stripped_type_embedding = stripped_type_embedding
-        self.extra_embeeding_net_variables = None
+        self.extra_embedding_net_variables = None
         self.layer_size = len(neuron)
 
         # extend sel_a for spin system
@@ -470,11 +472,13 @@ def enable_compression(
             )
 
         if self.stripped_type_embedding:
+            one_side_suffix = get_extra_embedding_net_suffix(type_one_side=True)
+            two_side_suffix = get_extra_embedding_net_suffix(type_one_side=False)
             ret_two_side = get_pattern_nodes_from_graph_def(
-                graph_def, f"filter_type_all{suffix}/.+_two_side_ebd"
+                graph_def, f"filter_type_all{suffix}/.+{two_side_suffix}"
             )
             ret_one_side = get_pattern_nodes_from_graph_def(
-                graph_def, f"filter_type_all{suffix}/.+_one_side_ebd"
+                graph_def, f"filter_type_all{suffix}/.+{one_side_suffix}"
             )
             if len(ret_two_side) == 0 and len(ret_one_side) == 0:
                 raise RuntimeError(
@@ -487,19 +491,19 @@ def enable_compression(
             elif len(ret_two_side) != 0:
                 self.final_type_embedding = get_two_side_type_embedding(self, graph)
                 self.matrix = get_extra_side_embedding_net_variable(
-                    self, graph_def, "two_side", "matrix", suffix
+                    self, graph_def, two_side_suffix, "matrix", suffix
                 )
                 self.bias = get_extra_side_embedding_net_variable(
-                    self, graph_def, "two_side", "bias", suffix
+                    self, graph_def, two_side_suffix, "bias", suffix
                 )
                 self.extra_embedding = make_data(self, self.final_type_embedding)
             else:
                 self.final_type_embedding = get_type_embedding(self, graph)
                 self.matrix = get_extra_side_embedding_net_variable(
-                    self, graph_def, "one_side", "matrix", suffix
+                    self, graph_def, one_side_suffix, "matrix", suffix
                 )
                 self.bias = get_extra_side_embedding_net_variable(
-                    self, graph_def, "one_side", "bias", suffix
+                    self, graph_def, one_side_suffix, "bias", suffix
                 )
                 self.extra_embedding = make_data(self, self.final_type_embedding)
 
@@ -961,20 +965,21 @@ def _filter_lower(
 
                 if not self.compress:
                     if self.type_one_side:
-                        one_side_type_embedding_suffix = "_one_side_ebd"
                         net_output = embedding_net(
                             type_embedding,
                             self.filter_neuron,
                             self.filter_precision,
                             activation_fn=activation_fn,
                             resnet_dt=self.filter_resnet_dt,
-                            name_suffix=one_side_type_embedding_suffix,
+                            name_suffix=get_extra_embedding_net_suffix(
+                                self.type_one_side
+                            ),
                             stddev=stddev,
                             bavg=bavg,
                             seed=self.seed,
                             trainable=trainable,
                             uniform_seed=self.uniform_seed,
-                            initial_variables=self.extra_embeeding_net_variables,
+                            initial_variables=self.extra_embedding_net_variables,
                             mixed_prec=self.mixed_prec,
                         )
                         net_output = tf.nn.embedding_lookup(
@@ -1004,20 +1009,21 @@ def _filter_lower(
                         index_of_two_side = tf.reshape(idx, [-1])
                         self.extra_embedding_index = index_of_two_side
 
-                        two_side_type_embedding_suffix = "_two_side_ebd"
                         net_output = embedding_net(
                             two_side_type_embedding,
                             self.filter_neuron,
                             self.filter_precision,
                             activation_fn=activation_fn,
                             resnet_dt=self.filter_resnet_dt,
-                            name_suffix=two_side_type_embedding_suffix,
+                            name_suffix=get_extra_embedding_net_suffix(
+                                self.type_one_side
+                            ),
                             stddev=stddev,
                             bavg=bavg,
                             seed=self.seed,
                             trainable=trainable,
                             uniform_seed=self.uniform_seed,
-                            initial_variables=self.extra_embeeding_net_variables,
+                            initial_variables=self.extra_embedding_net_variables,
                             mixed_prec=self.mixed_prec,
                         )
                         net_output = tf.nn.embedding_lookup(net_output, idx)
@@ -1327,6 +1333,15 @@ def init_variables(
                 self.dstd = new_dstd
                 if self.original_sel is None:
                     self.original_sel = sel
+        if self.stripped_type_embedding:
+            self.extra_embedding_net_variables = (
+                get_extra_embedding_net_variables_from_graph_def(
+                    graph_def,
+                    suffix,
+                    get_extra_embedding_net_suffix(self.type_one_side),
+                    self.layer_size,
+                )
+            )
 
     @property
     def explicit_ntypes(self) -> bool:
diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py
index 8e4c3c3ef6..1ceda23065 100644
--- a/deepmd/descriptor/se_atten.py
+++ b/deepmd/descriptor/se_atten.py
@@ -42,9 +42,10 @@
 )
 from deepmd.utils.graph import (
     get_attention_layer_variables_from_graph_def,
+    get_extra_embedding_net_suffix,
+    get_extra_embedding_net_variables_from_graph_def,
     get_pattern_nodes_from_graph_def,
     get_tensor_by_name_from_graph,
-    get_tensor_by_type,
 )
 from deepmd.utils.network import (
     embedding_net,
@@ -391,11 +392,12 @@ def enable_compression(
             raise RuntimeError("can not compress model when attention layer is not 0.")
 
         ret = get_pattern_nodes_from_graph_def(
-            graph_def, f"filter_type_all{suffix}/.+_two_side_ebd"
+            graph_def,
+            f"filter_type_all{suffix}/.+{get_extra_embedding_net_suffix(type_one_side=False)}",
         )
         if len(ret) == 0:
             raise RuntimeError(
-                "can not find variables of embedding net `*_two_side_ebd` from graph_def, maybe it is not a compressible model."
+                f"can not find variables of embedding net `*{get_extra_embedding_net_suffix(type_one_side=False)}` from graph_def, maybe it is not a compressible model."
             )
 
         self.compress = True
@@ -420,11 +422,12 @@ def enable_compression(
         )
 
         self.final_type_embedding = get_two_side_type_embedding(self, graph)
+        type_side_suffix = get_extra_embedding_net_suffix(type_one_side=False)
         self.matrix = get_extra_side_embedding_net_variable(
-            self, graph_def, "two_side", "matrix", suffix
+            self, graph_def, type_side_suffix, "matrix", suffix
         )
         self.bias = get_extra_side_embedding_net_variable(
-            self, graph_def, "two_side", "bias", suffix
+            self, graph_def, type_side_suffix, "bias", suffix
         )
         self.two_embd = make_data(self, self.final_type_embedding)
 
@@ -1125,14 +1128,15 @@ def _filter_lower(
                             two_side_type_embedding,
                             [-1, two_side_type_embedding.shape[-1]],
                         )
-                        two_side_type_embedding_suffix = "_two_side_ebd"
                         embedding_of_two_side_type_embedding = embedding_net(
                             two_side_type_embedding,
                             self.filter_neuron,
                             self.filter_precision,
                             activation_fn=activation_fn,
                             resnet_dt=self.filter_resnet_dt,
-                            name_suffix=two_side_type_embedding_suffix,
+                            name_suffix=get_extra_embedding_net_suffix(
+                                type_one_side=False
+                            ),
                             stddev=stddev,
                             bavg=bavg,
                             seed=self.seed,
@@ -1292,18 +1296,6 @@ def init_variables(
         """
         super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix)
 
-        if self.stripped_type_embedding:
-            self.two_side_embeeding_net_variables = {}
-            for i in range(1, self.layer_size + 1):
-                matrix_pattern = f"filter_type_all{suffix}/matrix_{i}_two_side_ebd"
-                self.two_side_embeeding_net_variables[
-                    matrix_pattern
-                ] = self._get_two_embed_variables(graph_def, matrix_pattern)
-                bias_pattern = f"filter_type_all{suffix}/bias_{i}_two_side_ebd"
-                self.two_side_embeeding_net_variables[
-                    bias_pattern
-                ] = self._get_two_embed_variables(graph_def, bias_pattern)
-
         self.attention_layer_variables = get_attention_layer_variables_from_graph_def(
             graph_def, suffix=suffix
         )
@@ -1322,18 +1314,15 @@ def init_variables(
                     f"attention_layer_{i}{suffix}/layer_normalization_{i}/gamma"
                 ]
 
-    def _get_two_embed_variables(self, graph_def, pattern: str):
-        node = get_pattern_nodes_from_graph_def(graph_def, pattern)[pattern]
-        dtype = tf.as_dtype(node.dtype).as_numpy_dtype
-        tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
-        if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
-            tensor_value = np.frombuffer(
-                node.tensor_content,
-                dtype=tf.as_dtype(node.dtype).as_numpy_dtype,
+        if self.stripped_type_embedding:
+            self.two_side_embeeding_net_variables = (
+                get_extra_embedding_net_variables_from_graph_def(
+                    graph_def,
+                    suffix,
+                    get_extra_embedding_net_suffix(type_one_side=False),
+                    self.layer_size,
+                )
             )
-        else:
-            tensor_value = get_tensor_by_type(node, dtype)
-        return np.reshape(tensor_value, tensor_shape)
 
     def build_type_exclude_mask(
         self,
diff --git a/deepmd/utils/compress.py b/deepmd/utils/compress.py
index c6e68dfe19..7a79dec520 100644
--- a/deepmd/utils/compress.py
+++ b/deepmd/utils/compress.py
@@ -43,15 +43,15 @@ def get_two_side_type_embedding(self, graph):
 
 
 def get_extra_side_embedding_net_variable(
-    self, graph_def, type_side, varialbe_name, suffix
+    self, graph_def, type_side_suffix, varialbe_name, suffix
 ):
     ret = {}
     for i in range(1, self.layer_size + 1):
         target = get_pattern_nodes_from_graph_def(
             graph_def,
-            f"filter_type_all{suffix}/{varialbe_name}_{i}_{type_side}_ebd",
+            f"filter_type_all{suffix}/{varialbe_name}_{i}{type_side_suffix}",
         )
-        node = target[f"filter_type_all{suffix}/{varialbe_name}_{i}_{type_side}_ebd"]
+        node = target[f"filter_type_all{suffix}/{varialbe_name}_{i}{type_side_suffix}"]
         ret["layer_" + str(i)] = node
     return ret
 
diff --git a/deepmd/utils/graph.py b/deepmd/utils/graph.py
index 2a795a45a2..ad4ee0224a 100644
--- a/deepmd/utils/graph.py
+++ b/deepmd/utils/graph.py
@@ -237,6 +237,91 @@ def get_embedding_net_variables_from_graph_def(
     return embedding_net_variables
 
 
+def get_extra_embedding_net_suffix(type_one_side: bool):
+    """Get the extra embedding net suffix according to the value of type_one_side.
+
+    Parameters
+    ----------
+    type_one_side
+        The value of type_one_side
+
+    Returns
+    -------
+    str
+        The extra embedding net suffix
+    """
+    if type_one_side:
+        extra_suffix = "_one_side_ebd"
+    else:
+        extra_suffix = "_two_side_ebd"
+    return extra_suffix
+
+
+def get_variables_from_graph_def_as_numpy_array(graph_def: tf.GraphDef, pattern: str):
+    """Get variables from the given tf.GraphDef object, with numpy array returns.
+
+    Parameters
+    ----------
+    graph_def
+        The input tf.GraphDef object
+    pattern : str
+        The name of variable
+
+    Returns
+    -------
+    np.ndarray
+        The numpy array of the variable
+    """
+    node = get_pattern_nodes_from_graph_def(graph_def, pattern)[pattern]
+    dtype = tf.as_dtype(node.dtype).as_numpy_dtype
+    tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
+    if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
+        tensor_value = np.frombuffer(
+            node.tensor_content,
+            dtype=tf.as_dtype(node.dtype).as_numpy_dtype,
+        )
+    else:
+        tensor_value = get_tensor_by_type(node, dtype)
+    return np.reshape(tensor_value, tensor_shape)
+
+
+def get_extra_embedding_net_variables_from_graph_def(
+    graph_def: tf.GraphDef, suffix: str, extra_suffix: str, layer_size: int
+):
+    """Get extra embedding net variables from the given tf.GraphDef object.
+    The "extra embedding net" means the embedding net with only type embeddings input,
+    which occurs in "se_atten_v2" and "se_a_ebd_v2" descriptor.
+
+    Parameters
+    ----------
+    graph_def
+        The input tf.GraphDef object
+    suffix : str
+        The "common" suffix in the descriptor
+    extra_suffix : str
+        This value depends on the value of "type_one_side".
+        It should always be "_one_side_ebd" or "_two_side_ebd"
+    layer_size : int
+        The layer size of the embedding net
+
+    Returns
+    -------
+    Dict
+        The extra embedding net variables within the given tf.GraphDef object
+    """
+    extra_embedding_net_variables = {}
+    for i in range(1, layer_size + 1):
+        matrix_pattern = f"filter_type_all{suffix}/matrix_{i}{extra_suffix}"
+        extra_embedding_net_variables[
+            matrix_pattern
+        ] = get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern)
+        bias_pattern = f"filter_type_all{suffix}/bias_{i}{extra_suffix}"
+        extra_embedding_net_variables[
+            bias_pattern
+        ] = get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern)
+    return extra_embedding_net_variables
+
+
 def get_embedding_net_variables(model_file: str, suffix: str = "") -> Dict:
     """Get the embedding net variables with the given frozen model(model_file).
 
diff --git a/source/tests/test_init_frz_model_se_a_tebd.py b/source/tests/test_init_frz_model_se_a_tebd.py
new file mode 100644
index 0000000000..e54cae9781
--- /dev/null
+++ b/source/tests/test_init_frz_model_se_a_tebd.py
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
+    default_places = 4
+else:
+    default_places = 10
+
+
+def _file_delete(file):
+    if os.path.isdir(file):
+        os.rmdir(file)
+    elif os.path.isfile(file):
+        os.remove(file)
+
+
+def _init_models():
+    data_file = str(tests_path / os.path.join("init_frz_model", "data"))
+    frozen_model = str(tests_path / "init_frz_se_a_tebd.pb")
+    ckpt = str(tests_path / "init_frz_se_a_tebd.ckpt")
+    run_opt_ckpt = RunOptions(init_model=ckpt, log_level=20)
+    run_opt_frz = RunOptions(init_frz_model=frozen_model, log_level=20)
+    INPUT = str(tests_path / "input.json")
+    jdata = j_loader(str(tests_path / os.path.join("init_frz_model", "input.json")))
+    jdata["model"]["descriptor"]["type"] = "se_a_ebd_v2"
+    jdata["training"]["training_data"]["systems"] = data_file
+    jdata["training"]["validation_data"]["systems"] = data_file
+    jdata["training"]["save_ckpt"] = ckpt
+    with open(INPUT, "w") as fp:
+        json.dump(jdata, fp, indent=4)
+    ret = run_dp("dp train " + INPUT)
+    np.testing.assert_equal(ret, 0, "DP train failed!")
+    ret = run_dp("dp freeze -c " + str(tests_path) + " -o " + frozen_model)
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
+
+    jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
+    jdata = normalize(jdata)
+    model_ckpt = DPTrainer(jdata, run_opt=run_opt_ckpt)
+    model_frz = DPTrainer(jdata, run_opt=run_opt_frz)
+    rcut = model_ckpt.model.get_rcut()
+    type_map = model_ckpt.model.get_type_map()
+    data = DeepmdDataSystem(
+        systems=[data_file],
+        batch_size=1,
+        test_size=1,
+        rcut=rcut,
+        type_map=type_map,
+        trn_all_set=True,
+    )
+    data_requirement = {
+        "energy": {
+            "ndof": 1,
+            "atomic": False,
+            "must": False,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "force": {
+            "ndof": 3,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "virial": {
+            "ndof": 9,
+            "atomic": False,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_ener": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_pref": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 3,
+            "default": 0.0,
+        },
+    }
+    data.add_dict(data_requirement)
+    stop_batch = jdata["training"]["numb_steps"]
+
+    return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
+
+
+(
+    INPUT,
+    CKPT,
+    FROZEN_MODEL,
+    CKPT_TRAINER,
+    FRZ_TRAINER,
+    VALID_DATA,
+    STOP_BATCH,
+) = _init_models()
+
+
+class TestInitFrzModelA(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.dp_ckpt = CKPT_TRAINER
+        cls.dp_frz = FRZ_TRAINER
+        cls.valid_data = VALID_DATA
+        cls.stop_batch = STOP_BATCH
+
+    @classmethod
+    def tearDownClass(cls):
+        _file_delete(INPUT)
+        _file_delete(FROZEN_MODEL)
+        _file_delete("out.json")
+        _file_delete(str(tests_path / "checkpoint"))
+        _file_delete(CKPT + ".meta")
+        _file_delete(CKPT + ".index")
+        _file_delete(CKPT + ".data-00000-of-00001")
+        _file_delete(CKPT + "-0.meta")
+        _file_delete(CKPT + "-0.index")
+        _file_delete(CKPT + "-0.data-00000-of-00001")
+        _file_delete(CKPT + "-1.meta")
+        _file_delete(CKPT + "-1.index")
+        _file_delete(CKPT + "-1.data-00000-of-00001")
+        _file_delete("input_v2_compat.json")
+        _file_delete("lcurve.out")
+
+    def test_single_frame(self):
+        valid_batch = self.valid_data.get_batch()
+        natoms = valid_batch["natoms_vec"]
+        tf.reset_default_graph()
+        self.dp_ckpt.build(self.valid_data, self.stop_batch)
+        self.dp_ckpt._init_session()
+        feed_dict_ckpt = self.dp_ckpt.get_feed_dict(valid_batch, is_training=False)
+        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(
+            self.dp_ckpt.sess, feed_dict_ckpt, natoms
+        )
+        tf.reset_default_graph()
+
+        self.dp_frz.build(self.valid_data, self.stop_batch)
+        self.dp_frz._init_session()
+        feed_dict_frz = self.dp_frz.get_feed_dict(valid_batch, is_training=False)
+        ckpt_rmse_frz = self.dp_frz.loss.eval(self.dp_frz.sess, feed_dict_frz, natoms)
+        tf.reset_default_graph()
+
+        # check values
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_e"], ckpt_rmse_frz["rmse_e"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_f"], ckpt_rmse_frz["rmse_f"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_v"], ckpt_rmse_frz["rmse_v"], default_places
+        )

From c03416d0be874ebda9fc831d001817c3eec159a8 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 26 Nov 2023 04:42:41 -0500
Subject: [PATCH 23/97] add cu11 prebuilt packages (#3002)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/workflows/build_wheel.yml     | 24 ++++++++++++++++++++++--
 .github/workflows/package_c.yml       | 18 ++++++++++++++++--
 backend/find_tensorflow.py            | 23 +++++++++++++++++------
 doc/install/easy-install-dev.md       |  4 +++-
 doc/install/easy-install.md           |  6 ++++++
 doc/install/install-from-c-library.md |  2 +-
 pyproject.toml                        | 14 +++++++++++---
 source/install/docker/Dockerfile      |  8 +++++---
 source/install/docker_package_c.sh    |  4 ++--
 9 files changed, 83 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 2ff3ade215..c58a5925bf 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -33,6 +33,13 @@ jobs:
             python: 311
             platform_id: manylinux_x86_64
             dp_variant: cuda
+            cuda_version: 12.2
+          - os: ubuntu-latest
+            python: 311
+            platform_id: manylinux_x86_64
+            dp_variant: cuda
+            cuda_version: 11.8
+            dp_pkg_name: deepmd-kit-cu11
           # macos-x86-64
           - os: macos-latest
             python: 311
@@ -68,6 +75,8 @@ jobs:
           CIBW_ARCHS: all
           CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }}
           DP_VARIANT: ${{ matrix.dp_variant }}
+          CUDA_VERSION: ${{ matrix.cuda_version }}
+          DP_PKG_NAME: ${{ matrix.dp_pkg_name }}
       - uses: actions/upload-artifact@v3
         with:
           path: ./wheelhouse/*.whl
@@ -109,6 +118,14 @@ jobs:
     # use the already built wheels to build docker
     needs: [build_wheels]
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - variant: ""
+            cuda_version: "12"
+          - variant: "_cu11"
+            cuda_version: "11"
     steps:
       - uses: actions/checkout@v4
       - uses: actions/download-artifact@v3
@@ -133,8 +150,11 @@ jobs:
         with:
           context: source/install/docker
           push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }}
-          tags: ${{ steps.meta.outputs.tags }}
+          tags: ${{ steps.meta.outputs.tags }}${{ matrix.variant }}
           labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            VARIANT=${{ matrix.variant }}
+            CUDA_VERSION=${{ matrix.cuda_version }}
 
   build_pypi_index:
     needs: [build_wheels, build_sdist]
@@ -173,7 +193,7 @@ jobs:
 
   pass:
     name: Pass testing build wheels
-    needs: [build_wheels, build_sdist]
+    needs: [build_wheels, build_sdist, build_docker, build_pypi_index]
     runs-on: ubuntu-latest
     if: always()
     steps:
diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml
index ada205be00..2b5f74b97d 100644
--- a/.github/workflows/package_c.yml
+++ b/.github/workflows/package_c.yml
@@ -8,23 +8,37 @@ jobs:
   build_c:
     name: Build C library
     runs-on: ubuntu-22.04
+    strategy:
+      matrix:
+        include:
+          - tensorflow_build_version: "2.15"
+            tensorflow_version: ""
+            filename: libdeepmd_c.tar.gz
+          - tensorflow_build_version: "2.14"
+            tensorflow_version: ">=2.5.0rc0,<2.15"
+            filename: libdeepmd_c_cu11.tar.gz
     steps:
       - uses: actions/checkout@v4
       - name: Package C library
         run: ./source/install/docker_package_c.sh
+        env:
+          TENSORFLOW_VERSION: ${{ matrix.tensorflow_version }}
+          TENSORFLOW_BUILD_VERSION: ${{ matrix.tensorflow_build_version }}
+      - run: cp libdeepmd_c.tar.gz ${{ matrix.filename }}
+        if: matrix.filename != 'libdeepmd_c.tar.gz'
       # for download and debug
       - name: Upload artifact
         uses: actions/upload-artifact@v3
         with:
           name: libdeepmd_c
-          path: ./libdeepmd_c.tar.gz
+          path: ${{ matrix.filename }}
       - name: Test C library
         run: ./source/install/docker_test_package_c.sh
       - name: Release
         uses: softprops/action-gh-release@v1
         if: startsWith(github.ref, 'refs/tags/')
         with:
-          files: libdeepmd_c.tar.gz
+          files: ${{ matrix.filename }}
   test_c:
     name: Test building from C library
     needs: [build_c]
diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py
index fbbe0e56c0..08a73f7252 100644
--- a/backend/find_tensorflow.py
+++ b/backend/find_tensorflow.py
@@ -88,12 +88,23 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]:
         # IndexError if submodule_search_locations is an empty list
     except (AttributeError, TypeError, IndexError):
         if os.environ.get("CIBUILDWHEEL", "0") == "1":
-            # CUDA 12.2
-            requires.extend(
-                [
-                    "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'",
-                ]
-            )
+            cuda_version = os.environ.get("CUDA_VERSION", "12.2")
+            if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
+                # CUDA 12.2
+                requires.extend(
+                    [
+                        "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'",
+                    ]
+                )
+            elif cuda_version in SpecifierSet(">=11,<12"):
+                # CUDA 11.8
+                requires.extend(
+                    [
+                        "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'",
+                    ]
+                )
+            else:
+                raise RuntimeError("Unsupported CUDA version")
         requires.extend(get_tf_requirement()["cpu"])
         # setuptools will re-find tensorflow after installing setup_requires
         tf_install_dir = None
diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md
index f3d4fa1a32..6fd9171730 100644
--- a/doc/install/easy-install-dev.md
+++ b/doc/install/easy-install-dev.md
@@ -6,12 +6,14 @@ The following is the way to install the pre-compiled packages without [building
 
 ## Install with docker
 
-The [`devel` tag](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit/131827568?tag=devel) is used to mark the latest development version with CUDA support:
+The [`devel` tag](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit/131827568?tag=devel) is used to mark the latest development version with CUDA 12.2 support:
 
 ```bash
 docker pull ghcr.io/deepmodeling/deepmd-kit:devel
 ```
 
+For CUDA 11.8 support, use the `devel_cu11` tag.
+
 ## Install with pip
 
 Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`:
diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md
index 741ef632a8..3bc1f4b944 100644
--- a/doc/install/easy-install.md
+++ b/doc/install/easy-install.md
@@ -92,6 +92,12 @@ pip install deepmd-kit[gpu,cu12]
 
 `cu12` is required only when CUDA Toolkit and cuDNN were not installed.
 
+To install the package built against CUDA 11.8, use
+
+```bash
+pip install deepmd-kit-cu11[gpu,cu11]
+```
+
 Or install the CPU version without CUDA supported:
 ```bash
 pip install deepmd-kit[cpu]
diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md
index 04b71234db..eb89538277 100644
--- a/doc/install/install-from-c-library.md
+++ b/doc/install/install-from-c-library.md
@@ -2,7 +2,7 @@
 
 DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own.
 
-The library is built in Linux (GLIBC 2.17) with CUDA 12.2. It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website.
+The library is built in Linux (GLIBC 2.17) with CUDA 12.2 (`libdeepmd_c.tar.gz`) or 11.8 (`libdeepmd_c_cu11.tar.gz`). It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website.
 
 ## Use Pre-compiled C Library to build the LAMMPS plugin and GROMACS patch
 
diff --git a/pyproject.toml b/pyproject.toml
index e9ee563960..04bcc69f75 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -133,7 +133,9 @@ test-command = [
 test-extras = ["cpu", "test", "lmp", "ipi"]
 build = ["cp310-*"]
 skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"]
-manylinux-x86_64-image = "manylinux_2_28"
+# TODO: uncomment when CUDA 11 is deprecated
+# manylinux-x86_64-image = "manylinux_2_28"
+manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81"
 manylinux-aarch64-image = "manylinux_2_28"
 
 [tool.cibuildwheel.macos]
@@ -148,10 +150,16 @@ repair-wheel-command = """if [[ "$CIBW_BUILD" == *macosx_arm64* ]]; then rm -rf
 
 [tool.cibuildwheel.linux]
 repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}"
-environment-pass = ["CIBW_BUILD", "DP_VARIANT"]
+environment-pass = [
+    "CIBW_BUILD",
+    "DP_VARIANT",
+    "CUDA_VERSION",
+    "DP_PKG_NAME",
+]
 environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
 before-all = [
-    """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-12-2 cuda-cudart-devel-12-2; fi }""",
+    """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""",
+    """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""",
     "yum install -y mpich-devel",
 ]
 
diff --git a/source/install/docker/Dockerfile b/source/install/docker/Dockerfile
index 9ac905dcd0..26b7be9f19 100644
--- a/source/install/docker/Dockerfile
+++ b/source/install/docker/Dockerfile
@@ -1,16 +1,18 @@
-FROM python:3.10 AS compile-image
+FROM python:3.11 AS compile-image
+ARG VARIANT=""
+ARG CUDA_VERSION="12"
 RUN python -m venv /opt/deepmd-kit
 # Make sure we use the virtualenv
 ENV PATH="/opt/deepmd-kit/bin:$PATH"
 # Install package
 COPY dist /dist
-RUN pip install "$(ls /dist/deepmd_kit-*manylinux*_x86_64.whl)[gpu,cu12,lmp,ipi]" \
+RUN pip install "$(ls /dist/deepmd_kit${VARIANT}-*manylinux*_x86_64.whl)[gpu,cu${CUDA_VERSION},lmp,ipi]" \
     && dp -h \
     && lmp -h \
     && dp_ipi \
     && python -m deepmd -h
 
-FROM python:3.10 AS build-image
+FROM python:3.11 AS build-image
 COPY --from=compile-image /opt/deepmd-kit /opt/deepmd-kit
 ENV PATH="/opt/deepmd-kit/bin:$PATH"
 CMD ["/bin/bash"]
diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh
index 75f2d1138b..544c175a0a 100755
--- a/source/install/docker_package_c.sh
+++ b/source/install/docker_package_c.sh
@@ -3,8 +3,8 @@ set -e
 SCRIPT_PATH=$(dirname $(realpath -s $0))
 
 docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \
-	tensorflow/build:2.15-python3.11 \
-	/bin/sh -c "pip install tensorflow cmake \
+	tensorflow/build:${TENSORFLOW_BUILD_VERSION:-2.15}-python3.11 \
+	/bin/sh -c "pip install \"tensorflow${TENSORFLOW_VERSION}\" cmake \
             && cd /root/deepmd-kit/source/install \
             && CC=/dt9/usr/bin/gcc \
                CXX=/dt9/usr/bin/g++ \

From efb0a34971899884b1b4d7ccc2fe87b6337c8cf6 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 27 Nov 2023 22:39:53 -0500
Subject: [PATCH 24/97] print system prob using scientific notation (#3008)

When the probability of a system is smaller than 1e-3, it is shown as
`0.000`, which is useless. This commit changes it to the scientific
notation, displaying it like `1.547e-06`.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
---
 deepmd/utils/data_system.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
index 09dcac2d8d..bf05b5faa7 100644
--- a/deepmd/utils/data_system.py
+++ b/deepmd/utils/data_system.py
@@ -560,11 +560,11 @@ def print_summary(self, name):
         log.info("found %d system(s):" % self.nsystems)
         log.info(
             ("%s  " % self._format_name_length("system", sys_width))
-            + ("%6s  %6s  %6s  %5s  %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc"))
+            + ("%6s  %6s  %6s  %9s  %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc"))
         )
         for ii in range(self.nsystems):
             log.info(
-                "%s  %6d  %6d  %6d  %5.3f  %3s"
+                "%s  %6d  %6d  %6d  %9.3e  %3s"
                 % (
                     self._format_name_length(self.system_dirs[ii], sys_width),
                     self.natoms[ii],

From b4bb3a902f521e9230cdf5e1a8cfa63db5717610 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 28 Nov 2023 15:08:30 +0800
Subject: [PATCH 25/97] [pre-commit.ci] pre-commit autoupdate (#3011)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/scop/pre-commit-shfmt: v3.7.0-3 →
v3.7.0-4](https://github.com/scop/pre-commit-shfmt/compare/v3.7.0-3...v3.7.0-4)
<!--pre-commit.ci end-->

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fd218813f8..089118f9af 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -64,9 +64,9 @@ repos:
     -   id: csslint
 # Shell
 - repo: https://github.com/scop/pre-commit-shfmt
-  rev: v3.7.0-3
+  rev: v3.7.0-4
   hooks:
-    - id: shfmt-src
+    - id: shfmt
 # CMake
 - repo: https://github.com/cheshirekow/cmake-format-precommit
   rev: v0.6.13

From 12baf124e0ee7a989eb44b694b3357f6487ca321 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 28 Nov 2023 19:19:50 -0500
Subject: [PATCH 26/97] Fix macro issue with multiple arguments (#3016)

Modified the `DPMD_CVT_STR` macro to handle multiple arguments by using
`__VA_ARGS__`. This resolves #2952, the error caused by commas in the
branch name during compilation.

This commit (including the above message) is entirely generated by
ChatGPT 3.5.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/lmp/deepmd_version.h.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/lmp/deepmd_version.h.in b/source/lmp/deepmd_version.h.in
index f4c72c8413..4b99bc7c33 100644
--- a/source/lmp/deepmd_version.h.in
+++ b/source/lmp/deepmd_version.h.in
@@ -5,7 +5,7 @@
 #define DEEPMD_ROOT @CMAKE_INSTALL_PREFIX@
 #define TensorFlow_INCLUDE_DIRS @TensorFlow_INCLUDE_DIRS@
 #define TensorFlow_LIBRARY @TensorFlow_LIBRARY@
-#define DPMD_CVT_STR(x) #x
+#define DPMD_CVT_STR(...) #__VA_ARGS__
 #define DPMD_CVT_ASSTR(X) DPMD_CVT_STR(X)
 #define STR_GIT_SUMM DPMD_CVT_ASSTR(GIT_SUMM)
 #define STR_GIT_HASH DPMD_CVT_ASSTR(GIT_HASH)

From ea452956d5f2f5754f82ee4f0da846f78ddbeeff Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 5 Dec 2023 11:14:17 +0800
Subject: [PATCH 27/97] [pre-commit.ci] pre-commit autoupdate (#3032)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/pre-commit/mirrors-clang-format: v17.0.5 →
v17.0.6](https://github.com/pre-commit/mirrors-clang-format/compare/v17.0.5...v17.0.6)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 089118f9af..64a061dd54 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -53,7 +53,7 @@ repos:
     -   id: blacken-docs
 # C++
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v17.0.5
+    rev: v17.0.6
     hooks:
     -   id: clang-format
         exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc

From 44aba8352d897f87bd5e637b082893c0a296aaa0 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 5 Dec 2023 20:10:25 -0500
Subject: [PATCH 28/97] bump scikit-build-core to 0.7 (#3038)

No breaking changes there

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 04bcc69f75..fdd4904eb5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 requires = [
     # dynamic metadata API is still unstable
     # TODO: unpin the upper bound when it is stable
-    "scikit-build-core>=0.5,<0.7,!=0.6.0",
+    "scikit-build-core>=0.5,<0.8,!=0.6.0",
     "packaging",
 ]
 build-backend = "backend.dp_backend"

From 3c549492dbba017aaa994b090f926deeb29bb687 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 5 Dec 2023 20:11:19 -0500
Subject: [PATCH 29/97] breaking(lmp): do not apply scale factor to model
 deviation (#3036)

Resolves #3027.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/third-party/lammps-command.md |  1 +
 source/lmp/pair_deepmd.cpp        | 15 +++++++--------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md
index 023345d638..150d755795 100644
--- a/doc/third-party/lammps-command.md
+++ b/doc/third-party/lammps-command.md
@@ -82,6 +82,7 @@ Evaluate the interaction of the system by using [Deep Potential][DP] or [Deep Po
 This pair style takes the deep potential defined in a model file that usually has the .pb extension. The model can be trained and frozen by package [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit), which can have either double or single float precision interface.
 
 The model deviation evalulates the consistency of the force predictions from multiple models. By default, only the maximal, minimal and average model deviations are output. If the key `atomic` is set, then the model deviation of force prediction of each atom will be output.
+The unit follows [LAMMPS units](#units) and the [scale factor](https://docs.lammps.org/pair_hybrid.html) is not applied.
 
 By default, the model deviation is output in absolute value. If the keyword `relative` is set, then the relative model deviation of the force will be output, including values output by the keyword `atomic`. The relative model deviation of the force on atom $i$ is defined by
 
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 600c4cae29..645bb9fec9 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -767,12 +767,12 @@ void PairDeepMD::compute(int eflag, int vflag) {
           all_v_avg = sqrt(all_v_avg / 9);
         }
         if (rank == 0) {
-          all_v_max *= scale[1][1] * ener_unit_cvt_factor;
-          all_v_min *= scale[1][1] * ener_unit_cvt_factor;
-          all_v_avg *= scale[1][1] * ener_unit_cvt_factor;
-          all_f_max *= scale[1][1] * force_unit_cvt_factor;
-          all_f_min *= scale[1][1] * force_unit_cvt_factor;
-          all_f_avg *= scale[1][1] * force_unit_cvt_factor;
+          all_v_max *= ener_unit_cvt_factor;
+          all_v_min *= ener_unit_cvt_factor;
+          all_v_avg *= ener_unit_cvt_factor;
+          all_f_max *= force_unit_cvt_factor;
+          all_f_min *= force_unit_cvt_factor;
+          all_f_avg *= force_unit_cvt_factor;
           fp << setw(12) << update->ntimestep << " " << setw(18) << all_v_max
              << " " << setw(18) << all_v_min << " " << setw(18) << all_v_avg
              << " " << setw(18) << all_f_max << " " << setw(18) << all_f_min
@@ -798,8 +798,7 @@ void PairDeepMD::compute(int eflag, int vflag) {
                       displacements, MPI_DOUBLE, 0, world);
           if (rank == 0) {
             for (int dd = 0; dd < all_nlocal; ++dd) {
-              std_f_all[tagrecv[dd] - 1] =
-                  stdfrecv[dd] * scale[1][1] * force_unit_cvt_factor;
+              std_f_all[tagrecv[dd] - 1] = stdfrecv[dd] * force_unit_cvt_factor;
             }
             for (int dd = 0; dd < all_nlocal; ++dd) {
               fp << " " << setw(18) << std_f_all[dd];

From fe488a4560e86176414e1b9b04904902ac6ed488 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 6 Dec 2023 20:13:01 -0500
Subject: [PATCH 30/97] fix se_a_ebd_v2 when nloc != nall (#3037)

See also #2390 and #2505...

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/descriptor/se_a.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 6b0a7f7ab1..9beec2db1d 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -782,16 +782,16 @@ def _pass_filter(
             type_i = -1
             if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor:
                 inputs_i = descrpt2r4(inputs_i, natoms)
+            self.atype_nloc = tf.reshape(
+                tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1]
+            )  # when nloc != nall, pass nloc to mask
             if len(self.exclude_types):
-                atype_nloc = tf.reshape(
-                    tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1]
-                )  # when nloc != nall, pass nloc to mask
                 mask = self.build_type_exclude_mask(
                     self.exclude_types,
                     self.ntypes,
                     self.sel_a,
                     self.ndescrpt,
-                    atype_nloc,
+                    self.atype_nloc,
                     tf.shape(inputs_i)[0],
                 )
                 inputs_i *= mask
@@ -956,7 +956,7 @@ def _filter_lower(
                     extra_embedding_index = self.nei_type_vec
                 else:
                     padding_ntypes = type_embedding.shape[0]
-                    atype_expand = tf.reshape(self.atype, [-1, 1])
+                    atype_expand = tf.reshape(self.atype_nloc, [-1, 1])
                     idx_i = tf.tile(atype_expand * padding_ntypes, [1, self.nnei])
                     idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei])
                     idx = idx_i + idx_j
@@ -1002,7 +1002,7 @@ def _filter_lower(
                             [-1, two_side_type_embedding.shape[-1]],
                         )
 
-                        atype_expand = tf.reshape(self.atype, [-1, 1])
+                        atype_expand = tf.reshape(self.atype_nloc, [-1, 1])
                         idx_i = tf.tile(atype_expand * padding_ntypes, [1, self.nnei])
                         idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei])
                         idx = idx_i + idx_j

From a21079913d125ab6cce6dec9b48454c6e948454b Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 6 Dec 2023 20:16:46 -0500
Subject: [PATCH 31/97] skip pushing when an action is triggered by dependabot
 (#3035)

See https://github.com/deepmodeling/deepmd-kit/pull/3029
See also
https://docs.github.com/en/code-security/dependabot/working-with-dependabot/automating-dependabot-with-github-actions#responding-to-events

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index c58a5925bf..ad4a17cfea 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -149,7 +149,7 @@ jobs:
         uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56
         with:
           context: source/install/docker
-          push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }}
+          push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' && github.actor != 'dependabot[bot]' }}
           tags: ${{ steps.meta.outputs.tags }}${{ matrix.variant }}
           labels: ${{ steps.meta.outputs.labels }}
           build-args: |

From 0547940baa74db029f05f4f95e0d2341d51ab97e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 7 Dec 2023 01:56:32 +0000
Subject: [PATCH 32/97] Bump docker/metadata-action from 5.0.0 to 5.3.0 (#3029)

---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index ad4a17cfea..67241e6e23 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -141,7 +141,7 @@ jobs:
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934
+        uses: docker/metadata-action@31cebacef4805868f9ce9a0cb03ee36c32df2ac4
         with:
           images: ghcr.io/deepmodeling/deepmd-kit
 

From ce25ff453a178df2ab451bb49c39df1789d7ca4a Mon Sep 17 00:00:00 2001
From: Futaki Haduki <812556867@qq.com>
Date: Mon, 11 Dec 2023 13:15:21 +0800
Subject: [PATCH 33/97] fix: invalid read and write when natom grows (#3031)

This PR fixes #3015, which reported an invalid memory read and write
when running `fix deposition` in Lammps to add atoms into the system.
The issue could not be reproduced when `atomic` was removed from the
`pair_style deepmd` line.

As suggested by @njzjz , the possible cause of the bug was the
allocation of `stdfsend`, `stdfrecv`, etc., which only happened when
`out_each == 1`, set by `atomic` in the `pair_style deepmd` command.
Moreover, their size was only determined in the initialization step,
based on the initial `natoms`. The valgrind log also supported this
hypothesis, as it showed `MPI_Gather` and `MPI_Gatherv` errors in the
traceback.

Therefore, we decided to destroy and recreate these buffers whenever
`atom->natoms` exceeds their currently allocated size. This should prevent the
invalid memory access and resolve the issue.
---
 source/api_cc/src/common.cc |  4 ++--
 source/lmp/pair_deepmd.cpp  | 26 ++++++++++++++++++++------
 source/lmp/pair_deepmd.h    |  1 +
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc
index 5994e9446f..33c433a90a 100644
--- a/source/api_cc/src/common.cc
+++ b/source/api_cc/src/common.cc
@@ -1277,10 +1277,10 @@ void deepmd::print_summary(const std::string& pre) {
   deepmd::get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
   std::cout << pre << "installed to:       " + global_install_prefix << "\n";
   std::cout << pre << "source:             " + global_git_summ << "\n";
-  std::cout << pre << "source branch:       " + global_git_branch << "\n";
+  std::cout << pre << "source branch:      " + global_git_branch << "\n";
   std::cout << pre << "source commit:      " + global_git_hash << "\n";
   std::cout << pre << "source commit at:   " + global_git_date << "\n";
-  std::cout << pre << "surpport model ver.:" + global_model_version << "\n";
+  std::cout << pre << "support model ver.: " + global_model_version << "\n";
 #if defined(GOOGLE_CUDA)
   std::cout << pre << "build variant:      cuda"
             << "\n";
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 645bb9fec9..533e3538fe 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -394,6 +394,7 @@ PairDeepMD::PairDeepMD(LAMMPS *lmp)
   out_each = 0;
   out_rel = 0;
   out_rel_v = 0;
+  stdf_comm_buff_size = 0;
   eps = 0.;
   eps_v = 0.;
   scale = NULL;
@@ -720,13 +721,11 @@ void PairDeepMD::compute(int eflag, int vflag) {
         }
         double min = numeric_limits<double>::max(), max = 0, avg = 0;
         ana_st(max, min, avg, std_f, nlocal);
-        int all_nlocal = 0;
-        MPI_Reduce(&nlocal, &all_nlocal, 1, MPI_INT, MPI_SUM, 0, world);
         double all_f_min = 0, all_f_max = 0, all_f_avg = 0;
         MPI_Reduce(&min, &all_f_min, 1, MPI_DOUBLE, MPI_MIN, 0, world);
         MPI_Reduce(&max, &all_f_max, 1, MPI_DOUBLE, MPI_MAX, 0, world);
         MPI_Reduce(&avg, &all_f_avg, 1, MPI_DOUBLE, MPI_SUM, 0, world);
-        all_f_avg /= double(all_nlocal);
+        all_f_avg /= double(atom->natoms);
         // std v
         std::vector<double> send_v(9 * numb_models);
         std::vector<double> recv_v(9 * numb_models);
@@ -779,10 +778,22 @@ void PairDeepMD::compute(int eflag, int vflag) {
              << " " << setw(18) << all_f_avg;
         }
         if (out_each == 1) {
-          vector<double> std_f_all(all_nlocal);
+          vector<double> std_f_all(atom->natoms);
           // Gather std_f and tags
           tagint *tag = atom->tag;
           int nprocs = comm->nprocs;
+          // Grow arrays if necessary
+          if (atom->natoms > stdf_comm_buff_size) {
+            stdf_comm_buff_size = atom->natoms;
+            memory->destroy(stdfsend);
+            memory->destroy(stdfrecv);
+            memory->destroy(tagsend);
+            memory->destroy(tagrecv);
+            memory->create(stdfsend, stdf_comm_buff_size, "deepmd:stdfsendall");
+            memory->create(stdfrecv, stdf_comm_buff_size, "deepmd:stdfrecvall");
+            memory->create(tagsend, stdf_comm_buff_size, "deepmd:tagsendall");
+            memory->create(tagrecv, stdf_comm_buff_size, "deepmd:tagrecvall");
+          }
           for (int ii = 0; ii < nlocal; ii++) {
             tagsend[ii] = tag[ii];
             stdfsend[ii] = std_f[ii];
@@ -797,10 +808,10 @@ void PairDeepMD::compute(int eflag, int vflag) {
           MPI_Gatherv(stdfsend, nlocal, MPI_DOUBLE, stdfrecv, counts,
                       displacements, MPI_DOUBLE, 0, world);
           if (rank == 0) {
-            for (int dd = 0; dd < all_nlocal; ++dd) {
+            for (int dd = 0; dd < atom->natoms; ++dd) {
               std_f_all[tagrecv[dd] - 1] = stdfrecv[dd] * force_unit_cvt_factor;
             }
-            for (int dd = 0; dd < all_nlocal; ++dd) {
+            for (int dd = 0; dd < atom->natoms; ++dd) {
               fp << " " << setw(18) << std_f_all[dd];
             }
           }
@@ -1278,6 +1289,9 @@ void PairDeepMD::init_style() {
   if (out_each == 1) {
     int ntotal = atom->natoms;
     int nprocs = comm->nprocs;
+    if (ntotal > stdf_comm_buff_size) {
+      stdf_comm_buff_size = ntotal;
+    }
     memory->create(counts, nprocs, "deepmd:counts");
     memory->create(displacements, nprocs, "deepmd:displacements");
     memory->create(stdfsend, ntotal, "deepmd:stdfsendall");
diff --git a/source/lmp/pair_deepmd.h b/source/lmp/pair_deepmd.h
index 0f704ab45c..cd72dc7b2a 100644
--- a/source/lmp/pair_deepmd.h
+++ b/source/lmp/pair_deepmd.h
@@ -98,6 +98,7 @@ class PairDeepMD : public Pair {
   int out_each;
   int out_rel;
   int out_rel_v;
+  int stdf_comm_buff_size;
   bool single_model;
   bool multi_models_mod_devi;
   bool multi_models_no_mod_devi;

From a6f133387ce0b7dbf823fad54bfe5725b90228ce Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 11 Dec 2023 00:21:01 -0500
Subject: [PATCH 34/97] build neighbor list with external Python program
 (#3046)

Fix #2877

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/calculator.py            |  5 +-
 deepmd/infer/__init__.py        |  7 +++
 deepmd/infer/deep_dipole.py     |  4 ++
 deepmd/infer/deep_eval.py       | 95 +++++++++++++++++++++++++++++++++
 deepmd/infer/deep_polar.py      | 13 ++++-
 deepmd/infer/deep_pot.py        | 58 +++++++++++++++++---
 deepmd/infer/deep_tensor.py     | 85 +++++++++++++++++++++++++----
 source/tests/test_deepdipole.py | 32 +++++++++--
 source/tests/test_deeppolar.py  | 34 +++++++++---
 source/tests/test_deeppot_a.py  | 23 ++++++++
 10 files changed, 326 insertions(+), 30 deletions(-)

diff --git a/deepmd/calculator.py b/deepmd/calculator.py
index 8636ff30d2..b9c0a81006 100644
--- a/deepmd/calculator.py
+++ b/deepmd/calculator.py
@@ -45,6 +45,8 @@ class DP(Calculator):
     type_dict : Dict[str, int], optional
         mapping of element types and their numbers, best left None and the calculator
         will infer this information from model, by default None
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Examples
     --------
@@ -83,10 +85,11 @@ def __init__(
         model: Union[str, "Path"],
         label: str = "DP",
         type_dict: Optional[Dict[str, int]] = None,
+        neighbor_list=None,
         **kwargs,
     ) -> None:
         Calculator.__init__(self, label=label, **kwargs)
-        self.dp = DeepPotential(str(Path(model).resolve()))
+        self.dp = DeepPotential(str(Path(model).resolve()), neighbor_list=neighbor_list)
         if type_dict:
             self.type_dict = type_dict
         else:
diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py
index 14d75d0c44..c1071af35c 100644
--- a/deepmd/infer/__init__.py
+++ b/deepmd/infer/__init__.py
@@ -58,6 +58,7 @@ def DeepPotential(
     load_prefix: str = "load",
     default_tf_graph: bool = False,
     input_map: Optional[dict] = None,
+    neighbor_list=None,
 ) -> Union[DeepDipole, DeepGlobalPolar, DeepPolar, DeepPot, DeepDOS, DeepWFC]:
     """Factory function that will inialize appropriate potential read from `model_file`.
 
@@ -71,6 +72,8 @@ def DeepPotential(
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Returns
     -------
@@ -97,6 +100,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "dos":
         dp = DeepDOS(
@@ -111,6 +115,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "polar":
         dp = DeepPolar(
@@ -118,6 +123,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "global_polar":
         dp = DeepGlobalPolar(
@@ -125,6 +131,7 @@ def DeepPotential(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
     elif model_type == "wfc":
         dp = DeepWFC(
diff --git a/deepmd/infer/deep_dipole.py b/deepmd/infer/deep_dipole.py
index 6020118135..aba098a9f3 100644
--- a/deepmd/infer/deep_dipole.py
+++ b/deepmd/infer/deep_dipole.py
@@ -27,6 +27,8 @@ class DeepDipole(DeepTensor):
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Warnings
     --------
@@ -41,6 +43,7 @@ def __init__(
         load_prefix: str = "load",
         default_tf_graph: bool = False,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ) -> None:
         # use this in favor of dict update to move attribute from class to
         # instance namespace
@@ -58,6 +61,7 @@ def __init__(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
 
     def get_dim_fparam(self) -> int:
diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
index 3f5dede1ad..0ca9f21a77 100644
--- a/deepmd/infer/deep_eval.py
+++ b/deepmd/infer/deep_eval.py
@@ -45,6 +45,9 @@ class DeepEval:
         as the initial batch size.
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
     """
 
     load_prefix: str  # set by subclass
@@ -56,6 +59,7 @@ def __init__(
         default_tf_graph: bool = False,
         auto_batch_size: Union[bool, int, AutoBatchSize] = False,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ):
         self.graph = self._load_graph(
             model_file,
@@ -86,6 +90,8 @@ def __init__(
         else:
             raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
 
+        self.neighbor_list = neighbor_list
+
     @property
     @lru_cache(maxsize=None)
     def model_type(self) -> str:
@@ -360,3 +366,92 @@ def eval_typeebd(self) -> np.ndarray:
         t_typeebd = self._get_tensor("t_typeebd:0")
         [typeebd] = run_sess(self.sess, [t_typeebd], feed_dict={})
         return typeebd
+
+    def build_neighbor_list(
+        self,
+        coords: np.ndarray,
+        cell: Optional[np.ndarray],
+        atype: np.ndarray,
+        imap: np.ndarray,
+        neighbor_list,
+    ):
+        """Make the mesh with neighbor list for a single frame.
+
+        Parameters
+        ----------
+        coords : np.ndarray
+            The coordinates of atoms. Should be of shape [natoms, 3]
+        cell : Optional[np.ndarray]
+            The cell of the system, of shape [3, 3]. None means no PBC.
+        atype : np.ndarray
+            The type of atoms. Should be of shape [natoms]
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [natoms]
+        neighbor_list : ase.neighborlist.NewPrimitiveNeighborList
+            ASE neighbor list. The following method or attribute will be
+            used/set: bothways, self_interaction, update, build, first_neigh,
+            pair_second, offset_vec.
+
+        Returns
+        -------
+        natoms_vec : np.ndarray
+            The number of atoms. This tensor has the length of Ntypes + 2
+            natoms[0]: nloc
+            natoms[1]: nall
+            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc
+        coords : np.ndarray
+            The coordinates of atoms, including ghost atoms. Should be of
+            shape [nall, 3]
+        atype : np.ndarray
+            The type of atoms, including ghost atoms. Should be of shape [nall]
+        mesh : np.ndarray
+            The mesh in nei_mode=4.
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [nall]
+        ghost_map : np.ndarray
+            The local atom index each ghost atom is an image of. Shape [nghost]
+        """
+        pbc = np.repeat(cell is not None, 3)
+        # no cell given: use a placeholder so the reshape/dot below still work
+        cell = np.eye(3) if cell is None else cell.reshape(3, 3)
+        positions = coords.reshape(-1, 3)
+        neighbor_list.bothways = True
+        neighbor_list.self_interaction = False
+        if neighbor_list.update(pbc, cell, positions):
+            neighbor_list.build(pbc, cell, positions)
+        first_neigh = neighbor_list.first_neigh.copy()
+        pair_second = neighbor_list.pair_second.copy()
+        offset_vec = neighbor_list.offset_vec.copy()
+        # neighbors reached through a non-zero cell offset become ghost atoms
+        out_mask = np.any(offset_vec != 0, axis=1)
+        ghost_map = pair_second[out_mask]
+        out_offset = offset_vec[out_mask]
+        out_coords = positions[ghost_map] + out_offset.dot(cell)
+        atype = np.array(atype, dtype=int)
+        out_atype = atype[ghost_map]
+
+        nloc = positions.shape[0]
+        nghost = ghost_map.size
+        all_coords = np.concatenate((positions, out_coords), axis=0)
+        all_atype = np.concatenate((atype, out_atype), axis=0)
+        # renumber ghost neighbors to their new slots nloc..nall-1
+        pair_second[out_mask] = np.arange(nloc, nloc + nghost)
+        # get the mesh
+        mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int)
+        mesh[0] = nloc
+        # ilist
+        mesh[16 : 16 + nloc] = np.arange(nloc)
+        # numnei
+        mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1]
+        # jlist
+        mesh[16 + nloc * 2 :] = pair_second
+
+        # natoms_vec
+        natoms_vec = np.zeros(self.ntypes + 2, dtype=int)
+        natoms_vec[0] = nloc
+        natoms_vec[1] = nloc + nghost
+        for ii in range(self.ntypes):
+            natoms_vec[ii + 2] = np.count_nonzero(atype == ii)
+        # imap append ghost atoms
+        imap = np.concatenate((imap, np.arange(nloc, nloc + nghost)))
+        return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map
diff --git a/deepmd/infer/deep_polar.py b/deepmd/infer/deep_polar.py
index 118f8c98a7..c1f981ef86 100644
--- a/deepmd/infer/deep_polar.py
+++ b/deepmd/infer/deep_polar.py
@@ -30,6 +30,8 @@ class DeepPolar(DeepTensor):
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
 
     Warnings
     --------
@@ -44,6 +46,7 @@ def __init__(
         load_prefix: str = "load",
         default_tf_graph: bool = False,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ) -> None:
         # use this in favor of dict update to move attribute from class to
         # instance namespace
@@ -61,6 +64,7 @@ def __init__(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
 
     def get_dim_fparam(self) -> int:
@@ -83,10 +87,16 @@ class DeepGlobalPolar(DeepTensor):
         The prefix in the load computational graph
     default_tf_graph : bool
         If uses the default tf graph, otherwise build a new tf graph for evaluation
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
     """
 
     def __init__(
-        self, model_file: str, load_prefix: str = "load", default_tf_graph: bool = False
+        self,
+        model_file: str,
+        load_prefix: str = "load",
+        default_tf_graph: bool = False,
+        neighbor_list=None,
     ) -> None:
         self.tensors.update(
             {
@@ -101,6 +111,7 @@ def __init__(
             model_file,
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
+            neighbor_list=neighbor_list,
         )
 
     def eval(
diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py
index fc9a6a76ed..81cfdde7a8 100644
--- a/deepmd/infer/deep_pot.py
+++ b/deepmd/infer/deep_pot.py
@@ -51,6 +51,9 @@ class DeepPot(DeepEval):
         as the initial batch size.
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
 
     Examples
     --------
@@ -78,6 +81,7 @@ def __init__(
         default_tf_graph: bool = False,
         auto_batch_size: Union[bool, int, AutoBatchSize] = True,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ) -> None:
         # add these tensors on top of what is defined by DeepTensor Class
         # use this in favor of dict update to move attribute from class to
@@ -112,6 +116,7 @@ def __init__(
             default_tf_graph=default_tf_graph,
             auto_batch_size=auto_batch_size,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
 
         # load optional tensors
@@ -479,8 +484,30 @@ def _prepare_feed_dict(
             aparam = np.reshape(aparam, [nframes, natoms * fdim])
 
         # make natoms_vec and default_mesh
-        natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-        assert natoms_vec[0] == natoms
+        if self.neighbor_list is None:
+            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
+            assert natoms_vec[0] == natoms
+            mesh = make_default_mesh(pbc, mixed_type)
+            ghost_map = None
+        else:
+            if nframes > 1:
+                raise NotImplementedError(
+                    "neighbor_list does not support multiple frames"
+                )
+            (
+                natoms_vec,
+                coords,
+                atom_types,
+                mesh,
+                imap,
+                ghost_map,
+            ) = self.build_neighbor_list(
+                coords,
+                cells if cells is not None else None,
+                atom_types,
+                imap,
+                self.neighbor_list,
+            )
 
         # evaluate
         feed_dict_test = {}
@@ -501,12 +528,12 @@ def _prepare_feed_dict(
             raise RuntimeError
         if self.has_efield:
             feed_dict_test[self.t_efield] = np.reshape(efield, [-1])
-        feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type)
+        feed_dict_test[self.t_mesh] = mesh
         if self.has_fparam:
             feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1])
         if self.has_aparam:
             feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1])
-        return feed_dict_test, imap, natoms_vec
+        return feed_dict_test, imap, natoms_vec, ghost_map
 
     def _eval_inner(
         self,
@@ -522,10 +549,13 @@ def _eval_inner(
         natoms, nframes = self._get_natoms_and_nframes(
             coords, atom_types, mixed_type=mixed_type
         )
-        feed_dict_test, imap, natoms_vec = self._prepare_feed_dict(
+        feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict(
             coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
         )
 
+        nloc = natoms_vec[0]
+        nall = natoms_vec[1]
+
         t_out = [self.t_energy, self.t_force, self.t_virial]
         if atomic:
             t_out += [self.t_ae, self.t_av]
@@ -548,6 +578,13 @@ def _eval_inner(
             )
         else:
             natoms_real = natoms
+        if ghost_map is not None:
+            # add the value of ghost atoms to real atoms
+            force = np.reshape(force, [nframes, -1, 3])
+            np.add.at(force[0], ghost_map, force[0, nloc:])
+            if atomic:
+                av = np.reshape(av, [nframes, -1, 9])
+                np.add.at(av[0], ghost_map, av[0, nloc:])
 
         # reverse map of the outputs
         force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap)
@@ -556,11 +593,15 @@ def _eval_inner(
             av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap)
 
         energy = np.reshape(energy, [nframes, 1])
-        force = np.reshape(force, [nframes, natoms, 3])
+        force = np.reshape(force, [nframes, nall, 3])
+        if nloc < nall:
+            force = force[:, :nloc, :]
         virial = np.reshape(virial, [nframes, 9])
         if atomic:
             ae = np.reshape(ae, [nframes, natoms_real, 1])
-            av = np.reshape(av, [nframes, natoms, 9])
+            av = np.reshape(av, [nframes, nall, 9])
+            if nloc < nall:
+                av = av[:, :nloc, :]
             return energy, force, virial, ae, av
         else:
             return energy, force, virial
@@ -640,10 +681,11 @@ def _eval_descriptor_inner(
         natoms, nframes = self._get_natoms_and_nframes(
             coords, atom_types, mixed_type=mixed_type
         )
-        feed_dict_test, imap, natoms_vec = self._prepare_feed_dict(
+        feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict(
             coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
         )
         (descriptor,) = run_sess(
             self.sess, [self.t_descriptor], feed_dict=feed_dict_test
         )
+        imap = imap[:natoms]
         return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap)
diff --git a/deepmd/infer/deep_tensor.py b/deepmd/infer/deep_tensor.py
index 268523e959..a803eb0c6b 100644
--- a/deepmd/infer/deep_tensor.py
+++ b/deepmd/infer/deep_tensor.py
@@ -39,6 +39,8 @@ class DeepTensor(DeepEval):
         If uses the default tf graph, otherwise build a new tf graph for evaluation
     input_map : dict, optional
         The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
     """
 
     tensors: ClassVar[Dict[str, str]] = {
@@ -63,6 +65,7 @@ def __init__(
         load_prefix: str = "load",
         default_tf_graph: bool = False,
         input_map: Optional[dict] = None,
+        neighbor_list=None,
     ) -> None:
         """Constructor."""
         DeepEval.__init__(
@@ -71,6 +74,7 @@ def __init__(
             load_prefix=load_prefix,
             default_tf_graph=default_tf_graph,
             input_map=input_map,
+            neighbor_list=neighbor_list,
         )
         # check model type
         model_type = self.tensors["t_tensor"][2:-2]
@@ -209,8 +213,29 @@ def eval(
         )
 
         # make natoms_vec and default_mesh
-        natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-        assert natoms_vec[0] == natoms
+        if self.neighbor_list is None:
+            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
+            assert natoms_vec[0] == natoms
+            mesh = make_default_mesh(pbc, mixed_type)
+        else:
+            if nframes > 1:
+                raise NotImplementedError(
+                    "neighbor_list does not support multiple frames"
+                )
+            (
+                natoms_vec,
+                coords,
+                atom_types,
+                mesh,
+                imap,
+                _,
+            ) = self.build_neighbor_list(
+                coords,
+                cells if cells is not None else None,
+                atom_types,
+                imap,
+                self.neighbor_list,
+            )
 
         # evaluate
         feed_dict_test = {}
@@ -223,7 +248,7 @@ def eval(
             )
         feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
         feed_dict_test[self.t_box] = np.reshape(cells, [-1])
-        feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type)
+        feed_dict_test[self.t_mesh] = mesh
 
         if atomic:
             assert (
@@ -333,8 +358,30 @@ def eval_full(
         )
 
         # make natoms_vec and default_mesh
-        natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-        assert natoms_vec[0] == natoms
+        if self.neighbor_list is None:
+            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
+            assert natoms_vec[0] == natoms
+            mesh = make_default_mesh(pbc, mixed_type)
+            ghost_map = None
+        else:
+            if nframes > 1:
+                raise NotImplementedError(
+                    "neighbor_list does not support multiple frames"
+                )
+            (
+                natoms_vec,
+                coords,
+                atom_types,
+                mesh,
+                imap,
+                ghost_map,
+            ) = self.build_neighbor_list(
+                coords,
+                cells if cells is not None else None,
+                atom_types,
+                imap,
+                self.neighbor_list,
+            )
 
         # evaluate
         feed_dict_test = {}
@@ -347,7 +394,7 @@ def eval_full(
             )
         feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
         feed_dict_test[self.t_box] = np.reshape(cells, [-1])
-        feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type)
+        feed_dict_test[self.t_mesh] = mesh
 
         t_out = [self.t_global_tensor, self.t_force, self.t_virial]
         if atomic:
@@ -361,21 +408,39 @@ def eval_full(
             at = v_out[3]  # atom tensor
             av = v_out[4]  # atom virial
 
+        nloc = natoms_vec[0]
+        nall = natoms_vec[1]
+
+        if ghost_map is not None:
+            # add the value of ghost atoms to real atoms
+            force = np.reshape(force, [nframes * nout, -1, 3])
+            # TODO: is there some way not to use for loop?
+            for ii in range(nframes * nout):
+                np.add.at(force[ii], ghost_map, force[ii, nloc:])
+            if atomic:
+                av = np.reshape(av, [nframes * nout, -1, 9])
+                for ii in range(nframes * nout):
+                    np.add.at(av[ii], ghost_map, av[ii, nloc:])
+
         # please note here the shape are wrong!
-        force = self.reverse_map(np.reshape(force, [nframes * nout, natoms, 3]), imap)
+        force = self.reverse_map(np.reshape(force, [nframes * nout, nall, 3]), imap)
         if atomic:
             at = self.reverse_map(
                 np.reshape(at, [nframes, len(sel_at), nout]), sel_imap
             )
-            av = self.reverse_map(np.reshape(av, [nframes * nout, natoms, 9]), imap)
+            av = self.reverse_map(np.reshape(av, [nframes * nout, nall, 9]), imap)
 
         # make sure the shapes are correct here
         gt = np.reshape(gt, [nframes, nout])
-        force = np.reshape(force, [nframes, nout, natoms, 3])
+        force = np.reshape(force, [nframes, nout, nall, 3])
+        if nloc < nall:
+            force = force[:, :, :nloc, :]
         virial = np.reshape(virial, [nframes, nout, 9])
         if atomic:
             at = np.reshape(at, [nframes, len(sel_at), self.output_dim])
-            av = np.reshape(av, [nframes, nout, natoms, 9])
+            av = np.reshape(av, [nframes, nout, nall, 9])
+            if nloc < nall:
+                av = av[:, :, :nloc, :]
             return gt, force, virial, at, av
         else:
             return gt, force, virial
diff --git a/source/tests/test_deepdipole.py b/source/tests/test_deepdipole.py
index e26ad84a55..1d06b5fe92 100644
--- a/source/tests/test_deepdipole.py
+++ b/source/tests/test_deepdipole.py
@@ -2,6 +2,7 @@
 import os
 import unittest
 
+import ase.neighborlist
 import numpy as np
 from common import (
     finite_difference,
@@ -964,10 +965,6 @@ def test_1frame_full_atm(self):
         gt, ff, vv, at, av = self.dp.eval_full(
             self.coords, self.box, self.atype, atomic=True
         )
-        for dd in at, ff, av:
-            print("\n\n")
-            print(", ".join(f"{ii:.18e}" for ii in dd.reshape(-1)))
-            print("\n\n")
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -1035,3 +1032,30 @@ def test_1frame_full_atm_shuffle(self):
         np.testing.assert_almost_equal(
             vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places
         )
+
+
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
+class TestDeepDipoleNewPBCNeighborList(TestDeepDipoleNewPBC):
+    @classmethod
+    def setUpClass(cls):
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deepdipole_new.pbtxt")),
+            "deepdipole_new.pb",
+        )
+        cls.dp = DeepDipole(
+            "deepdipole_new.pb",
+            neighbor_list=ase.neighborlist.NewPrimitiveNeighborList(
+                cutoffs=6, bothways=True
+            ),
+        )
+
+    @unittest.skip("multiple frames not supported")
+    def test_2frame_full_atm(self):
+        pass
+
+    @unittest.skip("multiple frames not supported")
+    def test_2frame_old_atm(self):
+        pass
diff --git a/source/tests/test_deeppolar.py b/source/tests/test_deeppolar.py
index 271d1650c0..9627851de4 100644
--- a/source/tests/test_deeppolar.py
+++ b/source/tests/test_deeppolar.py
@@ -2,6 +2,7 @@
 import os
 import unittest
 
+import ase.neighborlist
 import numpy as np
 from common import (
     tests_path,
@@ -980,12 +981,6 @@ def test_1frame_full_atm(self):
             self.coords, self.box, self.atype, atomic=True
         )
 
-        # print the values
-        for dd in (at, ff, av):
-            print("\n\n")
-            print(", ".join(f"{i:.18e}" for i in dd.reshape(-1)))
-            print("\n\n")
-
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -1088,3 +1083,30 @@ def test_2frame_full_atm(self):
         np.testing.assert_almost_equal(
             vv.reshape([-1]), expected_gv.reshape([-1]), decimal=default_places
         )
+
+
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
+class TestDeepPolarNewPBCNeighborList(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppolar_new.pbtxt")),
+            "deeppolar_new.pb",
+        )
+        cls.dp = DeepPolar(
+            "deeppolar_new.pb",
+            neighbor_list=ase.neighborlist.NewPrimitiveNeighborList(
+                cutoffs=6, bothways=True
+            ),
+        )
+
+    @unittest.skip("multiple frames not supported")
+    def test_2frame_full_atm(self):
+        pass
+
+    @unittest.skip("multiple frames not supported")
+    def test_2frame_old_atm(self):
+        pass
diff --git a/source/tests/test_deeppot_a.py b/source/tests/test_deeppot_a.py
index 1c6cdc4afc..c229b4302c 100644
--- a/source/tests/test_deeppot_a.py
+++ b/source/tests/test_deeppot_a.py
@@ -3,6 +3,7 @@
 import shutil
 import unittest
 
+import ase.neighborlist
 import numpy as np
 from common import (
     run_dp,
@@ -1096,3 +1097,25 @@ def test_2frame_atm_all_param(self):
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
         expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
+
+
+class TestDeepPotAPBCNeighborList(TestDeepPotAPBC):
+    @classmethod
+    def setUpClass(cls):
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+        )
+        cls.dp = DeepPot(
+            "deeppot.pb",
+            neighbor_list=ase.neighborlist.NewPrimitiveNeighborList(
+                cutoffs=6, bothways=True
+            ),
+        )
+
+    @unittest.skip("multiple frames not supported")
+    def test_2frame_atm(self):
+        pass
+
+    @unittest.skip("Zero atoms not supported")
+    def test_zero_input(self):
+        pass

From 2204ec197d669445e812139b0c16239ea72672f7 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 11 Dec 2023 00:23:37 -0500
Subject: [PATCH 35/97] fix GPU mapping error for Horovod + finetune (#3048)

When fine-tuning with Horovod, the same error as
https://github.com/deepmodeling/deepmd-kit/issues/2712 is thrown at the
place I modified in this PR.

It seems `tf.test.is_gpu_available` will try to use all GPUs, but
`tf.config.get_visible_devices` won't.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd/utils/batch_size.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py
index 2b3117d849..fe876a65a5 100644
--- a/deepmd/utils/batch_size.py
+++ b/deepmd/utils/batch_size.py
@@ -7,8 +7,12 @@
 )
 
 import numpy as np
+from packaging.version import (
+    Version,
+)
 
 from deepmd.env import (
+    TF_VERSION,
     tf,
 )
 from deepmd.utils.errors import (
@@ -59,7 +63,10 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
             self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1
         else:
             self.maximum_working_batch_size = initial_batch_size
-            if tf.test.is_gpu_available():
+            if (
+                Version(TF_VERSION) >= Version("1.14")
+                and tf.config.experimental.get_visible_devices("GPU")
+            ) or tf.test.is_gpu_available():
                 self.minimal_not_working_batch_size = 2**31
             else:
                 self.minimal_not_working_batch_size = (

From 7648d7ad9bff931a47f62a0b21f775905cb0d046 Mon Sep 17 00:00:00 2001
From: nahso <47053538+nahso@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:02:38 +0800
Subject: [PATCH 36/97] remove unused codes in se_a.py (#3049)

---
 deepmd/descriptor/se_a.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 9beec2db1d..721bb0d534 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -1002,13 +1002,6 @@ def _filter_lower(
                             [-1, two_side_type_embedding.shape[-1]],
                         )
 
-                        atype_expand = tf.reshape(self.atype_nloc, [-1, 1])
-                        idx_i = tf.tile(atype_expand * padding_ntypes, [1, self.nnei])
-                        idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei])
-                        idx = idx_i + idx_j
-                        index_of_two_side = tf.reshape(idx, [-1])
-                        self.extra_embedding_index = index_of_two_side
-
                         net_output = embedding_net(
                             two_side_type_embedding,
                             self.filter_neuron,

From 406e8f77005d992af80c3936e268d30444d1684b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:03:15 +0800
Subject: [PATCH 37/97] Bump actions/deploy-pages from 2 to 3 (#3052)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [actions/deploy-pages](https://github.com/actions/deploy-pages)
from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/deploy-pages/releases">actions/deploy-pages's
releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<h1>Changelog</h1>
<ul>
<li>Update action to node20 <a
href="https://github.com/takost"><code>@​takost</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/256">#256</a>)</li>
</ul>
<hr />
<p>See details of <a
href="https://github.com/actions/deploy-pages/compare/v2.0.5...v3.0.0">all
code changes</a> since previous release.</p>
<p>:warning: For use with products other than GitHub.com, such as GitHub
Enterprise Server, please consult the <a
href="https://github.com/actions/deploy-pages/#compatibilty">compatibility
table</a>.</p>
<h2>v2.0.5</h2>
<h1>Changelog</h1>
<ul>
<li>Bump prettier from 3.0.0 to 3.1.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/264">#264</a>)</li>
<li>Bump <code>@​octokit/request-error</code> from 5.0.0 to 5.0.1 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/263">#263</a>)</li>
<li>Bump <code>@​actions/http-client</code> from 2.1.0 to 2.2.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/262">#262</a>)</li>
<li>Bump <code>@​vercel/ncc</code> from 0.36.1 to 0.38.1 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/260">#260</a>)</li>
<li>Bump eslint from 8.44.0 to 8.54.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/257">#257</a>)</li>
<li>Bump nock from 13.3.1 to 13.3.8 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/254">#254</a>)</li>
<li>Bump actions/setup-node from 3 to 4 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/246">#246</a>)</li>
<li>Bump release-drafter/release-drafter from 5.24.0 to 5.25.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/245">#245</a>)</li>
<li>Bump eslint-plugin-github from 4.8.0 to 4.10.1 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/232">#232</a>)</li>
<li>Bump jest from 29.6.1 to 29.7.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/224">#224</a>)</li>
<li>Fix cosmetic error by replacing comma with period <a
href="https://github.com/JamesMGreene"><code>@​JamesMGreene</code></a>
(<a
href="https://redirect.github.com/actions/deploy-pages/issues/210">#210</a>)</li>
<li>Bump actions/checkout from 3 to 4 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/220">#220</a>)</li>
<li>Bump eslint-config-prettier from 8.8.0 to 9.0.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/209">#209</a>)</li>
<li>Remove &quot;beta&quot; disclaimer from README <a
href="https://github.com/JamesMGreene"><code>@​JamesMGreene</code></a>
(<a
href="https://redirect.github.com/actions/deploy-pages/issues/243">#243</a>)</li>
</ul>
<hr />
<p>See details of <a
href="https://github.com/actions/deploy-pages/compare/v2.0.4...v2.0.5">all
code changes</a> since previous release.</p>
<p>:warning: For use with products other than GitHub.com, such as GitHub
Enterprise Server, please consult the <a
href="https://github.com/actions/deploy-pages/#compatibilty">compatibility
table</a>.</p>
<h2>v2.0.4</h2>
<h1>Changelog</h1>
<ul>
<li>Update GHES compatibility table after verifying with
<code>3.9.x</code> <a
href="https://github.com/JamesMGreene"><code>@​JamesMGreene</code></a>
(<a
href="https://redirect.github.com/actions/deploy-pages/issues/201">#201</a>)</li>
<li>Bump <code>@​octokit/request-error</code> from 4.0.1 to 5.0.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/194">#194</a>)</li>
<li>Bump prettier from 2.8.8 to 3.0.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/196">#196</a>)</li>
<li>Bump jest from 29.5.0 to 29.6.1 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/195">#195</a>)</li>
<li>Bump release-drafter/release-drafter from 5.23.0 to 5.24.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/192">#192</a>)</li>
<li>Bump eslint from 8.42.0 to 8.44.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/191">#191</a>)</li>
<li>Remove circular JSON references for error debugging <a
href="https://github.com/JamesMGreene"><code>@​JamesMGreene</code></a>
(<a
href="https://redirect.github.com/actions/deploy-pages/issues/197">#197</a>)</li>
</ul>
<hr />
<p>See details of <a
href="https://github.com/actions/deploy-pages/compare/v2.0.3...v2.0.4">all
code changes</a> since previous release.</p>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/actions/deploy-pages/commit/13b55b33dd8996121833dbc1db458c793a334630"><code>13b55b3</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/272">#272</a>
from kbdharun/main</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/c284911fe17a5e719089f29411dceb772a857d85"><code>c284911</code></a>
chore/docs: update version, fix typos</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/ae44070880dce4efcb556237318223e3046a97b8"><code>ae44070</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/266">#266</a>
from actions/dependabot/npm_and_yarn/eslint-8.55.0</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/34a8f6066ad82e61355552e626e3fc7e07b6d31b"><code>34a8f60</code></a>
Bump eslint from 8.54.0 to 8.55.0</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/b65643c0ab5380851bfd04701efaa6dac1527129"><code>b65643c</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/267">#267</a>
from actions/dependabot/npm_and_yarn/nock-13.4.0</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/faceed1585e750e07a0f28d7d7ea6e0ed58ebd1d"><code>faceed1</code></a>
Bump nock from 13.3.8 to 13.4.0</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/230466c7069409d62b096d905ecda6363461ea56"><code>230466c</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/268">#268</a>
from actions/dependabot/npm_and_yarn/eslint-config-pr...</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/38d34f8e6f95dd9152de092f3709be25a4bd11a7"><code>38d34f8</code></a>
Bump eslint-config-prettier from 9.0.0 to 9.1.0</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/cd33125d26441f1d6cb94bab9bae0b9950a3b0a0"><code>cd33125</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/269">#269</a>
from actions/dependabot/npm_and_yarn/actions/core-1.10.1</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/dd970aea72241188830c63c1d52ff2b56f7feb63"><code>dd970ae</code></a>
Update distributables after Dependabot 🤖</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/deploy-pages/compare/v2...v3">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/deploy-pages&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 67241e6e23..174d803146 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -189,7 +189,7 @@ jobs:
     steps:
       - name: Deploy to GitHub Pages
         id: deployment
-        uses: actions/deploy-pages@v2
+        uses: actions/deploy-pages@v3
 
   pass:
     name: Pass testing build wheels

From 8db5e6a33fa05d8bb118265f2101dc566dd92b55 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:04:06 +0800
Subject: [PATCH 38/97] Bump actions/setup-python from 4 to 5 (#3051)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [actions/setup-python](https://github.com/actions/setup-python)
from 4 to 5.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/setup-python/releases">actions/setup-python's
releases</a>.</em></p>
<blockquote>
<h2>v5.0.0</h2>
<h2>What's Changed</h2>
<p>In scope of this release, we update node version runtime from node16
to node20 (<a
href="https://redirect.github.com/actions/setup-python/pull/772">actions/setup-python#772</a>).
Besides, we update dependencies to the latest versions.</p>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/setup-python/compare/v4.8.0...v5.0.0">https://github.com/actions/setup-python/compare/v4.8.0...v5.0.0</a></p>
<h2>v4.8.0</h2>
<h2>What's Changed</h2>
<p>In scope of this release we added support for GraalPy (<a
href="https://redirect.github.com/actions/setup-python/pull/694">actions/setup-python#694</a>).
You can use this snippet to set up GraalPy:</p>
<pre lang="yaml"><code>steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
  with:
    python-version: 'graalpy-22.3'
- run: python my_script.py
</code></pre>
<p>Besides, the release contains such changes as:</p>
<ul>
<li>Trim python version when reading from file by <a
href="https://github.com/FerranPares"><code>@​FerranPares</code></a> in
<a
href="https://redirect.github.com/actions/setup-python/pull/628">actions/setup-python#628</a></li>
<li>Use non-deprecated versions in examples by <a
href="https://github.com/jeffwidman"><code>@​jeffwidman</code></a> in <a
href="https://redirect.github.com/actions/setup-python/pull/724">actions/setup-python#724</a></li>
<li>Change deprecation comment to past tense by <a
href="https://github.com/jeffwidman"><code>@​jeffwidman</code></a> in <a
href="https://redirect.github.com/actions/setup-python/pull/723">actions/setup-python#723</a></li>
<li>Bump <code>@​babel/traverse</code> from 7.9.0 to 7.23.2 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/setup-python/pull/743">actions/setup-python#743</a></li>
<li>advanced-usage.md: Encourage the use actions/checkout@v4 by <a
href="https://github.com/cclauss"><code>@​cclauss</code></a> in <a
href="https://redirect.github.com/actions/setup-python/pull/729">actions/setup-python#729</a></li>
<li>Examples now use checkout@v4 by <a
href="https://github.com/simonw"><code>@​simonw</code></a> in <a
href="https://redirect.github.com/actions/setup-python/pull/738">actions/setup-python#738</a></li>
<li>Update actions/checkout to v4 by <a
href="https://github.com/dmitry-shibanov"><code>@​dmitry-shibanov</code></a>
in <a
href="https://redirect.github.com/actions/setup-python/pull/761">actions/setup-python#761</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a
href="https://github.com/FerranPares"><code>@​FerranPares</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/setup-python/pull/628">actions/setup-python#628</a></li>
<li><a href="https://github.com/timfel"><code>@​timfel</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/setup-python/pull/694">actions/setup-python#694</a></li>
<li><a
href="https://github.com/jeffwidman"><code>@​jeffwidman</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/setup-python/pull/724">actions/setup-python#724</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/setup-python/compare/v4...v4.8.0">https://github.com/actions/setup-python/compare/v4...v4.8.0</a></p>
<h2>v4.7.1</h2>
<h2>What's Changed</h2>
<ul>
<li>Bump word-wrap from 1.2.3 to 1.2.4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/setup-python/pull/702">actions/setup-python#702</a></li>
<li>Add range validation for toml files by <a
href="https://github.com/dmitry-shibanov"><code>@​dmitry-shibanov</code></a>
in <a
href="https://redirect.github.com/actions/setup-python/pull/726">actions/setup-python#726</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/setup-python/compare/v4...v4.7.1">https://github.com/actions/setup-python/compare/v4...v4.7.1</a></p>
<h2>v4.7.0</h2>
<p>In scope of this release, the support for reading python version from
pyproject.toml was added (<a
href="https://redirect.github.com/actions/setup-python/pull/669">actions/setup-python#669</a>).</p>
<pre lang="yaml"><code>      - name: Setup Python
        uses: actions/setup-python@v4
&lt;/tr&gt;&lt;/table&gt;
</code></pre>
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/actions/setup-python/commit/0a5c61591373683505ea898e09a3ea4f39ef2b9c"><code>0a5c615</code></a>
Update action to node20 (<a
href="https://redirect.github.com/actions/setup-python/issues/772">#772</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/0ae58361cdfd39e2950bed97a1e26aa20c3d8955"><code>0ae5836</code></a>
Add example of GraalPy to docs (<a
href="https://redirect.github.com/actions/setup-python/issues/773">#773</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/b64ffcaf5b410884ad320a9cfac8866006a109aa"><code>b64ffca</code></a>
update actions/checkout to v4 (<a
href="https://redirect.github.com/actions/setup-python/issues/761">#761</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/8d2896179abf658742de432b3f203d2c2d86a587"><code>8d28961</code></a>
Examples now use checkout@v4 (<a
href="https://redirect.github.com/actions/setup-python/issues/738">#738</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/7bc6abb01e0555719edc2dbca70a2fde309e5e56"><code>7bc6abb</code></a>
advanced-usage.md: Encourage the use actions/checkout@v4 (<a
href="https://redirect.github.com/actions/setup-python/issues/729">#729</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/e8111cec9d3dc15220d8a3b638f08419f57b906a"><code>e8111ce</code></a>
Bump <code>@​babel/traverse</code> from 7.9.0 to 7.23.2 (<a
href="https://redirect.github.com/actions/setup-python/issues/743">#743</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/a00ea43da65e7c04d2bdae58b3afecd77057eb9e"><code>a00ea43</code></a>
add fix for graalpy ci (<a
href="https://redirect.github.com/actions/setup-python/issues/741">#741</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/8635b1ccc5934e73ed3510980fd2e7790b85839b"><code>8635b1c</code></a>
Change deprecation comment to past tense (<a
href="https://redirect.github.com/actions/setup-python/issues/723">#723</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/f6cc428f535856f9c23558d01765a42a4d6cf758"><code>f6cc428</code></a>
Use non-deprecated versions in examples (<a
href="https://redirect.github.com/actions/setup-python/issues/724">#724</a>)</li>
<li><a
href="https://github.com/actions/setup-python/commit/5f2af211d616f86005883b44826180b21abb4060"><code>5f2af21</code></a>
Add GraalPy support (<a
href="https://redirect.github.com/actions/setup-python/issues/694">#694</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/setup-python/compare/v4...v5">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-python&package-manager=github_actions&previous-version=4&new-version=5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_cc.yml    | 2 +-
 .github/workflows/build_wheel.yml | 4 ++--
 .github/workflows/test_cc.yml     | 2 +-
 .github/workflows/test_cuda.yml   | 2 +-
 .github/workflows/test_python.yml | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml
index e6377f4fab..f029517d80 100644
--- a/.github/workflows/build_cc.yml
+++ b/.github/workflows/build_cc.yml
@@ -21,7 +21,7 @@ jobs:
           dp_variant: clang
     steps:
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: '3.11'
         cache: 'pip'
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 174d803146..e700109cce 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -87,7 +87,7 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         name: Install Python
         with:
           python-version: '3.11'
@@ -164,7 +164,7 @@ jobs:
         with:
           name: artifact
           path: dist/packages
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         name: Install Python
         with:
           python-version: '3.11'
diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
index a98afa7a94..ef6fade8e5 100644
--- a/.github/workflows/test_cc.yml
+++ b/.github/workflows/test_cc.yml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: '3.11'
         cache: 'pip'
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index d8eddaa44f..e74c0abde2 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -18,7 +18,7 @@ jobs:
     - name: Make sudo and git work
       run: apt-get update && apt-get install -y sudo git
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: '3.11'
         # cache: 'pip'
diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
index b6011cb523..1bd78bfae0 100644
--- a/.github/workflows/test_python.yml
+++ b/.github/workflows/test_python.yml
@@ -18,7 +18,7 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python }}
         cache: 'pip'

From 558051e94d8a43a4b3eaa5637348200ee843f312 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:04:44 +0800
Subject: [PATCH 39/97] Bump actions/labeler from 4 to 5 (#3050)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [actions/labeler](https://github.com/actions/labeler) from 4 to 5.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/labeler/releases">actions/labeler's
releases</a>.</em></p>
<blockquote>
<h2>v5.0.0</h2>
<h2>What's Changed</h2>
<p>This release contains the following breaking changes:</p>
<ol>
<li>
<p>The ability to apply labels based on the names of base and/or head
branches was added (<a
href="https://redirect.github.com/actions/labeler/issues/186">#186</a>
and <a
href="https://redirect.github.com/actions/labeler/issues/54">#54</a>).
The match object for changed files was expanded with new combinations in
order to make it more intuitive and flexible (<a
href="https://redirect.github.com/actions/labeler/issues/423">#423</a>
and <a
href="https://redirect.github.com/actions/labeler/issues/101">#101</a>).
As a result, the configuration file structure was significantly
redesigned and is not compatible with the structure of the previous
version. Please read the <a
href="https://github.com/actions/labeler/tree/main#pull-request-labeler">action
documentation</a> to find out how to adapt your configuration files for
use with the new action version.</p>
</li>
<li>
<p>The bug related to the <code>sync-labels</code> input was fixed (<a
href="https://redirect.github.com/actions/labeler/issues/112">#112</a>).
Now the input value is read correctly.</p>
</li>
<li>
<p>By default, <code>dot</code> input is set to <code>true</code>. Now,
paths starting with a dot (e.g. <code>.github</code>) are matched by
default.</p>
</li>
<li>
<p>Version 5 of this action updated the <a
href="https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runs-for-javascript-actions">runtime
to Node.js 20</a>. All scripts are now run with Node.js 20 instead of
Node.js 16 and are affected by any breaking changes between Node.js 16
and 20.</p>
</li>
</ol>
<p>For more information, please read the <a
href="https://github.com/actions/labeler/tree/main#pull-request-labeler">action
documentation</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/joshdales"><code>@​joshdales</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/labeler/pull/203">actions/labeler#203</a></li>
<li><a
href="https://github.com/dusan-trickovic"><code>@​dusan-trickovic</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/labeler/pull/626">actions/labeler#626</a></li>
<li><a href="https://github.com/sungh0lim"><code>@​sungh0lim</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/labeler/pull/630">actions/labeler#630</a></li>
<li><a
href="https://github.com/TrianguloY"><code>@​TrianguloY</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/labeler/pull/629">actions/labeler#629</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/labeler/compare/v4...v5.0.0">https://github.com/actions/labeler/compare/v4...v5.0.0</a></p>
<h2>v5.0.0-beta.1</h2>
<h2>What's Changed</h2>
<p>In scope of this beta release, the structure of the configuration
file (<code>.github/labeler.yml</code>) was changed from</p>
<pre lang="yml"><code>LabelName:
- any:
  - changed-files: ['list', 'of', 'globs']
  - base-branch: ['list', 'of', 'regexps']
  - head-branch: ['list', 'of', 'regexps']
- all:
  - changed-files: ['list', 'of', 'globs']
  - base-branch: ['list', 'of', 'regexps']
  - head-branch: ['list', 'of', 'regexps']
</code></pre>
<p>to</p>
<pre lang="yml"><code>LabelName:
- any:
  - changed-files:
    - AnyGlobToAnyFile: ['list', 'of', 'globs']
    - AnyGlobToAllFiles: ['list', 'of', 'globs']
    - AllGlobsToAnyFile: ['list', 'of', 'globs']
    - AllGlobsToAllFiles: ['list', 'of', 'globs']
  - base-branch: ['list', 'of', 'regexps']
  - head-branch: ['list', 'of', 'regexps']
- all:
  - changed-files:
    - AnyGlobToAnyFile: ['list', 'of', 'globs']
    - AnyGlobToAllFiles: ['list', 'of', 'globs']
    - AllGlobsToAnyFile: ['list', 'of', 'globs']
&lt;/tr&gt;&lt;/table&gt;
</code></pre>
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/actions/labeler/commit/8558fd74291d67161a8a78ce36a881fa63b766a9"><code>8558fd7</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/labeler/issues/709">#709</a>
from actions/v5.0.0-beta</li>
<li><a
href="https://github.com/actions/labeler/commit/000ca75fe6c5838c790ca73b764419065c1594a6"><code>000ca75</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/labeler/issues/700">#700</a>
from MaksimZhukov/apply-suggestions-and-update-docume...</li>
<li><a
href="https://github.com/actions/labeler/commit/cb66c2f0788d382da1dabd06a094c0bc6ed3e26a"><code>cb66c2f</code></a>
Update dist</li>
<li><a
href="https://github.com/actions/labeler/commit/9181355e36dc8e434c93ba5aaa33f699c4162f38"><code>9181355</code></a>
Apply suggestions for the beta version and update the documentation</li>
<li><a
href="https://github.com/actions/labeler/commit/efe4c1c90edf0ec238b5ee13e66e1abcbbe7446e"><code>efe4c1c</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/labeler/issues/699">#699</a>
from MaksimZhukov/update-node-runtime-and-dependencies</li>
<li><a
href="https://github.com/actions/labeler/commit/c0957ad7c30fb0638e275122d51df2330459854a"><code>c0957ad</code></a>
Run Prettier</li>
<li><a
href="https://github.com/actions/labeler/commit/8dc8d1842f2f3ed1cf6f4190490ad02e0a755f0c"><code>8dc8d18</code></a>
Update Node.js version in reusable workflows</li>
<li><a
href="https://github.com/actions/labeler/commit/d0d0bbebfbe119c8403a0e2724c5fa5b555c78b9"><code>d0d0bbe</code></a>
Update documentation</li>
<li><a
href="https://github.com/actions/labeler/commit/1375c42512e0b855687040307d6dcaf403da9a4e"><code>1375c42</code></a>
5.0.0</li>
<li><a
href="https://github.com/actions/labeler/commit/ab7411ec21ea67102994adc0fa01b3f5cf63509f"><code>ab7411e</code></a>
Change version of Node.js runtime to node20</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/labeler/compare/v4...v5">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/labeler&package-manager=github_actions&previous-version=4&new-version=5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/labeler.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index 2c8ba30ba1..877c780f1f 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -9,6 +9,6 @@ jobs:
       pull-requests: write
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/labeler@v4
+    - uses: actions/labeler@v5
       with:
         repo-token: "${{ secrets.GITHUB_TOKEN }}"

From 33c2c8d6d95ddc540cdd397564eb6e5b90718c3f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:08:09 +0800
Subject: [PATCH 40/97] [pre-commit.ci] pre-commit autoupdate (#3055)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/PyCQA/isort: 5.12.0 →
5.13.0](https://github.com/PyCQA/isort/compare/5.12.0...5.13.0)
- [github.com/astral-sh/ruff-pre-commit: v0.1.6 →
v0.1.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.6...v0.1.7)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 64a061dd54..ce83792f10 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,14 +23,14 @@ repos:
     -   id: check-toml
 # Python
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+    rev: 5.13.0
     hooks:
     - id: isort
       files: \.py$
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.6
+    rev: v0.1.7
     hooks:
     - id: ruff
       args: ["--fix"]

From e5c9c590cab12d8dd9e8d509706547d155f1bf9f Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 11 Dec 2023 20:09:55 -0500
Subject: [PATCH 41/97] docs: document external neighbor list (#3056)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/inference/python.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/doc/inference/python.md b/doc/inference/python.md
index 48eb1d7df0..b5d3ca1efc 100644
--- a/doc/inference/python.md
+++ b/doc/inference/python.md
@@ -27,3 +27,20 @@ model_devi = calc_model_devi(coord, cell, atype, graphs)
 ```
 
 Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times. Otherwise, tensorFlow will never release the memory and this may lead to an out-of-memory (OOM) error.
+
+## External neighbor list algorithm
+
+The native neighbor list algorithm of the DeePMD-kit is in $O(N^2)$ complexity ($N$ is the number of atoms).
+While this is not a problem for small systems that quantum methods can afford, the large systems for molecular dynamics have slow performance.
+In this case, one may pass an external neighbor list that has lower complexity to {class}`DeepPot <deepmd.infer.DeepPot>`, once it is compatible with {class}`ase.neighborlist.NewPrimitiveNeighborList`.
+
+```py
+import ase.neighborlist
+
+neighbor_list = ase.neighborlist.NewPrimitiveNeighborList(
+    cutoffs=6, bothways=True, self_interaction=False
+)
+dp = DeepPot("graph.pb", neighbor_list=neighbor_list)
+```
+
+The `update` and `build` methods will be called by {class}`DeepPot <deepmd.infer.DeepPot>`, and `first_neigh`, `pair_second`, and `offset_vec` properties will be used.

From ddfa7819435d1665f1bae18460ec57665f0a9610 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 11 Dec 2023 20:10:37 -0500
Subject: [PATCH 42/97] print NaN loss when labeled data is not found (#3047)

Currently, when a loss term is defined but the corresponding labeled data
is not found, `lcurve.out` shows a misleading RMSE (computed as if the
labels were all zero). In this case, printing NaN is better.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/loss/dos.py      | 12 +++++++----
 deepmd/loss/ener.py     | 44 +++++++++++++++++++++++++++++------------
 deepmd/loss/loss.py     | 19 ++++++++++++++++++
 deepmd/loss/tensor.py   |  4 ++--
 deepmd/train/trainer.py |  1 +
 5 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/deepmd/loss/dos.py b/deepmd/loss/dos.py
index fa30552486..7d38f2b17a 100644
--- a/deepmd/loss/dos.py
+++ b/deepmd/loss/dos.py
@@ -143,16 +143,20 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_dos:
             l2_loss += atom_norm_ener * (pref_dos * l2_dos_loss)
-            more_loss["l2_dos_loss"] = l2_dos_loss
+            more_loss["l2_dos_loss"] = self.display_if_exist(l2_dos_loss, find_dos)
         if self.has_cdf:
             l2_loss += atom_norm_ener * (pref_cdf * l2_cdf_loss)
-            more_loss["l2_cdf_loss"] = l2_cdf_loss
+            more_loss["l2_cdf_loss"] = self.display_if_exist(l2_cdf_loss, find_dos)
         if self.has_ados:
             l2_loss += global_cvt_2_ener_float(pref_ados * l2_atom_dos_loss)
-            more_loss["l2_atom_dos_loss"] = l2_atom_dos_loss
+            more_loss["l2_atom_dos_loss"] = self.display_if_exist(
+                l2_atom_dos_loss, find_atom_dos
+            )
         if self.has_acdf:
             l2_loss += global_cvt_2_ener_float(pref_acdf * l2_atom_cdf_loss)
-            more_loss["l2_atom_cdf_loss"] = l2_atom_cdf_loss
+            more_loss["l2_atom_cdf_loss"] = self.display_if_exist(
+                l2_atom_cdf_loss, find_atom_dos
+            )
 
         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py
index 95997bad10..d7f83f09e5 100644
--- a/deepmd/loss/ener.py
+++ b/deepmd/loss/ener.py
@@ -291,22 +291,32 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_e:
             l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
-            more_loss["l2_ener_loss"] = l2_ener_loss
+            more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
         if self.has_f:
             l2_loss += global_cvt_2_ener_float(pref_f * l2_force_loss)
-            more_loss["l2_force_loss"] = l2_force_loss
+            more_loss["l2_force_loss"] = self.display_if_exist(
+                l2_force_loss, find_force
+            )
         if self.has_v:
             l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss))
-            more_loss["l2_virial_loss"] = l2_virial_loss
+            more_loss["l2_virial_loss"] = self.display_if_exist(
+                l2_virial_loss, find_virial
+            )
         if self.has_ae:
             l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss)
-            more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss
+            more_loss["l2_atom_ener_loss"] = self.display_if_exist(
+                l2_atom_ener_loss, find_atom_ener
+            )
         if self.has_pf:
             l2_loss += global_cvt_2_ener_float(pref_pf * l2_pref_force_loss)
-            more_loss["l2_pref_force_loss"] = l2_pref_force_loss
+            more_loss["l2_pref_force_loss"] = self.display_if_exist(
+                l2_pref_force_loss, find_atom_pref
+            )
         if self.has_gf:
             l2_loss += global_cvt_2_ener_float(pref_gf * l2_gen_force_loss)
-            more_loss["l2_gen_force_loss"] = l2_gen_force_loss
+            more_loss["l2_gen_force_loss"] = self.display_if_exist(
+                l2_gen_force_loss, find_drdq
+            )
 
         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
@@ -553,19 +563,25 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_e:
             l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
-        more_loss["l2_ener_loss"] = l2_ener_loss
+        more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
         if self.has_fr:
             l2_loss += global_cvt_2_ener_float(pref_fr * l2_force_r_loss)
-        more_loss["l2_force_r_loss"] = l2_force_r_loss
+        more_loss["l2_force_r_loss"] = self.display_if_exist(
+            l2_force_r_loss, find_force
+        )
         if self.has_fm:
             l2_loss += global_cvt_2_ener_float(pref_fm * l2_force_m_loss)
-        more_loss["l2_force_m_loss"] = l2_force_m_loss
+        more_loss["l2_force_m_loss"] = self.display_if_exist(
+            l2_force_m_loss, find_force
+        )
         if self.has_v:
             l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss))
-        more_loss["l2_virial_loss"] = l2_virial_loss
+        more_loss["l2_virial_loss"] = self.display_if_exist(l2_virial_loss, find_virial)
         if self.has_ae:
             l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss)
-        more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss
+        more_loss["l2_atom_ener_loss"] = self.display_if_exist(
+            l2_atom_ener_loss, find_atom_ener
+        )
 
         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss", tf.sqrt(l2_loss))
@@ -785,8 +801,10 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
         l2_loss += global_cvt_2_ener_float(pref_ed * l2_ener_dipole_loss)
-        more_loss["l2_ener_loss"] = l2_ener_loss
-        more_loss["l2_ener_dipole_loss"] = l2_ener_dipole_loss
+        more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
+        more_loss["l2_ener_dipole_loss"] = self.display_if_exist(
+            l2_ener_dipole_loss, find_ener_dipole
+        )
 
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
         self.l2_loss_ener_summary = tf.summary.scalar(
diff --git a/deepmd/loss/loss.py b/deepmd/loss/loss.py
index 9324077691..a719a08d81 100644
--- a/deepmd/loss/loss.py
+++ b/deepmd/loss/loss.py
@@ -8,6 +8,8 @@
     Tuple,
 )
 
+import numpy as np
+
 from deepmd.env import (
     tf,
 )
@@ -72,3 +74,20 @@ def eval(
             A dictionary that maps keys to values. It
             should contain key `natoms`
         """
+
+    @staticmethod
+    def display_if_exist(loss: tf.Tensor, find_property: float) -> tf.Tensor:
+        """Display NaN if labeled property is not found.
+
+        Parameters
+        ----------
+        loss : tf.Tensor
+            the loss tensor
+        find_property : float
+            whether the property is found
+        """
+        return tf.cond(
+            tf.cast(find_property, tf.bool),
+            lambda: loss,
+            lambda: tf.cast(np.nan, dtype=loss.dtype),
+        )
diff --git a/deepmd/loss/tensor.py b/deepmd/loss/tensor.py
index 74eb2b74dc..a40f95a18e 100644
--- a/deepmd/loss/tensor.py
+++ b/deepmd/loss/tensor.py
@@ -87,7 +87,7 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
             local_loss = global_cvt_2_tf_float(find_atomic) * tf.reduce_mean(
                 tf.square(self.scale * (polar - atomic_polar_hat)), name="l2_" + suffix
             )
-            more_loss["local_loss"] = local_loss
+            more_loss["local_loss"] = self.display_if_exist(local_loss, find_atomic)
             l2_loss += self.local_weight * local_loss
             self.l2_loss_local_summary = tf.summary.scalar(
                 "l2_local_loss_" + suffix, tf.sqrt(more_loss["local_loss"])
@@ -118,7 +118,7 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
                 tf.square(self.scale * (global_polar - polar_hat)), name="l2_" + suffix
             )
 
-            more_loss["global_loss"] = global_loss
+            more_loss["global_loss"] = self.display_if_exist(global_loss, find_global)
             self.l2_loss_global_summary = tf.summary.scalar(
                 "l2_global_loss_" + suffix,
                 tf.sqrt(more_loss["global_loss"]) / global_cvt_2_tf_float(atoms),
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index bbcb305404..3b81740a93 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -943,6 +943,7 @@ def print_header(fp, train_results, valid_results, multi_task_mode=False):
                     for k in train_results[fitting_key].keys():
                         print_str += prop_fmt % (k + "_trn")
                 print_str += "   %8s\n" % (fitting_key + "_lr")
+        print_str += "# If there is no available reference data, rmse_*_{val,trn} will print nan\n"
         fp.write(print_str)
         fp.flush()
 

From 06cd3c1dd4d4ecfdbe3bf37f50c7fab70199c0ee Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 14 Dec 2023 03:13:20 -0500
Subject: [PATCH 43/97] fix labeler.yml with actions/labeler v5 (#3059)

See
https://github.com/actions/labeler/tree/main?tab=readme-ov-file#breaking-changes-in-v5
for the breaking changes. The action version was bumped by #3050.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/labeler.yml | 52 +++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/.github/labeler.yml b/.github/labeler.yml
index 049c9badff..b0a85679de 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,15 +1,39 @@
 Python:
-- deepmd/**/*
-- deepmd_utils/**/*
-- source/tests/**/*
-Docs: doc/**/*
-Examples: examples/**/*
-Core: source/lib/**/*
-CUDA: source/lib/src/gpu/**/*
-ROCM: source/lib/src/gpu/**/*
-OP: source/op/**/*
-C++: source/api_cc/**/*
-C: source/api_c/**/*
-LAMMPS: source/lmp/**/*
-Gromacs: source/gmx/**/*
-i-Pi: source/ipi/**/*
+- changed-files:
+  - any-glob-to-any-file:
+    - deepmd/**/*
+    - deepmd_utils/**/*
+    - source/tests/**/*
+Docs:
+- changed-files:
+  - any-glob-to-any-file: doc/**/*
+Examples:
+- changed-files:
+  - any-glob-to-any-file: examples/**/*
+Core:
+- changed-files:
+  - any-glob-to-any-file: source/lib/**/*
+CUDA:
+- changed-files:
+  - any-glob-to-any-file: source/lib/src/gpu/**/*
+ROCM:
+- changed-files:
+  - any-glob-to-any-file: source/lib/src/gpu/**/*
+OP:
+- changed-files:
+  - any-glob-to-any-file: source/op/**/*
+C++:
+- changed-files:
+  - any-glob-to-any-file: source/api_cc/**/*
+C:
+- changed-files:
+  - any-glob-to-any-file: source/api_c/**/*
+LAMMPS:
+- changed-files:
+  - any-glob-to-any-file: source/lmp/**/*
+Gromacs:
+- changed-files:
+  - any-glob-to-any-file: source/gmx/**/*
+i-Pi:
+- changed-files:
+  - any-glob-to-any-file: source/ipi/**/*

From 1a7e25209a234fc446fcb5b1ffdd1eee5a7df8af Mon Sep 17 00:00:00 2001
From: LiuGroupHNU <55585514+LiuGroupHNU@users.noreply.github.com>
Date: Thu, 14 Dec 2023 16:31:36 +0800
Subject: [PATCH 44/97] nvnmd: init-model feature and 256 neighbors (#3058)

1. Add the init-model feature. You can initialize the CNN model by:
`dp train-nvnmd train_cnn.json -s s1 -i nvnmd_cnn_bck/model.ckpt`.
2. The maximum number of neighbors has been increased to 256.
3. Update the doc of nvnmd.

---------

Co-authored-by: LiuGroupHNU <liujie123@HNU>
Co-authored-by: MoPinghui <mopinghui1020@gmail.com>
Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd/fit/ener.py                           |  5 ++
 deepmd/nvnmd/data/data.py                    | 46 ++++++++++++++
 deepmd/nvnmd/descriptor/se_a.py              |  9 ++-
 deepmd/nvnmd/descriptor/se_atten.py          |  9 ++-
 deepmd/nvnmd/entrypoints/freeze.py           |  2 +
 deepmd/nvnmd/entrypoints/mapt.py             | 28 ++++++++-
 deepmd/nvnmd/entrypoints/train.py            |  3 +
 deepmd/nvnmd/entrypoints/wrap.py             | 56 ++++++++++++++---
 deepmd/nvnmd/utils/argcheck.py               |  2 +
 deepmd/nvnmd/utils/config.py                 | 52 +++++++++++++---
 deepmd_utils/main.py                         | 16 +++++
 doc/nvnmd/nvnmd.md                           | 11 ++++
 examples/nvnmd/train/train_cnn.json          |  1 +
 examples/nvnmd/train/train_qnn.json          |  1 +
 source/op/prod_env_mat_multi_device_nvnmd.cc | 64 +++++++++++++++-----
 source/tests/test_nvnmd_entrypoints.py       |  4 +-
 16 files changed, 269 insertions(+), 40 deletions(-)

diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index e74d4a7e6d..4c15e57124 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -514,6 +514,11 @@ def build(
                     self.bias_atom_e[type_i] = self.bias_atom_e[type_i]
             self.bias_atom_e = self.bias_atom_e[:ntypes_atom]
 
+        if nvnmd_cfg.enable:
+            # fix the bug: CNN and QNN have different t_bias_atom_e.
+            if "t_bias_atom_e" in nvnmd_cfg.weight.keys():
+                self.bias_atom_e = nvnmd_cfg.weight["t_bias_atom_e"]
+
         with tf.variable_scope("fitting_attr" + suffix, reuse=reuse):
             t_dfparam = tf.constant(self.numb_fparam, name="dfparam", dtype=tf.int32)
             t_daparam = tf.constant(self.numb_aparam, name="daparam", dtype=tf.int32)
diff --git a/deepmd/nvnmd/data/data.py b/deepmd/nvnmd/data/data.py
index 29c8b84a37..9e6dd4cc89 100644
--- a/deepmd/nvnmd/data/data.py
+++ b/deepmd/nvnmd/data/data.py
@@ -60,6 +60,7 @@
     },
     "ctrl": {
         # NSTDM
+        "MAX_NNEI": 128,
         "NSTDM": 64,
         "NSTDM_M1": 32,
         "NSTDM_M2": 2,
@@ -67,6 +68,7 @@
         "NSEL": "NSTDM*NTYPE_MAX",
         "NSADV": "NSTDM+1",
         "VERSION": 0,
+        "SUB_VERSION": 1,
     },
     "nbit": {
         # general
@@ -116,6 +118,22 @@
     "end": "",
 }
 
+# change the configuration accordng to the max_nnei
+jdata_config_v0_ni128 = jdata_config_v0.copy()
+jdata_config_v0_ni256 = jdata_config_v0.copy()
+jdata_config_v0_ni256["ctrl"] = {
+    "MAX_NNEI": 256,
+    "NSTDM": 128,
+    "NSTDM_M1": 32,
+    "NSTDM_M2": 4,
+    "NSTDM_M1X": 8,
+    "NSEL": "NSTDM*NTYPE_MAX",
+    "NSADV": "NSTDM+1",
+    "VERSION": 0,
+    "SUB_VERSION": 1,
+}
+jdata_config_v0_ni256["nbit"]["NBIT_NEIB"] = 9
+
 jdata_config_v1 = {
     "dscp": {
         # basic config from deepmd model
@@ -174,6 +192,7 @@
     },
     "ctrl": {
         # NSTDM
+        "MAX_NNEI": 128,
         "NSTDM": 64,
         "NSTDM_M1": 32,
         "NSTDM_M2": 2,
@@ -181,6 +200,7 @@
         "NSEL": "NSTDM",
         "NSADV": "NSTDM+1",
         "VERSION": 1,
+        "SUB_VERSION": 1,
     },
     "nbit": {
         # general
@@ -230,6 +250,22 @@
     "end": "",
 }
 
+# change the configuration accordng to the max_nnei
+jdata_config_v1_ni128 = jdata_config_v1.copy()
+jdata_config_v1_ni256 = jdata_config_v1.copy()
+jdata_config_v1_ni256["ctrl"] = {
+    "MAX_NNEI": 256,
+    "NSTDM": 128,
+    "NSTDM_M1": 32,
+    "NSTDM_M2": 4,
+    "NSTDM_M1X": 8,
+    "NSEL": "NSTDM",
+    "NSADV": "NSTDM+1",
+    "VERSION": 1,
+    "SUB_VERSION": 1,
+}
+jdata_config_v1_ni256["nbit"]["NBIT_NEIB"] = 9
+
 jdata_deepmd_input_v0 = {
     "model": {
         "descriptor": {
@@ -247,6 +283,7 @@
     },
     "nvnmd": {
         "version": 0,
+        "max_nnei": 128,  # 128 or 256
         "net_size": 128,
         "config_file": "none",
         "weight_file": "none",
@@ -286,6 +323,10 @@
     },
 }
 
+jdata_deepmd_input_v0_ni128 = jdata_deepmd_input_v0.copy()
+jdata_deepmd_input_v0_ni256 = jdata_deepmd_input_v0.copy()
+jdata_deepmd_input_v0_ni256["nvnmd"]["max_nnei"] = 256
+
 jdata_deepmd_input_v1 = {
     "model": {
         "descriptor": {
@@ -308,6 +349,7 @@
     },
     "nvnmd": {
         "version": 1,
+        "max_nnei": 128,  # 128 or 256
         "net_size": 128,
         "config_file": "none",
         "weight_file": "none",
@@ -347,6 +389,10 @@
     },
 }
 
+jdata_deepmd_input_v1_ni128 = jdata_deepmd_input_v1.copy()
+jdata_deepmd_input_v1_ni256 = jdata_deepmd_input_v1.copy()
+jdata_deepmd_input_v1_ni256["nvnmd"]["max_nnei"] = 256
+
 NVNMD_WELCOME = (
     r" _   _  __     __  _   _   __  __   ____  ",
     r"| \ | | \ \   / / | \ | | |  \/  | |  _ \ ",
diff --git a/deepmd/nvnmd/descriptor/se_a.py b/deepmd/nvnmd/descriptor/se_a.py
index 67ea45924b..816f17cfa3 100644
--- a/deepmd/nvnmd/descriptor/se_a.py
+++ b/deepmd/nvnmd/descriptor/se_a.py
@@ -50,12 +50,17 @@ def check_switch_range(davg, dstd):
         else:
             min_dist = nvnmd_cfg.weight["train_attr.min_nbor_dist"]
     else:
-        min_dist = rmin
+        min_dist = None
+
+    # fix the bug: if model initial mode is 'init_from_model',
+    # we need dmin to calculate smin and smax in mapt.py
+    if min_dist is not None:
+        nvnmd_cfg.dscp["dmin"] = min_dist
+        nvnmd_cfg.save()
 
     # if davg and dstd is None, the model initial mode is in
     #  'init_from_model', 'restart', 'init_from_frz_model', 'finetune'
     if (davg is not None) and (dstd is not None):
-        nvnmd_cfg.dscp["dmin"] = min_dist
         nvnmd_cfg.get_s_range(davg, dstd)
 
 
diff --git a/deepmd/nvnmd/descriptor/se_atten.py b/deepmd/nvnmd/descriptor/se_atten.py
index 727a93ca45..cfffb8a90b 100644
--- a/deepmd/nvnmd/descriptor/se_atten.py
+++ b/deepmd/nvnmd/descriptor/se_atten.py
@@ -49,7 +49,13 @@ def check_switch_range(davg, dstd):
         else:
             min_dist = nvnmd_cfg.weight["train_attr.min_nbor_dist"]
     else:
-        min_dist = rmin
+        min_dist = None
+
+    # fix the bug: if model initial mode is 'init_from_model',
+    # we need dmin to calculate smin and smax in mapt.py
+    if min_dist is not None:
+        nvnmd_cfg.dscp["dmin"] = min_dist
+        nvnmd_cfg.save()
 
     # if davg and dstd is None, the model initial mode is in
     #  'init_from_model', 'restart', 'init_from_frz_model', 'finetune'
@@ -58,7 +64,6 @@ def check_switch_range(davg, dstd):
             davg = np.zeros([ntype, ndescrpt])
         if dstd is None:
             dstd = np.ones([ntype, ndescrpt])
-        nvnmd_cfg.dscp["dmin"] = min_dist
         nvnmd_cfg.get_s_range(davg, dstd)
 
 
diff --git a/deepmd/nvnmd/entrypoints/freeze.py b/deepmd/nvnmd/entrypoints/freeze.py
index 6c356c6118..e56a0c2130 100644
--- a/deepmd/nvnmd/entrypoints/freeze.py
+++ b/deepmd/nvnmd/entrypoints/freeze.py
@@ -52,6 +52,7 @@ def filter_tensorVariableList(tensorVariableList) -> dict:
         p1 = p1 or name.startswith("filter_type_")
         p1 = p1 or name.startswith("layer_")
         p1 = p1 or name.startswith("final_layer")
+        p1 = p1 or name.endswith("t_bias_atom_e")
         p2 = "Adam" not in name
         p3 = "XXX" not in name
         if p1 and p2 and p3:
@@ -75,4 +76,5 @@ def save_weight(sess, file_name: str = "nvnmd/weight.npy"):
     else:
         min_dist = 0.0
     dic_key_value["train_attr.min_nbor_dist"] = min_dist
+    dic_key_value["t_bias_atom_e"] = dic_key_value["fitting_attr.t_bias_atom_e"]
     FioDic().save(file_name, dic_key_value)
diff --git a/deepmd/nvnmd/entrypoints/mapt.py b/deepmd/nvnmd/entrypoints/mapt.py
index eb77913983..1299d7a74e 100644
--- a/deepmd/nvnmd/entrypoints/mapt.py
+++ b/deepmd/nvnmd/entrypoints/mapt.py
@@ -87,9 +87,22 @@ def __init__(self, config_file: str, weight_file: str, map_file: str):
         jdata["weight_file"] = weight_file
         jdata["enable"] = True
 
+        # 0 : xyz_scatter = xyz_scatter * two_embd + xyz_scatter;
+        # Gs + 1, Gt + 0
+        # 1 : xyz_scatter = xyz_scatter * two_embd + two_embd   ;
+        # Gs + 0, Gt + 1
+        self.Gs_Gt_mode = 1
+
         nvnmd_cfg.init_from_jdata(jdata)
 
     def build_map(self):
+        if self.Gs_Gt_mode == 0:
+            self.shift_Gs = 1
+            self.shift_Gt = 0
+        if self.Gs_Gt_mode == 1:
+            self.shift_Gs = 0
+            self.shift_Gt = 1
+        #
         M = nvnmd_cfg.dscp["M1"]
         if nvnmd_cfg.version == 0:
             ndim = nvnmd_cfg.dscp["ntype"]
@@ -482,7 +495,7 @@ def build_s2g_grad(self):
             shift = 0
         if nvnmd_cfg.version == 1:
             ndim = 1
-            shift = 1
+            shift = self.shift_Gs
         #
         dic_ph = {}
         dic_ph["s"] = tf.placeholder(tf.float64, [None, 1], "t_s")
@@ -496,6 +509,13 @@ def run_s2g(self):
         r"""Build s-> graph and run it to get value of mapping table."""
         smin = nvnmd_cfg.dscp["smin"]
         smax = nvnmd_cfg.dscp["smax"]
+        # fix the bug: if model initial mode is 'init_from_model',
+        # we need dmin to calculate smin and smax in mapt.py
+        if smin == -2:
+            davg, dstd = get_normalize(nvnmd_cfg.weight)
+            nvnmd_cfg.get_s_range(davg, dstd)
+            smin = nvnmd_cfg.dscp["smin"]
+            smax = nvnmd_cfg.dscp["smax"]
 
         tf.reset_default_graph()
         dic_ph = self.build_s2g_grad()
@@ -567,9 +587,11 @@ def build_t2g(self):
             two_side_type_embedding,
             [-1, two_side_type_embedding.shape[-1]],
         )
-
+        # see se_atten.py in dp
         wbs = [get_filter_type_weight(nvnmd_cfg.weight, ll) for ll in range(1, 5)]
-        dic_ph["gt"] = self.build_embedding_net(two_side_type_embedding, wbs)
+        dic_ph["gt"] = (
+            self.build_embedding_net(two_side_type_embedding, wbs) + self.shift_Gt
+        )
         return dic_ph
 
     def run_t2g(self):
diff --git a/deepmd/nvnmd/entrypoints/train.py b/deepmd/nvnmd/entrypoints/train.py
index cb3dad0792..6e14b6f865 100644
--- a/deepmd/nvnmd/entrypoints/train.py
+++ b/deepmd/nvnmd/entrypoints/train.py
@@ -100,6 +100,7 @@ def normalized_input_qnn(jdata, PATH_QNN, CONFIG_CNN, WEIGHT_CNN, MAP_CNN):
     jdata_nvnmd = jdata_deepmd_input_v0["nvnmd"]
     jdata_nvnmd["enable"] = True
     jdata_nvnmd["version"] = nvnmd_cfg.version
+    jdata_nvnmd["max_nnei"] = nvnmd_cfg.max_nnei
     jdata_nvnmd["config_file"] = CONFIG_CNN
     jdata_nvnmd["weight_file"] = WEIGHT_CNN
     jdata_nvnmd["map_file"] = MAP_CNN
@@ -117,6 +118,7 @@ def normalized_input_qnn(jdata, PATH_QNN, CONFIG_CNN, WEIGHT_CNN, MAP_CNN):
 def train_nvnmd(
     *,
     INPUT: str,
+    init_model: Optional[str],
     restart: Optional[str],
     step: str,
     skip_neighbor_stat: bool = False,
@@ -142,6 +144,7 @@ def train_nvnmd(
         jdata = jdata_cmd_train.copy()
         jdata["INPUT"] = INPUT_CNN
         jdata["log_path"] = LOG_CNN
+        jdata["init_model"] = init_model
         jdata["restart"] = restart
         jdata["skip_neighbor_stat"] = skip_neighbor_stat
         train(**jdata)
diff --git a/deepmd/nvnmd/entrypoints/wrap.py b/deepmd/nvnmd/entrypoints/wrap.py
index 455dd999df..1ba2ed7384 100644
--- a/deepmd/nvnmd/entrypoints/wrap.py
+++ b/deepmd/nvnmd/entrypoints/wrap.py
@@ -156,33 +156,75 @@ def wrap_head(self, nhs, nws):
         r"""Wrap the head information.
 
         version
+        nhead
         nheight
-        nweight
-        rcut
+        nwidth
+        rcut       cut-off radius
+        ntype      number of atomic species
+        nnei       number of neighbors
+        atom_ener  atom bias energy
         """
         nbit = nvnmd_cfg.nbit
         ctrl = nvnmd_cfg.ctrl
+        dscp = nvnmd_cfg.dscp
+        fitn = nvnmd_cfg.fitn
+        weight = nvnmd_cfg.weight
         VERSION = ctrl["VERSION"]
+        SUB_VERSION = ctrl["SUB_VERSION"]
+        MAX_NNEI = ctrl["MAX_NNEI"]
+        nhead = 128
         NBIT_MODEL_HEAD = nbit["NBIT_MODEL_HEAD"]
         NBIT_FIXD_FL = nbit["NBIT_FIXD_FL"]
-        rcut = nvnmd_cfg.dscp["rcut"]
+        rcut = dscp["rcut"]
+        ntype = dscp["ntype"]
+        SEL = dscp["SEL"]
 
         bs = ""
         e = Encode()
         # version
-        bs = e.dec2bin(VERSION, NBIT_MODEL_HEAD)[0] + bs
+        vv = VERSION + 256 * SUB_VERSION + 256 * 256 * MAX_NNEI
+        bs = e.dec2bin(vv, NBIT_MODEL_HEAD)[0] + bs
+        # nhead
+        bs = e.dec2bin(nhead, NBIT_MODEL_HEAD)[0] + bs
         # height
         for n in nhs:
             bs = e.dec2bin(n, NBIT_MODEL_HEAD)[0] + bs
-        # weight
+        # width
         for n in nws:
             bs = e.dec2bin(n, NBIT_MODEL_HEAD)[0] + bs
-        # dscp
+        # rcut
         RCUT = e.qr(rcut, NBIT_FIXD_FL)
         bs = e.dec2bin(RCUT, NBIT_MODEL_HEAD)[0] + bs
+        # ntype
+        bs = e.dec2bin(ntype, NBIT_MODEL_HEAD)[0] + bs
+        # nnei
+        if VERSION == 0:
+            for tt in range(ntype):
+                bs = e.dec2bin(SEL[tt], NBIT_MODEL_HEAD)[0] + bs
+        if VERSION == 1:
+            bs = e.dec2bin(SEL, NBIT_MODEL_HEAD)[0] + bs
+        # atom_ener
+        # fix the bug: the different energy between qnn and lammps
+        if "t_bias_atom_e" in weight.keys():
+            atom_ener = weight["t_bias_atom_e"]
+        else:
+            atom_ener = [0] * 32
+        nlayer_fit = fitn["nlayer_fit"]
+        if VERSION == 0:
+            for tt in range(ntype):
+                w, b, _idt = get_fitnet_weight(weight, tt, nlayer_fit - 1, nlayer_fit)
+                shift = atom_ener[tt] + b[0]
+                SHIFT = e.qr(shift, NBIT_FIXD_FL)
+                bs = e.dec2bin(SHIFT, NBIT_MODEL_HEAD, signed=True)[0] + bs
+        if VERSION == 1:
+            for tt in range(ntype):
+                w, b, _idt = get_fitnet_weight(weight, 0, nlayer_fit - 1, nlayer_fit)
+                shift = atom_ener[tt] + b[0]
+                SHIFT = e.qr(shift, NBIT_FIXD_FL)
+                bs = e.dec2bin(SHIFT, NBIT_MODEL_HEAD, signed=True)[0] + bs
         # extend
         hs = e.bin2hex(bs)
-        hs = e.extend_hex(hs, NBIT_MODEL_HEAD * 32)
+        hs = e.extend_hex(hs, NBIT_MODEL_HEAD * nhead)
         return hs
 
     def wrap_dscp(self):
diff --git a/deepmd/nvnmd/utils/argcheck.py b/deepmd/nvnmd/utils/argcheck.py
index 2cbff3cbdc..2dc17ebc27 100644
--- a/deepmd/nvnmd/utils/argcheck.py
+++ b/deepmd/nvnmd/utils/argcheck.py
@@ -8,6 +8,7 @@ def nvnmd_args():
     doc_version = (
         "configuration the nvnmd version (0 | 1), 0 for 4 types, 1 for 32 types"
     )
+    doc_max_nnei = "configuration the max number of neighbors, 128|256 for version 0, 128 for version 1"
     doc_net_size_file = (
         "configuration the number of nodes of fitting_net, just can be set as 128"
     )
@@ -25,6 +26,7 @@ def nvnmd_args():
     doc_quantize_fitting_net = "enable the quantizatioin of fitting_net"
     args = [
         Argument("version", int, optional=False, default=0, doc=doc_version),
+        Argument("max_nnei", int, optional=False, default=128, doc=doc_max_nnei),
         Argument("net_size", int, optional=False, default=128, doc=doc_net_size_file),
         Argument("map_file", str, optional=False, default="none", doc=doc_map_file),
         Argument(
diff --git a/deepmd/nvnmd/utils/config.py b/deepmd/nvnmd/utils/config.py
index 96ca74c4c9..5bfd9ea54f 100644
--- a/deepmd/nvnmd/utils/config.py
+++ b/deepmd/nvnmd/utils/config.py
@@ -7,9 +7,15 @@
     NVNMD_CITATION,
     NVNMD_WELCOME,
     jdata_config_v0,
-    jdata_config_v1,
+    jdata_config_v0_ni128,
+    jdata_config_v0_ni256,
+    jdata_config_v1_ni128,
+    jdata_config_v1_ni256,
     jdata_deepmd_input_v0,
-    jdata_deepmd_input_v1,
+    jdata_deepmd_input_v0_ni128,
+    jdata_deepmd_input_v0_ni256,
+    jdata_deepmd_input_v1_ni128,
+    jdata_deepmd_input_v1_ni256,
 )
 from deepmd.nvnmd.utils.fio import (
     FioDic,
@@ -50,6 +56,7 @@ def init_from_jdata(self, jdata: dict = {}):
             return None
 
         self.version = jdata["version"]
+        self.max_nnei = jdata["max_nnei"]
         self.net_size = jdata["net_size"]
         self.map_file = jdata["map_file"]
         self.config_file = jdata["config_file"]
@@ -65,7 +72,7 @@ def init_from_jdata(self, jdata: dict = {}):
             self.map = FioDic().load(self.map_file, {})
             self.weight = FioDic().load(self.weight_file, {})
 
-            self.init_config_by_version(self.version)
+            self.init_config_by_version(self.version, self.max_nnei)
             load_config = FioDic().load(self.config_file, self.config)
             self.init_from_config(load_config)
             # if load the file, set net_size
@@ -106,7 +113,11 @@ def init_from_config(self, jdata):
         r"""Initialize member element one by one."""
         if "ctrl" in jdata.keys():
             if "VERSION" in jdata["ctrl"].keys():
-                self.init_config_by_version(jdata["ctrl"]["VERSION"])
+                if "MAX_NNEI" not in jdata["ctrl"].keys():
+                    jdata["ctrl"]["MAX_NNEI"] = 128
+                self.init_config_by_version(
+                    jdata["ctrl"]["VERSION"], jdata["ctrl"]["MAX_NNEI"]
+                )
         #
         self.config = FioDic().update(jdata, self.config)
         self.config["dscp"] = self.init_dscp(self.config["dscp"], self.config)
@@ -117,16 +128,29 @@ def init_from_config(self, jdata):
         self.config["nbit"] = self.init_nbit(self.config["nbit"], self.config)
         self.init_value()
 
-    def init_config_by_version(self, version):
+    def init_config_by_version(self, version, max_nnei):
         r"""Initialize version-dependent parameters."""
         self.version = version
+        self.max_nnei = max_nnei
         log.debug("#Set nvnmd version as %d " % self.version)
         if self.version == 0:
-            self.jdata_deepmd_input = jdata_deepmd_input_v0.copy()
-            self.config = jdata_config_v0.copy()
+            if self.max_nnei == 128:
+                self.jdata_deepmd_input = jdata_deepmd_input_v0_ni128.copy()
+                self.config = jdata_config_v0_ni128.copy()
+            elif self.max_nnei == 256:
+                self.jdata_deepmd_input = jdata_deepmd_input_v0_ni256.copy()
+                self.config = jdata_config_v0_ni256.copy()
+            else:
+                log.error("The max_nnei only can be set as 128|256 for version 0")
         if self.version == 1:
-            self.jdata_deepmd_input = jdata_deepmd_input_v1.copy()
-            self.config = jdata_config_v1.copy()
+            if self.max_nnei == 128:
+                self.jdata_deepmd_input = jdata_deepmd_input_v1_ni128.copy()
+                self.config = jdata_config_v1_ni128.copy()
+            elif self.max_nnei == 256:
+                self.jdata_deepmd_input = jdata_deepmd_input_v1_ni256.copy()
+                self.config = jdata_config_v1_ni256.copy()
+            else:
+                log.error("The max_nnei only can be set as 128|256 for version 1")
 
     def init_net_size(self):
         r"""Initialize net_size."""
@@ -154,10 +178,15 @@ def init_dscp(self, jdata: dict, jdata_parent: dict = {}) -> dict:
             jdata["M1"] = jdata["neuron"][-1]
             jdata["M2"] = jdata["axis_neuron"]
             jdata["SEL"] = (jdata["sel"] + [0, 0, 0, 0])[0:4]
+            for s in jdata["sel"]:
+                if s > self.max_nnei:
+                    log.error("The sel cannot be greater than the max_nnei")
+                    exit(1)
             jdata["NNODE_FEAS"] = [1] + jdata["neuron"]
             jdata["nlayer_fea"] = len(jdata["neuron"])
             jdata["same_net"] = 1 if jdata["type_one_side"] else 0
             # neighbor
+            jdata["NI"] = self.max_nnei
             jdata["NIDP"] = int(np.sum(jdata["sel"]))
             jdata["NIX"] = 2 ** int(np.ceil(np.log2(jdata["NIDP"] / 1.5)))
             # type
@@ -168,10 +197,14 @@ def init_dscp(self, jdata: dict, jdata_parent: dict = {}) -> dict:
             jdata["M1"] = jdata["neuron"][-1]
             jdata["M2"] = jdata["axis_neuron"]
             jdata["SEL"] = jdata["sel"]
+            if jdata["sel"] > self.max_nnei:
+                log.error("The sel cannot be greater than the max_nnei")
+                exit(1)
             jdata["NNODE_FEAS"] = [1] + jdata["neuron"]
             jdata["nlayer_fea"] = len(jdata["neuron"])
             jdata["same_net"] = 1 if jdata["type_one_side"] else 0
             # neighbor
+            jdata["NI"] = self.max_nnei
             jdata["NIDP"] = int(jdata["sel"])
             jdata["NIX"] = 2 ** int(np.ceil(np.log2(jdata["NIDP"] / 1.5)))
             # type
@@ -306,6 +339,7 @@ def get_nvnmd_jdata(self):
         r"""Generate `nvnmd` in input script."""
         jdata = self.jdata_deepmd_input["nvnmd"]
         jdata["net_size"] = self.net_size
+        jdata["max_nnei"] = self.max_nnei
         jdata["config_file"] = self.config_file
         jdata["weight_file"] = self.weight_file
         jdata["map_file"] = self.map_file
diff --git a/deepmd_utils/main.py b/deepmd_utils/main.py
index 3dc54db052..19afaeee1f 100644
--- a/deepmd_utils/main.py
+++ b/deepmd_utils/main.py
@@ -552,10 +552,26 @@ def main_parser() -> argparse.ArgumentParser:
         parents=[parser_log],
         help="train nvnmd model",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        epilog=textwrap.dedent(
+            """\
+        examples:
+            dp train-nvnmd input_cnn.json -s s1
+            dp train-nvnmd input_qnn.json -s s2
+            dp train-nvnmd input_cnn.json -s s1 --restart model.ckpt
+            dp train-nvnmd input_cnn.json -s s2 --init-model model.ckpt
+        """
+        ),
     )
     parser_train_nvnmd.add_argument(
         "INPUT", help="the input parameter file in json format"
     )
+    parser_train_nvnmd.add_argument(
+        "-i",
+        "--init-model",
+        type=str,
+        default=None,
+        help="Initialize the model by the provided path prefix of checkpoint files.",
+    )
     parser_train_nvnmd.add_argument(
         "-r",
         "--restart",
diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md
index 7a11e3170e..c11fee0bc9 100644
--- a/doc/nvnmd/nvnmd.md
+++ b/doc/nvnmd/nvnmd.md
@@ -60,6 +60,7 @@ The "nvnmd" section is defined as
 ```json
 {
     "version": 0,
+    "max_nnei":128,
     "net_size":128,
     "sel":[60, 60],
     "rcut":6.0,
@@ -73,6 +74,7 @@ where items are defined as:
 | Item      | Mean                        | Optional Value                                |
 | --------- | --------------------------- | --------------------------------------------- |
 | version | the version of network structure | 0 or 1 |
+| max_nnei  | the maximum number of neighbors, without distinguishing element types | 128 or 256 |
 | net_size  | the size of nueral network  | 128                                     |
 | sel       | the number of neighbors     | version 0: integer list of lengths 1 to 4 are acceptable; version 1: integer |
 | rcut      | the cutoff radial           | (0, 8.0]                                      |
@@ -187,6 +189,15 @@ You can also restart the CNN training from the path prefix of checkpoint files (
 dp train-nvnmd train_cnn.json -r nvnmd_cnn/model.ckpt -s s1
 ```
 
+You can also initialize the CNN model from existing checkpoint files and then train it:
+
+``` bash
+mv nvnmd_cnn nvnmd_cnn_bck
+cp train_cnn.json train_cnn2.json
+# please edit train_cnn2.json
+dp train-nvnmd train_cnn2.json -s s1 -i nvnmd_cnn_bck/model.ckpt
+```
+
 
 # Testing
 
diff --git a/examples/nvnmd/train/train_cnn.json b/examples/nvnmd/train/train_cnn.json
index c89c8b13d6..1865106909 100644
--- a/examples/nvnmd/train/train_cnn.json
+++ b/examples/nvnmd/train/train_cnn.json
@@ -1,6 +1,7 @@
 {
   "nvnmd": {
     "version": 0,
+    "max_nnei": 128,
     "net_size": 128,
     "sel": [
       60,
diff --git a/examples/nvnmd/train/train_qnn.json b/examples/nvnmd/train/train_qnn.json
index 0235575f52..72b299f70d 100644
--- a/examples/nvnmd/train/train_qnn.json
+++ b/examples/nvnmd/train/train_qnn.json
@@ -1,6 +1,7 @@
 {
   "nvnmd": {
     "version": 0,
+    "max_nnei": 128,
     "net_size": 128,
     "sel": [
       60,
diff --git a/source/op/prod_env_mat_multi_device_nvnmd.cc b/source/op/prod_env_mat_multi_device_nvnmd.cc
index abca947f0a..1cbfb968f1 100644
--- a/source/op/prod_env_mat_multi_device_nvnmd.cc
+++ b/source/op/prod_env_mat_multi_device_nvnmd.cc
@@ -411,6 +411,9 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel {
       // no pbc
       assert(nloc == nall);
       nei_mode = -1;
+    } else if (mesh_tensor.shape().dim_size(0) > 16) {
+      // pass neighbor list inside the tensor
+      nei_mode = 4;
     } else if (mesh_tensor.shape().dim_size(0) == 7 ||
                mesh_tensor.shape().dim_size(0) == 1) {
       throw deepmd::deepmd_exception(
@@ -422,16 +425,16 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel {
     // Create output tensors
     TensorShape descrpt_shape;
     descrpt_shape.AddDim(nsamples);
-    descrpt_shape.AddDim(nloc * ndescrpt);
+    descrpt_shape.AddDim(int_64(nloc) * ndescrpt);
     TensorShape descrpt_deriv_shape;
     descrpt_deriv_shape.AddDim(nsamples);
-    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    descrpt_deriv_shape.AddDim(int_64(nloc) * ndescrpt * 3);
     TensorShape rij_shape;
     rij_shape.AddDim(nsamples);
-    rij_shape.AddDim(nloc * nnei * 3);
+    rij_shape.AddDim(int_64(nloc) * nnei * 3);
     TensorShape nlist_shape;
     nlist_shape.AddDim(nsamples);
-    nlist_shape.AddDim(nloc * nnei);
+    nlist_shape.AddDim(int_64(nloc) * nnei);
     // define output tensor
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
@@ -460,8 +463,16 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel {
     const FPTYPE* std = std_tensor.flat<FPTYPE>().data();
     const int* p_type = type_tensor.flat<int>().data();
 
+    // must be declared outside the if, otherwise the memory will be destroyed!
+    Tensor int_temp;
+    Tensor uint64_temp;
+    std::vector<Tensor> tensor_list(7);
+    if (device == "GPU") {
+      // UNDEFINE
+    }
+
     // loop over samples
-    for (int ff = 0; ff < nsamples; ++ff) {
+    for (int_64 ff = 0; ff < nsamples; ++ff) {
       FPTYPE* em = p_em + ff * nloc * ndescrpt;
       FPTYPE* em_deriv = p_em_deriv + ff * nloc * ndescrpt * 3;
       FPTYPE* rij = p_rij + ff * nloc * nnei * 3;
@@ -633,15 +644,18 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel {
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    } else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6 ||
+               mesh_tensor.shape().dim_size(0) == 7) {
       // manual copied pbc
-      assert(nloc == nall);
       nei_mode = 1;
       b_nlist_map = true;
-    } else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0 ||
+               mesh_tensor.shape().dim_size(0) == 1) {
       // no pbc
-      assert(nloc == nall);
       nei_mode = -1;
+    } else if (mesh_tensor.shape().dim_size(0) > 16) {
+      // pass neighbor list inside the tensor
+      nei_mode = 4;
     } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
@@ -691,6 +705,12 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel {
                    context->allocate_output(context_output_index++, nmask_shape,
                                             &nmask_tensor));
 
+    Tensor fake_type_tensor;  // all zeros
+    TensorShape fake_type_shape;
+    fake_type_shape.AddDim(nsamples * nall);
+    OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, fake_type_shape,
+                                                   &fake_type_tensor));
+
     FPTYPE* p_em = descrpt_tensor->flat<FPTYPE>().data();
     FPTYPE* p_em_deriv = descrpt_deriv_tensor->flat<FPTYPE>().data();
     FPTYPE* p_rij = rij_tensor->flat<FPTYPE>().data();
@@ -702,7 +722,25 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel {
     const FPTYPE* avg = avg_tensor.flat<FPTYPE>().data();
     const FPTYPE* std = std_tensor.flat<FPTYPE>().data();
     const int* p_type = type_tensor.flat<int>().data();
+    int* p_f_type = fake_type_tensor.flat<int>().data();
+
+    if (device == "GPU") {
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+// UNDEFINE
+#endif
+    } else if (device == "CPU") {
+      for (int ii = 0; ii < nsamples * nall; ii++) {
+        p_f_type[ii] = (p_type[ii] < 0) ? -1 : 0;
+      }
+    }
 
+    // must be declared outside the if, otherwise the memory will be destroyed!
+    Tensor int_temp;
+    Tensor uint64_temp;
+    std::vector<Tensor> tensor_list(7);
+    if (device == "GPU") {
+      // UNDEFINE
+    }
     // loop over samples
     for (int_64 ff = 0; ff < nsamples; ++ff) {
       FPTYPE* em = p_em + ff * nloc * ndescrpt;
@@ -714,6 +752,7 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel {
       const FPTYPE* coord = p_coord + ff * nall * 3;
       const FPTYPE* box = p_box + ff * 9;
       const int* type = p_type + ff * nall;
+      const int* f_type = p_f_type + ff * nall;
 
       if (device == "GPU") {
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -729,13 +768,6 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel {
         std::vector<FPTYPE> coord_cpy;
         std::vector<int> type_cpy;
         int frame_nall = nall;
-        std::vector<int> fake_type(nall, 0);
-        for (int ii = 0; ii < nall; ii++) {
-          if (type[ii] < 0) {
-            fake_type[ii] = -1;
-          }
-        }
-        const int* f_type = &fake_type[0];
         // prepare coord and nlist
         _prepare_coord_nlist_cpu<FPTYPE>(
             context, &coord, coord_cpy, &f_type, type_cpy, idx_mapping, inlist,
diff --git a/source/tests/test_nvnmd_entrypoints.py b/source/tests/test_nvnmd_entrypoints.py
index 3e721516f1..d82c905024 100644
--- a/source/tests/test_nvnmd_entrypoints.py
+++ b/source/tests/test_nvnmd_entrypoints.py
@@ -521,6 +521,7 @@ def test_mapt_cnn_v1(self):
         map_file = str(tests_path / "nvnmd" / "out" / "map_v1_cnn.npy")
         # mapt
         mapObj = MapTable(config_file, weight_file, map_file)
+        mapObj.Gs_Gt_mode = 0
         mapt = mapObj.build_map()
         #
         N = 32
@@ -859,8 +860,9 @@ def test_wrap_qnn_v1(self):
         # test
         data = FioBin().load(jdata["nvnmd_model"])
         idx = [0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]
+        idx = [i + 128 * 4 for i in idx]
         pred = [data[i] for i in idx]
-        red_dout = [1, 0, 0, 128, 0, 0, 0, 8, 249, 0, 0, 0, 91, 252, 183, 254]
+        red_dout = [249, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 95, 24, 176]
         np.testing.assert_equal(pred, red_dout)
         # close
         nvnmd_cfg.enable = False

From e048389846ecf49f764ba3717bdef27a9723ddff Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 14 Dec 2023 19:55:51 -0500
Subject: [PATCH 45/97] Fix possible memory leak in constructors (#3062)

When a constructor throws an exception, it will not call the destructor
as the constructor is not finished. The memory leak happens here and is
detected by LeakSanitizer (thanks, @Cloudac7, for reminding me to use
it). We must catch the exception, delete the memory, and re-throw the
exception.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 source/api_cc/src/DataModifier.cc            |  8 +++++++-
 source/api_cc/src/DeepPot.cc                 | 18 ++++++++++++++++--
 source/api_cc/src/DeepTensor.cc              |  8 +++++++-
 source/api_cc/tests/test_deepmd_exception.cc | 20 +++++++++++++++++++-
 4 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc
index 658ec68442..d687c02e75 100644
--- a/source/api_cc/src/DataModifier.cc
+++ b/source/api_cc/src/DataModifier.cc
@@ -11,7 +11,13 @@ DipoleChargeModifier::DipoleChargeModifier(const std::string& model,
                                            const int& gpu_rank,
                                            const std::string& name_scope_)
     : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
-  init(model, gpu_rank, name_scope_);
+  try {
+    init(model, gpu_rank, name_scope_);
+  } catch (...) {
+    // Clean up and rethrow, as the destructor will not be called
+    delete graph_def;
+    throw;
+  }
 }
 
 DipoleChargeModifier::~DipoleChargeModifier() { delete graph_def; };
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index 23a0a7e663..018c3aca09 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -404,7 +404,13 @@ DeepPot::DeepPot(const std::string& model,
                  const int& gpu_rank,
                  const std::string& file_content)
     : inited(false), init_nbor(false), graph_def(new GraphDef()) {
-  init(model, gpu_rank, file_content);
+  try {
+    init(model, gpu_rank, file_content);
+  } catch (...) {
+    // Clean up and rethrow, as the destructor will not be called
+    delete graph_def;
+    throw;
+  }
 }
 
 DeepPot::~DeepPot() { delete graph_def; }
@@ -1236,7 +1242,15 @@ DeepPotModelDevi::DeepPotModelDevi(
     const int& gpu_rank,
     const std::vector<std::string>& file_contents)
     : inited(false), init_nbor(false), numb_models(0) {
-  init(models, gpu_rank, file_contents);
+  try {
+    init(models, gpu_rank, file_contents);
+  } catch (...) {
+    // Clean up and rethrow, as the destructor will not be called
+    for (unsigned ii = 0; ii < numb_models; ++ii) {
+      delete graph_defs[ii];
+    }
+    throw;
+  }
 }
 
 DeepPotModelDevi::~DeepPotModelDevi() {
diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc
index 30ff99497c..655819e086 100644
--- a/source/api_cc/src/DeepTensor.cc
+++ b/source/api_cc/src/DeepTensor.cc
@@ -10,7 +10,13 @@ DeepTensor::DeepTensor(const std::string &model,
                        const int &gpu_rank,
                        const std::string &name_scope_)
     : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
-  init(model, gpu_rank, name_scope_);
+  try {
+    init(model, gpu_rank, name_scope_);
+  } catch (...) {
+    // Clean up and rethrow, as the destructor will not be called
+    delete graph_def;
+    throw;
+  }
 }
 
 DeepTensor::~DeepTensor() { delete graph_def; }
diff --git a/source/api_cc/tests/test_deepmd_exception.cc b/source/api_cc/tests/test_deepmd_exception.cc
index 1cbec270b5..dd97f2786f 100644
--- a/source/api_cc/tests/test_deepmd_exception.cc
+++ b/source/api_cc/tests/test_deepmd_exception.cc
@@ -10,7 +10,9 @@
 #include <string>
 #include <vector>
 
+#include "DataModifier.h"
 #include "DeepPot.h"
+#include "DeepTensor.h"
 #include "errors.h"
 TEST(TestDeepmdException, deepmdexception) {
   std::string expected_error_message = "DeePMD-kit Error: unittest";
@@ -21,6 +23,22 @@ TEST(TestDeepmdException, deepmdexception) {
   }
 }
 
-TEST(TestDeepmdException, deepmdexception_nofile) {
+TEST(TestDeepmdException, deepmdexception_nofile_deeppot) {
   ASSERT_THROW(deepmd::DeepPot("_no_such_file.pb"), deepmd::deepmd_exception);
 }
+
+TEST(TestDeepmdException, deepmdexception_nofile_deeppotmodeldevi) {
+  ASSERT_THROW(
+      deepmd::DeepPotModelDevi({"_no_such_file.pb", "_no_such_file.pb"}),
+      deepmd::deepmd_exception);
+}
+
+TEST(TestDeepmdException, deepmdexception_nofile_deeptensor) {
+  ASSERT_THROW(deepmd::DeepTensor("_no_such_file.pb"),
+               deepmd::deepmd_exception);
+}
+
+TEST(TestDeepmdException, deepmdexception_nofile_dipolechargemodifier) {
+  ASSERT_THROW(deepmd::DipoleChargeModifier("_no_such_file.pb"),
+               deepmd::deepmd_exception);
+}

From 18902be6358e54a65c166a601be20ab7cc1b84c9 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 14 Dec 2023 19:58:35 -0500
Subject: [PATCH 46/97] fix memory leaks related to `char*` (#3063)

Make sure LeakSanitizer reports no leaks from `string_to_char`.

Most memory leaks happen only once, but `DP_CHECK_OK` leaks 1 byte
every step, i.e. 10 MB leaked over 10 million steps.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/api_c/include/c_api.h         |  7 +++++++
 source/api_c/include/deepmd.hpp      | 18 ++++++++++++------
 source/api_c/src/c_api.cc            |  2 ++
 source/api_c/tests/test_deeppot_a.cc |  1 +
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h
index b0c030962a..d05f790bf9 100644
--- a/source/api_c/include/c_api.h
+++ b/source/api_c/include/c_api.h
@@ -1271,6 +1271,13 @@ void DP_SelectMapInt(const int* in,
                      const int nall2,
                      int* out);
 
+/**
+ * @brief Destroy a char array.
+ *
+ * @param c_str The char array.
+ */
+void DP_DeleteChar(const char* c_str);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 90c1c1c918..4a376e0bec 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -35,10 +35,14 @@ struct deepmd_exception : public std::runtime_error {
 /**
  * @brief Check if any exceptions throw in the C++ API. Throw if possible.
  */
-#define DP_CHECK_OK(check_func, dp)     \
-  const char *err_msg = check_func(dp); \
-  if (std::strlen(err_msg))             \
-    throw deepmd::hpp::deepmd_exception(std::string(err_msg));
+#define DP_CHECK_OK(check_func, dp)                   \
+  const char *err_msg = check_func(dp);               \
+  if (std::strlen(err_msg)) {                         \
+    std::string err_msg_str = std::string(err_msg);   \
+    DP_DeleteChar(err_msg);                           \
+    throw deepmd::hpp::deepmd_exception(err_msg_str); \
+  }                                                   \
+  DP_DeleteChar(err_msg);
 
 template <typename FPTYPE>
 inline void _DP_DeepPotCompute(DP_DeepPot *dp,
@@ -1019,7 +1023,7 @@ class DeepPot {
   void get_type_map(std::string &type_map) {
     const char *type_map_c = DP_DeepPotGetTypeMap(dp);
     type_map.assign(type_map_c);
-    delete[] type_map_c;
+    DP_DeleteChar(type_map_c);
   };
   /**
    * @brief Print the summary of DeePMD-kit, including the version and the build
@@ -1864,7 +1868,7 @@ class DeepTensor {
   void get_type_map(std::string &type_map) {
     const char *type_map_c = DP_DeepTensorGetTypeMap(dt);
     type_map.assign(type_map_c);
-    delete[] type_map_c;
+    DP_DeleteChar(type_map_c);
   };
 
  private:
@@ -2009,9 +2013,11 @@ void inline read_file_to_string(std::string model, std::string &file_content) {
   if (size < 0) {
     // negtive size indicates error
     std::string error_message = std::string(c_file_content, -size);
+    DP_DeleteChar(c_file_content);
     throw deepmd::hpp::deepmd_exception(error_message);
   }
   file_content = std::string(c_file_content, size);
+  DP_DeleteChar(c_file_content);
 };
 
 /**
diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc
index 9d1ed7d323..935e812cf0 100644
--- a/source/api_c/src/c_api.cc
+++ b/source/api_c/src/c_api.cc
@@ -1421,4 +1421,6 @@ void DP_SelectMapInt(const int* in,
   }
 }
 
+void DP_DeleteChar(const char* c_str) { delete[] c_str; }
+
 }  // extern "C"
diff --git a/source/api_c/tests/test_deeppot_a.cc b/source/api_c/tests/test_deeppot_a.cc
index 50e8131cc0..63f53e16e9 100644
--- a/source/api_c/tests/test_deeppot_a.cc
+++ b/source/api_c/tests/test_deeppot_a.cc
@@ -172,6 +172,7 @@ TEST_F(TestInferDeepPotA, type_map) {
   const char* type_map = DP_DeepPotGetTypeMap(dp);
   char expected_type_map[] = "O H";
   EXPECT_EQ(strcmp(type_map, expected_type_map), 0);
+  DP_DeleteChar(type_map);
 }
 
 class TestInferDeepPotANoPBC : public ::testing::Test {

From a266b4888925d829f59f3aba63589c72cb05f83b Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Fri, 15 Dec 2023 03:22:59 -0500
Subject: [PATCH 47/97] add utils for DP native model format (#3064)

Split from #2987.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd_utils/model_format.py            | 240 ++++++++++++++++++++++++
 source/tests/test_model_format_utils.py |  73 +++++++
 2 files changed, 313 insertions(+)
 create mode 100644 deepmd_utils/model_format.py
 create mode 100644 source/tests/test_model_format_utils.py

diff --git a/deepmd_utils/model_format.py b/deepmd_utils/model_format.py
new file mode 100644
index 0000000000..68a6d4045b
--- /dev/null
+++ b/deepmd_utils/model_format.py
@@ -0,0 +1,240 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Native DP model format for multiple backends.
+
+See issue #2982 for more information.
+"""
+import json
+from typing import (
+    List,
+    Optional,
+)
+
+import h5py
+import numpy as np
+
+try:
+    from deepmd_utils._version import version as __version__
+except ImportError:
+    __version__ = "unknown"
+
+
+def traverse_model_dict(model_obj, callback: callable, is_variable: bool = False):
+    """Traverse a model dict in place and call callback on each variable.
+
+    Parameters
+    ----------
+    model_obj : object
+        The model object to traverse; dicts and lists are modified in place.
+    callback : callable
+        Called on each leaf below an "@variables" key; its result replaces the leaf.
+    is_variable : bool, optional
+        Whether the current node is inside an "@variables" subtree.
+
+    Returns
+    -------
+    object
+        The traversed dict/list itself, or the callback result for a variable leaf.
+    """
+    if isinstance(model_obj, dict):
+        for kk, vv in model_obj.items():
+            model_obj[kk] = traverse_model_dict(
+                vv, callback, is_variable=is_variable or kk == "@variables"
+            )
+    elif isinstance(model_obj, list):
+        for ii, vv in enumerate(model_obj):
+            model_obj[ii] = traverse_model_dict(vv, callback, is_variable=is_variable)
+    elif is_variable:
+        model_obj = callback(model_obj)
+    return model_obj
+
+
+class Counter:
+    """A callable counter returning 0, 1, 2, ... on successive calls.
+
+    Examples
+    --------
+    >>> counter = Counter()
+    >>> counter()
+    0
+    >>> counter()
+    1
+    """
+
+    def __init__(self):
+        self.count = -1  # incremented before being returned, so the first call gives 0
+
+    def __call__(self):
+        self.count += 1
+        return self.count
+
+
+def save_dp_model(filename: str, model_dict: dict, extra_info: Optional[dict] = None):
+    """Save a DP model to an HDF5 file in the native format.
+
+    Parameters
+    ----------
+    filename : str
+        The filename to save to.
+    model_dict : dict
+        The model dict to save; it is deep-copied and left unmodified.
+    extra_info : dict, optional
+        Extra meta information, merged into the top level of the saved JSON.
+    """
+    from copy import deepcopy
+
+    # Deep copy: traverse_model_dict rewrites nested dicts/lists in place.
+    model_dict = deepcopy(model_dict)
+    extra_info = {} if extra_info is None else extra_info.copy()
+    variable_counter = Counter()
+    with h5py.File(filename, "w") as f:
+        model_dict = traverse_model_dict(
+            model_dict,
+            lambda x: f.create_dataset(
+                f"variable_{variable_counter():04d}", data=x
+            ).name,
+        )
+        save_dict = {
+            "model": model_dict,
+            "software": "deepmd-kit",
+            "version": __version__,
+            **extra_info,
+        }
+        f.attrs["json"] = json.dumps(save_dict, separators=(",", ":"))
+
+
+def load_dp_model(filename: str) -> dict:
+    """Read a model stored in the native DP format back into a dict.
+
+    Parameters
+    ----------
+    filename : str
+        The name of the file to read.
+
+    Returns
+    -------
+    dict
+        The decoded dict (model plus meta information) with variables loaded.
+    """
+    with h5py.File(filename, "r") as h5_file:
+        loaded = json.loads(h5_file.attrs["json"])
+        # each "@variables" leaf names a dataset; replace it by a copy of its data
+        return traverse_model_dict(loaded, lambda name: h5_file[name][()].copy())
+
+
+class NativeLayer:
+    """Native representation of a layer, indexable by w/matrix, b/bias or idt.
+
+    Parameters
+    ----------
+    w : np.ndarray, optional
+        The weights of the layer.
+    b : np.ndarray, optional
+        The biases of the layer.
+    idt : np.ndarray, optional
+        The identity matrix of the layer.
+    """
+
+    def __init__(
+        self,
+        w: Optional[np.ndarray] = None,
+        b: Optional[np.ndarray] = None,
+        idt: Optional[np.ndarray] = None,
+    ) -> None:
+        self.w = w
+        self.b = b
+        self.idt = idt
+
+    def serialize(self) -> dict:
+        """Serialize the layer to a dict.
+
+        Returns
+        -------
+        dict
+            The serialized layer; "idt" is included only when it is not None.
+        """
+        data = {
+            "w": self.w,
+            "b": self.b,
+        }
+        if self.idt is not None:
+            data["idt"] = self.idt
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "NativeLayer":
+        """Deserialize the layer from a dict.
+
+        Parameters
+        ----------
+        data : dict
+            The dict to deserialize from; needs "w" and "b", "idt" is optional.
+        """
+        return cls(data["w"], data["b"], data.get("idt", None))
+
+    def __setitem__(self, key, value):
+        if key in ("w", "matrix"):
+            self.w = value
+        elif key in ("b", "bias"):
+            self.b = value
+        elif key == "idt":
+            self.idt = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ("w", "matrix"):
+            return self.w
+        elif key in ("b", "bias"):
+            return self.b
+        elif key == "idt":
+            return self.idt
+        else:
+            raise KeyError(key)
+
+
+class NativeNet:
+    """Native representation of a neural network.
+
+    Parameters
+    ----------
+    layers : list[NativeLayer], optional
+        The layers of the network.
+    """
+
+    def __init__(self, layers: Optional[List[NativeLayer]] = None) -> None:
+        self.layers = [] if layers is None else layers
+
+    def serialize(self) -> dict:
+        """Serialize the network to a dict.
+
+        Returns
+        -------
+        dict
+            The serialized network.
+        """
+        return {"layers": [layer.serialize() for layer in self.layers]}
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "NativeNet":
+        """Deserialize the network from a dict.
+
+        Parameters
+        ----------
+        data : dict
+            The dict to deserialize from.
+        """
+        return cls([NativeLayer.deserialize(layer) for layer in data["layers"]])
+
+    def __getitem__(self, key):
+        """Return layer ``key``, padding with distinct empty layers if needed."""
+        assert isinstance(key, int)
+        if len(self.layers) <= key:
+            self.layers.extend(NativeLayer() for _ in range(len(self.layers), key + 1))
+        return self.layers[key]
+
+    def __setitem__(self, key, value):
+        """Set layer ``key``, padding with distinct empty layers if needed."""
+        assert isinstance(key, int)
+        if len(self.layers) <= key:
+            self.layers.extend(NativeLayer() for _ in range(len(self.layers), key + 1))
+        self.layers[key] = value
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
new file mode 100644
index 0000000000..b959ace3f6
--- /dev/null
+++ b/source/tests/test_model_format_utils.py
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+import unittest
+from copy import (
+    deepcopy,
+)
+
+import numpy as np
+
+from deepmd_utils.model_format import (
+    NativeNet,
+    load_dp_model,
+    save_dp_model,
+)
+
+
+class TestNativeNet(unittest.TestCase):
+    """Round-trip checks for NativeNet (de)serialization."""
+
+    def setUp(self) -> None:
+        self.w = np.full((3, 2), 3.0)
+        self.b = np.full((3,), 4.0)
+
+    def test_serialize(self):
+        """Variables assigned through indexing show up in the serialized dict."""
+        net = NativeNet()
+        # layer 1 is filled before layer 0, so the network must grow on demand
+        for idx in (1, 0):
+            net[idx]["w"] = self.w
+            net[idx]["b"] = self.b
+        serialized = net.serialize()
+        layers = serialized["layers"]
+        for idx in (0, 1):
+            np.testing.assert_array_equal(layers[idx]["w"], self.w)
+            np.testing.assert_array_equal(layers[idx]["b"], self.b)
+
+    def test_deserialize(self):
+        """Variables from the input dict are reachable through indexing."""
+        layer_data = {"w": self.w, "b": self.b}
+        data = {
+            "layers": [dict(layer_data), dict(layer_data)],
+        }
+        net = NativeNet.deserialize(data)
+        for idx in (0, 1):
+            np.testing.assert_array_equal(net[idx]["w"], self.w)
+            np.testing.assert_array_equal(net[idx]["b"], self.b)
+
+
+class TestDPModel(unittest.TestCase):
+    """Save a model dict to disk and load it back."""
+
+    def setUp(self) -> None:
+        self.filename = "test_dp_model_format.dp"
+        self.w = np.full((3, 2), 3.0)
+        self.b = np.full((3,), 4.0)
+        layers = [{"w": self.w, "b": self.b} for _ in range(2)]
+        self.model_dict = {
+            "type": "some_type",
+            "@variables": {"layers": layers},
+        }
+
+    def test_save_load_model(self):
+        """The loaded file holds the model plus software/version metadata."""
+        save_dp_model(self.filename, deepcopy(self.model_dict))
+        loaded = load_dp_model(self.filename)
+        np.testing.assert_equal(loaded["model"], self.model_dict)
+        for meta_key in ("software", "version"):
+            assert meta_key in loaded
+
+    def tearDown(self) -> None:
+        """Remove the file written by the test, if any."""
+        if os.path.exists(self.filename):
+            os.remove(self.filename)

From a0f30f4f1cda17565b740ffb5bfcf9dbb7a30fda Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 17 Dec 2023 22:56:00 -0500
Subject: [PATCH 48/97] bump LAMMPS to stable_2Aug2023_update2 (#3066)

---
 backend/dynamic_metadata.py     |  2 +-
 doc/install/install-lammps.md   | 22 +++++++++++-----------
 pyproject.toml                  |  4 ++--
 source/install/build_cc.sh      |  2 +-
 source/install/build_from_c.sh  |  2 +-
 source/install/build_lammps.sh  |  2 +-
 source/install/test_cc.sh       |  2 +-
 source/install/test_cc_local.sh |  2 +-
 8 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py
index dad9c5b597..ab955c3cf8 100644
--- a/backend/dynamic_metadata.py
+++ b/backend/dynamic_metadata.py
@@ -57,7 +57,7 @@ def dynamic_metadata(
                 "sphinxcontrib-bibtex",
             ],
             "lmp": [
-                "lammps~=2023.8.2.1.0",
+                "lammps~=2023.8.2.2.0",
                 *find_libpython_requires,
             ],
             "ipi": [
diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md
index e643660cd1..5dbf690c67 100644
--- a/doc/install/install-lammps.md
+++ b/doc/install/install-lammps.md
@@ -14,10 +14,10 @@ make lammps
 DeePMD-kit will generate a module called `USER-DEEPMD` in the `build` directory, which supports either double or single float precision interface. Now download the LAMMPS code, and uncompress it.
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update1.tar.gz
-tar xf stable_2Aug2023_update1.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update2.tar.gz
+tar xf stable_2Aug2023_update2.tar.gz
 ```
-The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update1`.
+The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update2`.
 
 Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either make or CMake.
 
@@ -25,7 +25,7 @@ Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either mak
 
 Now go into the LAMMPS code and copy the DeePMD-kit module like this
 ```bash
-cd lammps-stable_2Aug2023_update1/src/
+cd lammps-stable_2Aug2023_update2/src/
 cp -r $deepmd_source_dir/source/build/USER-DEEPMD .
 make yes-kspace
 make yes-extra-fix
@@ -51,8 +51,8 @@ make no-user-deepmd
 Now go into the LAMMPS directory and create a directory called `build`:
 
 ```bash
-mkdir -p lammps-stable_2Aug2023_update1/build/
-cd lammps-stable_2Aug2023_update1/build/
+mkdir -p lammps-stable_2Aug2023_update2/build/
+cd lammps-stable_2Aug2023_update2/build/
 ```
 
 Patch the LAMMPS `CMakeLists.txt` file:
@@ -81,15 +81,15 @@ Starting from `8Apr2021`, LAMMPS also provides a plugin mode, allowing one to bu
 Now download the LAMMPS code (`8Apr2021` or later), and uncompress it:
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update1.tar.gz
-tar xf stable_2Aug2023_update1.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update2.tar.gz
+tar xf stable_2Aug2023_update2.tar.gz
 ```
 
-The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update1`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build`
+The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update2`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build`
 
 ```bash
-mkdir -p lammps-stable_2Aug2023_update1/build/
-cd lammps-stable_2Aug2023_update1/build/
+mkdir -p lammps-stable_2Aug2023_update2/build/
+cd lammps-stable_2Aug2023_update2/build/
 ```
 Now build LAMMPS. Note that `PLUGIN` must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want.
 ```bash
diff --git a/pyproject.toml b/pyproject.toml
index fdd4904eb5..7b733adc81 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -139,7 +139,7 @@ manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81"
 manylinux-aarch64-image = "manylinux_2_28"
 
 [tool.cibuildwheel.macos]
-environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1" }
+environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update2", DP_ENABLE_IPI="1" }
 before-all = [
     """if [[ "$CIBW_BUILD" != *macosx_arm64* ]]; then brew install mpich; fi""",
 ]
@@ -156,7 +156,7 @@ environment-pass = [
     "CUDA_VERSION",
     "DP_PKG_NAME",
 ]
-environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
+environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update2", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
 before-all = [
     """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""",
     """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""",
diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh
index 74e3835b74..fef9e82ebc 100755
--- a/source/install/build_cc.sh
+++ b/source/install/build_cc.sh
@@ -20,7 +20,7 @@ NPROC=$(nproc --all)
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DUSE_TF_PYTHON_LIBS=TRUE ${CUDA_ARGS} -DLAMMPS_VERSION=stable_2Aug2023_update1 ..
+cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DUSE_TF_PYTHON_LIBS=TRUE ${CUDA_ARGS} -DLAMMPS_VERSION=stable_2Aug2023_update2 ..
 cmake --build . -j${NPROC}
 cmake --install .
 
diff --git a/source/install/build_from_c.sh b/source/install/build_from_c.sh
index cd0aeca089..c1188252ab 100755
--- a/source/install/build_from_c.sh
+++ b/source/install/build_from_c.sh
@@ -13,7 +13,7 @@ NPROC=$(nproc --all)
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_2Aug2023_update1 ..
+cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_2Aug2023_update2 ..
 cmake --build . -j${NPROC}
 cmake --install .
 cmake --build . --target=lammps
diff --git a/source/install/build_lammps.sh b/source/install/build_lammps.sh
index c8cfa6ea54..2b5bf0a643 100755
--- a/source/install/build_lammps.sh
+++ b/source/install/build_lammps.sh
@@ -14,7 +14,7 @@ BUILD_TMP_DIR=${SCRIPT_PATH}/../build_lammps
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
 # download LAMMMPS
-LAMMPS_VERSION=stable_2Aug2023_update1
+LAMMPS_VERSION=stable_2Aug2023_update2
 if [ ! -d "lammps-${LAMMPS_VERSION}" ]; then
 	curl -L -o lammps.tar.gz https://github.com/lammps/lammps/archive/refs/tags/${LAMMPS_VERSION}.tar.gz
 	tar vxzf lammps.tar.gz
diff --git a/source/install/test_cc.sh b/source/install/test_cc.sh
index 0a8700b275..0dd35f5615 100755
--- a/source/install/test_cc.sh
+++ b/source/install/test_cc.sh
@@ -17,7 +17,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023_update1 ${CUDA_ARGS} ..
+cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023_update2 ${CUDA_ARGS} ..
 cmake --build . -j${NPROC}
 cmake --install .
 ctest --output-on-failure
diff --git a/source/install/test_cc_local.sh b/source/install/test_cc_local.sh
index 74477a8c2a..22d22a27f6 100755
--- a/source/install/test_cc_local.sh
+++ b/source/install/test_cc_local.sh
@@ -18,7 +18,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023_update1 ${CUDA_ARGS} ..
+cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023_update2 ${CUDA_ARGS} ..
 cmake --build . -j${NPROC}
 cmake --install .
 ctest --output-on-failure

From a47cd0623f8e7293b7aa0fd1a4e1dfcde433746e Mon Sep 17 00:00:00 2001
From: Ye Ding <dingye@westlake.edu.cn>
Date: Thu, 21 Dec 2023 09:33:00 +0800
Subject: [PATCH 49/97] Update the path to training and validation data dir in
 zinc_se_a_mask.json (#3068)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd/descriptor/se_a_mask.py            | 13 +++++++++++++
 examples/zinc_protein/zinc_se_a_mask.json |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/deepmd/descriptor/se_a_mask.py b/deepmd/descriptor/se_a_mask.py
index 780b34d294..cc2e6b4fc8 100644
--- a/deepmd/descriptor/se_a_mask.py
+++ b/deepmd/descriptor/se_a_mask.py
@@ -417,3 +417,16 @@ def prod_force_virial(
         atom_virial = tf.zeros([1, natoms[1], 9], dtype=force.dtype)
 
         return force, virial, atom_virial
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Return the local data unchanged; no neighbor statistics are run here.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data referring to the current class
+        """
+        return local_jdata
diff --git a/examples/zinc_protein/zinc_se_a_mask.json b/examples/zinc_protein/zinc_se_a_mask.json
index b23987cf5d..04f63aa4ed 100644
--- a/examples/zinc_protein/zinc_se_a_mask.json
+++ b/examples/zinc_protein/zinc_se_a_mask.json
@@ -68,14 +68,14 @@
   "training": {
     "training_data": {
       "systems": [
-        "example/zinc_protein/train_data_dp_mask/"
+        "examples/zinc_protein/train_data_dp_mask/"
       ],
       "batch_size": 2,
       "_comment7": "that's all"
     },
     "validation_data": {
       "systems": [
-        "example/zinc_protein/val_data_dp_mask/"
+        "examples/zinc_protein/val_data_dp_mask/"
       ],
       "batch_size": 2,
       "_comment8": "that's all"

From 2b52187e32a302796720f159dfdb0d1ca36e7710 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 21 Dec 2023 09:38:33 +0800
Subject: [PATCH 50/97] [pre-commit.ci] pre-commit autoupdate (#3073)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/PyCQA/isort: 5.13.0 →
5.13.2](https://github.com/PyCQA/isort/compare/5.13.0...5.13.2)
- [github.com/astral-sh/ruff-pre-commit: v0.1.7 →
v0.1.8](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.7...v0.1.8)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ce83792f10..edca939faa 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,14 +23,14 @@ repos:
     -   id: check-toml
 # Python
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.13.0
+    rev: 5.13.2
     hooks:
     - id: isort
       files: \.py$
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.7
+    rev: v0.1.8
     hooks:
     - id: ruff
       args: ["--fix"]

From 0032f5d5ae19d4fa4cbdb2529beec663744307d4 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 20 Dec 2023 20:48:10 -0500
Subject: [PATCH 51/97] add CodeQL checks (#3075)

It can scan the codes and report as follows:


![image](https://github.com/deepmodeling/deepmd-kit/assets/9496702/77b518b9-56f5-414a-b116-68da3aa9d67a)

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/workflows/codeql.yml | 58 ++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 .github/workflows/codeql.yml

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 0000000000..a9a162432c
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,58 @@
+name: "CodeQL"
+
+on:
+  push:
+  pull_request:
+  schedule:
+    - cron: '45 2 * * 2'
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'c-cpp', 'javascript-typescript', 'python' ]
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+        cache: 'pip'
+      if: matrix.language == 'c-cpp'
+    - name: "Setup dependencies"
+      if: matrix.language == 'c-cpp'
+      run: |
+        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
+         && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
+         && sudo apt-get update \
+         && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2
+        python -m pip install tensorflow
+      env:
+        DEBIAN_FRONTEND: noninteractive
+    # Initializes the CodeQL tools for scanning.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v3
+      with:
+        languages: ${{ matrix.language }}
+        queries: security-extended,security-and-quality
+    - name: "Run, Build Application using script"
+      run: source/install/build_cc.sh
+      env:
+        DP_VARIANT: cuda
+        DOWNLOAD_TENSORFLOW: "FALSE"
+      if: matrix.language == 'c-cpp'
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v3
+      with:
+        category: "/language:${{matrix.language}}"

From f79eac1f9c5b1d661a5e2425afbcfcf63ebe2af1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 21 Dec 2023 09:49:38 +0800
Subject: [PATCH 52/97] Bump actions/upload-artifact from 3 to 4 (#3070)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps
[actions/upload-artifact](https://github.com/actions/upload-artifact)
from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/upload-artifact/releases">actions/upload-artifact's
releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<h2>What's Changed</h2>
<p>The release of upload-artifact@v4 and download-artifact@v4 are major
changes to the backend architecture of Artifacts. They have numerous
performance and behavioral improvements.</p>
<p>For more information, see the <a
href="https://github.com/actions/toolkit/tree/main/packages/artifact"><code>@​actions/artifact</code></a>
documentation.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/vmjoseph"><code>@​vmjoseph</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/upload-artifact/pull/464">actions/upload-artifact#464</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/upload-artifact/compare/v3...v4.0.0">https://github.com/actions/upload-artifact/compare/v3...v4.0.0</a></p>
<h2>v3.1.3</h2>
<h2>What's Changed</h2>
<ul>
<li>chore(github): remove trailing whitespaces by <a
href="https://github.com/ljmf00"><code>@​ljmf00</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/313">actions/upload-artifact#313</a></li>
<li>Bump <code>@​actions/artifact</code> version to v1.1.2 by <a
href="https://github.com/bethanyj28"><code>@​bethanyj28</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/436">actions/upload-artifact#436</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/upload-artifact/compare/v3...v3.1.3">https://github.com/actions/upload-artifact/compare/v3...v3.1.3</a></p>
<h2>v3.1.2</h2>
<ul>
<li>Update all <code>@actions/*</code> NPM packages to their latest
versions- <a
href="https://redirect.github.com/actions/upload-artifact/issues/374">#374</a></li>
<li>Update all dev dependencies to their most recent versions - <a
href="https://redirect.github.com/actions/upload-artifact/issues/375">#375</a></li>
</ul>
<h2>v3.1.1</h2>
<ul>
<li>Update actions/core package to latest version to remove
<code>set-output</code> deprecation warning <a
href="https://redirect.github.com/actions/upload-artifact/issues/351">#351</a></li>
</ul>
<h2>v3.1.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Bump <code>@​actions/artifact</code> to v1.1.0 (<a
href="https://redirect.github.com/actions/upload-artifact/pull/327">actions/upload-artifact#327</a>)
<ul>
<li>Adds checksum headers on artifact upload (<a
href="https://redirect.github.com/actions/toolkit/pull/1095">actions/toolkit#1095</a>)
(<a
href="https://redirect.github.com/actions/toolkit/pull/1063">actions/toolkit#1063</a>)</li>
</ul>
</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/actions/upload-artifact/commit/c7d193f32edcb7bfad88892161225aeda64e9392"><code>c7d193f</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/upload-artifact/issues/466">#466</a>
from actions/v4-beta</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/13131bb095770b4070a7477c3cd2d96e1c16d9f4"><code>13131bb</code></a>
licensed cache</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/4a6c273b9834f66a1d05c170dc3f80f9cdb9def1"><code>4a6c273</code></a>
Merge branch 'main' into v4-beta</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/f391bb91a3d3118aeca171c365bb319ece276b37"><code>f391bb9</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/upload-artifact/issues/465">#465</a>
from actions/robherley/v4-documentation</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/9653d03c4b74c32144e02dae644fea70e079d4b3"><code>9653d03</code></a>
Apply suggestions from code review</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/875b63076402f25ef9d52c294c86ba4f97810575"><code>875b630</code></a>
add limitations section</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/ecb21463e93740a6be75c3116242169bfdbcb15a"><code>ecb2146</code></a>
add compression example</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/5e7604f84a055838f64ed68bb9904751523081ae"><code>5e7604f</code></a>
trim some repeated info</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/d6437d07581fe318a364512e6cf6b1dca6b4f92c"><code>d6437d0</code></a>
naming</li>
<li><a
href="https://github.com/actions/upload-artifact/commit/1b561557037b4957d7d184e9aac02bec86c771eb"><code>1b56155</code></a>
s/v4-beta/v4/g</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/upload-artifact/compare/v3...v4">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/upload-artifact&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/workflows/build_wheel.yml | 19 +++++++++++--------
 .github/workflows/package_c.yml   |  9 +++++----
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index e700109cce..40fc036419 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -77,8 +77,9 @@ jobs:
           DP_VARIANT: ${{ matrix.dp_variant }}
           CUDA_VERSION: ${{ matrix.cuda_version }}
           DP_PKG_NAME: ${{ matrix.dp_pkg_name }}
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
+          name: cibw-cp${{ matrix.python }}-${{ matrix.platform_id }}-cu${{ matrix.cuda_version }}-${{ strategy.job-index }}
           path: ./wheelhouse/*.whl
   build_sdist:
     name: Build source distribution
@@ -96,8 +97,9 @@ jobs:
       - name: Build sdist
         run: python -m build --sdist
 
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
+          name: cibw-sdist
           path: dist/*.tar.gz
 
   upload_pypi:
@@ -108,10 +110,11 @@ jobs:
       id-token: write
     if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
     steps:
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
-          name: artifact
+          pattern: cibw-*
           path: dist
+          merge-multiple: true
       - uses: pypa/gh-action-pypi-publish@release/v1
 
   build_docker:
@@ -128,10 +131,10 @@ jobs:
             cuda_version: "11"
     steps:
       - uses: actions/checkout@v4
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
-          name: artifact
           path: source/install/docker/dist
+          merge-multiple: true
       - name: Log in to the Container registry
         uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d
         with:
@@ -160,10 +163,10 @@ jobs:
     needs: [build_wheels, build_sdist]
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
-          name: artifact
           path: dist/packages
+          merge-multiple: true
       - uses: actions/setup-python@v5
         name: Install Python
         with:
diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml
index 2b5f74b97d..5594c79181 100644
--- a/.github/workflows/package_c.yml
+++ b/.github/workflows/package_c.yml
@@ -28,9 +28,9 @@ jobs:
         if: matrix.filename != 'libdeepmd_c.tar.gz'
       # for download and debug
       - name: Upload artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
-          name: libdeepmd_c
+          name: libdeepmd_c-${{ strategy.job-index }}-${{ matrix.filename }}
           path: ${{ matrix.filename }}
       - name: Test C library
         run: ./source/install/docker_test_package_c.sh
@@ -46,9 +46,10 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Download artifact
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
-          name: libdeepmd_c
+          pattern: libdeepmd_c-*
+          merge-multiple: true
       - run: tar -vxzf ./libdeepmd_c.tar.gz
       - name: Test C library
         run: ./source/install/build_from_c.sh

From b4fe171a75d466bccc7e2f2b8052483696872c87 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 20 Dec 2023 20:55:51 -0500
Subject: [PATCH 53/97] Fix catching by value (#3077)

Fix:
- https://github.com/deepmodeling/deepmd-kit/security/code-scanning/1229
- https://github.com/deepmodeling/deepmd-kit/security/code-scanning/1230
- https://github.com/deepmodeling/deepmd-kit/security/code-scanning/1231
- https://github.com/deepmodeling/deepmd-kit/security/code-scanning/1232
---
 source/api_cc/src/DeepPot.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index 018c3aca09..bb39a9fd60 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -474,7 +474,7 @@ void DeepPot::init(const std::string& model,
   ntypes = get_scalar<int>("descrpt_attr/ntypes");
   try {
     ntypes_spin = get_scalar<int>("spin_attr/ntypes_spin");
-  } catch (deepmd::deepmd_exception) {
+  } catch (const deepmd::deepmd_exception) {
     ntypes_spin = 0;
   }
   dfparam = get_scalar<int>("fitting_attr/dfparam");
@@ -488,7 +488,7 @@ void DeepPot::init(const std::string& model,
   if (daparam > 0) {
     try {
       aparam_nall = get_scalar<bool>("fitting_attr/aparam_nall");
-    } catch (deepmd::deepmd_exception) {
+    } catch (const deepmd::deepmd_exception) {
       aparam_nall = false;
     }
   } else {
@@ -1331,7 +1331,7 @@ void DeepPotModelDevi::init(const std::vector<std::string>& models,
   ntypes = get_scalar<int>("descrpt_attr/ntypes");
   try {
     ntypes_spin = get_scalar<int>("spin_attr/ntypes_spin");
-  } catch (deepmd::deepmd_exception) {
+  } catch (const deepmd::deepmd_exception) {
     ntypes_spin = 0;
   }
   dfparam = get_scalar<int>("fitting_attr/dfparam");
@@ -1345,7 +1345,7 @@ void DeepPotModelDevi::init(const std::vector<std::string>& models,
   if (daparam > 0) {
     try {
       aparam_nall = get_scalar<bool>("fitting_attr/aparam_nall");
-    } catch (deepmd::deepmd_exception) {
+    } catch (const deepmd::deepmd_exception) {
       aparam_nall = false;
     }
   } else {

From 39f13c00cedd0a1fcac089e5fd1e7007800afca6 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 24 Dec 2023 20:42:06 -0500
Subject: [PATCH 54/97] docs: update documentation for pre-compiled C library
 (#3083)

The link was changed by #3070.
---
 doc/install/easy-install-dev.md       | 4 ++--
 doc/install/install-from-c-library.md | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md
index 6fd9171730..f3cf52c1f5 100644
--- a/doc/install/easy-install-dev.md
+++ b/doc/install/easy-install-dev.md
@@ -26,8 +26,8 @@ pip install -U --pre deepmd-kit[gpu,cu12,lmp] --extra-index-url https://deepmode
 
 ## Download pre-compiled C Library
 
-The [pre-comiled C library](./install-from-c-library.md) can be downloaded from [here](https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c.zip), or via a shell command:
+The [pre-compiled C library](./install-from-c-library.md) can be downloaded from [here](https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip), or via a shell command:
 
 ```sh
-wget https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c.zip && unzip libdeepmd_c.zip
+wget https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip && unzip libdeepmd_c-0-libdeepmd_c.tar.gz.zip
 ```
diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md
index eb89538277..7613fdb772 100644
--- a/doc/install/install-from-c-library.md
+++ b/doc/install/install-from-c-library.md
@@ -1,6 +1,12 @@
 # Install from pre-compiled C library
 
 DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own.
+It can be downloaded via the shell command:
+
+```sh
+wget https://github.com/deepmodeling/deepmd-kit/releases/latest/download/libdeepmd_c.tar.gz
+tar xzf libdeepmd_c.tar.gz
+```
 
 The library is built in Linux (GLIBC 2.17) with CUDA 12.2 (`libdeepmd_c.tar.gz`) or 11.8 (`libdeepmd_c_cu11.tar.gz`). It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website.
 

From 412bc3771851a2f04337557acf69976c6eb04510 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 26 Dec 2023 07:43:49 +0800
Subject: [PATCH 55/97] Bump actions/deploy-pages from 3 to 4 (#3085)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [actions/deploy-pages](https://github.com/actions/deploy-pages)
from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/deploy-pages/releases">actions/deploy-pages's
releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<h1>Changelog</h1>
<ul>
<li>Deploy pages using artifact IDs <a
href="https://github.com/konradpabjan"><code>@​konradpabjan</code></a>
(<a
href="https://redirect.github.com/actions/deploy-pages/issues/251">#251</a>)</li>
<li>This version requires the permission <code>actions: read</code> in
the workflows which use it.</li>
</ul>
<hr />
<p>ℹ️ This version of <code>actions/deploy-pages</code> is
<strong>ONLY</strong> compatible with artifacts uploaded by either:</p>
<ul>
<li><a
href="https://github.com/actions/upload-pages-artifact/releases/tag/v3.0.0"><code>actions/upload-pages-artifact@v3</code></a>
or newer</li>
<li><a
href="https://github.com/actions/upload-artifact/releases/tag/v4.0.0"><code>actions/upload-artifact@v4</code></a>
or newer.</li>
</ul>
<p>See details of <a
href="https://github.com/actions/deploy-pages/compare/v3.0.1...v4.0.0">all
code changes</a> since previous release.</p>
<p>:warning: For use with products other than GitHub.com, such as GitHub
Enterprise Server, please consult the <a
href="https://github.com/actions/deploy-pages/#compatibilty">compatibility
table</a>.</p>
<h2>v3.0.1</h2>
<h1>Changelog</h1>
<ul>
<li>Bump eslint from 8.54.0 to 8.55.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/266">#266</a>)</li>
<li>Bump nock from 13.3.8 to 13.4.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/267">#267</a>)</li>
<li>Bump eslint-config-prettier from 9.0.0 to 9.1.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/268">#268</a>)</li>
<li>Bump <code>@​actions/core</code> from 1.10.0 to 1.10.1 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/269">#269</a>)</li>
<li>Bump <code>@​actions/github</code> from 5.1.1 to 6.0.0 <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/261">#261</a>)</li>
<li>Update compatibility table for v3 <a
href="https://github.com/JamesMGreene"><code>@​JamesMGreene</code></a>
(<a
href="https://redirect.github.com/actions/deploy-pages/issues/270">#270</a>)</li>
</ul>
<h2>🧰 Maintenance</h2>
<ul>
<li>chore/docs: update version, fix typos <a
href="https://github.com/kbdharun"><code>@​kbdharun</code></a> (<a
href="https://redirect.github.com/actions/deploy-pages/issues/272">#272</a>)</li>
</ul>
<hr />
<p>See details of <a
href="https://github.com/actions/deploy-pages/compare/v3.0.0...v3.0.1">all
code changes</a> since previous release.</p>
<p>:warning: For use with products other than GitHub.com, such as GitHub
Enterprise Server, please consult the <a
href="https://github.com/actions/deploy-pages/#compatibilty">compatibility
table</a>.</p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/actions/deploy-pages/commit/7a9bd943aa5e5175aeb8502edcc6c1c02d398e10"><code>7a9bd94</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/290">#290</a>
from actions/dependabot/npm_and_yarn/undici-6.2.1</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/eee8a27158326d4a4993574decb74691422aa9ff"><code>eee8a27</code></a>
Update distributables after Dependabot 🤖</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/b6e5c8516028359d6b81db642652a6634db91767"><code>b6e5c85</code></a>
Bump undici from 6.0.1 to 6.2.1</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/b8d2528df3fc21f73e431c0648ff7e80808433e0"><code>b8d2528</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/282">#282</a>
from actions/dependabot/github_actions/github/codeql-...</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/53d1eac7fd4d43bdb78c7492d45ce17598a8fbf6"><code>53d1eac</code></a>
Bump github/codeql-action from 2 to 3</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/3f0ef9d75df3fc372f78051e41dfa694bb50f83f"><code>3f0ef9d</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/281">#281</a>
from actions/dependabot/github_actions/actions/upload...</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/82751044df20afbd0ae563fb79e45eb19524ed36"><code>8275104</code></a>
Bump actions/upload-artifact from 3 to 4</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/9be9d731c91469d80c7cee3e565a1b54220def07"><code>9be9d73</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/280">#280</a>
from actions/dependabot/npm_and_yarn/eslint-8.56.0</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/d8afefafec56df80907efefd0d5fc75007540533"><code>d8afefa</code></a>
Bump eslint from 8.55.0 to 8.56.0</li>
<li><a
href="https://github.com/actions/deploy-pages/commit/304d0b77f8ce8509118d3d01565a9ac8dc244104"><code>304d0b7</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/deploy-pages/issues/277">#277</a>
from actions/dependabot/github_actions/actions/publis...</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/deploy-pages/compare/v3...v4">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/deploy-pages&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 40fc036419..1f59e5173b 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -192,7 +192,7 @@ jobs:
     steps:
       - name: Deploy to GitHub Pages
         id: deployment
-        uses: actions/deploy-pages@v3
+        uses: actions/deploy-pages@v4
 
   pass:
     name: Pass testing build wheels

From 6819b8ed7667c0bfbe90e27c139adb58405c9809 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 26 Dec 2023 07:44:08 +0800
Subject: [PATCH 56/97] Bump docker/metadata-action from 5.3.0 to 5.4.0 (#3086)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps
[docker/metadata-action](https://github.com/docker/metadata-action) from
5.3.0 to 5.4.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/docker/metadata-action/releases">docker/metadata-action's
releases</a>.</em></p>
<blockquote>
<h2>v5.4.0</h2>
<ul>
<li>Bump <code>@​docker/actions-toolkit</code> from 0.15.0 to 0.16.0 in
<a
href="https://redirect.github.com/docker/metadata-action/pull/369">docker/metadata-action#369</a></li>
<li>Bump csv-parse from 5.5.2 to 5.5.3 in <a
href="https://redirect.github.com/docker/metadata-action/pull/365">docker/metadata-action#365</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/docker/metadata-action/compare/v5.3.0...v5.4.0">https://github.com/docker/metadata-action/compare/v5.3.0...v5.4.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/docker/metadata-action/commit/9dc751fe249ad99385a2583ee0d084c400eee04e"><code>9dc751f</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/metadata-action/issues/369">#369</a>
from docker/dependabot/npm_and_yarn/docker/actions-to...</li>
<li><a
href="https://github.com/docker/metadata-action/commit/c94c54c4e6ef3734c17f3f3aa488848d7f9b50d9"><code>c94c54c</code></a>
chore: update generated content</li>
<li><a
href="https://github.com/docker/metadata-action/commit/187f09259dab00ccff5266c9a438eeb7bcd87b83"><code>187f092</code></a>
chore(deps): Bump <code>@​docker/actions-toolkit</code> from 0.15.0 to
0.16.0</li>
<li><a
href="https://github.com/docker/metadata-action/commit/6d6eaf34518db036006be0b031c21fd4eb72ef69"><code>6d6eaf3</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/metadata-action/issues/365">#365</a>
from docker/dependabot/npm_and_yarn/csv-parse-5.5.3</li>
<li><a
href="https://github.com/docker/metadata-action/commit/1484a7ec95cda8ca32484f85f0d71de83d7b6557"><code>1484a7e</code></a>
chore(deps): Bump csv-parse from 5.5.2 to 5.5.3</li>
<li>See full diff in <a
href="https://github.com/docker/metadata-action/compare/31cebacef4805868f9ce9a0cb03ee36c32df2ac4...9dc751fe249ad99385a2583ee0d084c400eee04e">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/metadata-action&package-manager=github_actions&previous-version=5.3.0&new-version=5.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 1f59e5173b..18767d2137 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -144,7 +144,7 @@ jobs:
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@31cebacef4805868f9ce9a0cb03ee36c32df2ac4
+        uses: docker/metadata-action@9dc751fe249ad99385a2583ee0d084c400eee04e
         with:
           images: ghcr.io/deepmodeling/deepmd-kit
 

From 54e5988501890030d5b3c87f0d16e4c2938fe25a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 25 Dec 2023 20:05:23 -0500
Subject: [PATCH 57/97] Bump actions/upload-pages-artifact from 2 to 3 (#3087)

---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 18767d2137..e47f14b1f4 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -176,7 +176,7 @@ jobs:
           ls dist/packages > package_list.txt
           dumb-pypi --output-dir dist --packages-url ../../packages --package-list package_list.txt --title "DeePMD-kit Developed Packages"
       - name: Upload Pages artifact
-        uses: actions/upload-pages-artifact@v2
+        uses: actions/upload-pages-artifact@v3
         with:
           path: dist
   deploy_pypi_index:

From 228711136f245cada12e9a5e4332173da577f702 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 28 Dec 2023 17:58:13 +0800
Subject: [PATCH 58/97] [pre-commit.ci] pre-commit autoupdate (#3089)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.8 →
v0.1.9](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.8...v0.1.9)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index edca939faa..efa2bc1675 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,7 +30,7 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.8
+    rev: v0.1.9
     hooks:
     - id: ruff
       args: ["--fix"]

From 985a88676745f6f68af161a6aba693210796ff7d Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 1 Jan 2024 19:45:06 -0500
Subject: [PATCH 59/97] lmp: Register styles when using CMake (#3097)

Fix #3092.
---
 source/lmp/builtin.cmake | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/source/lmp/builtin.cmake b/source/lmp/builtin.cmake
index 507fe7bf1a..f29e9d3319 100644
--- a/source/lmp/builtin.cmake
+++ b/source/lmp/builtin.cmake
@@ -29,3 +29,7 @@ target_include_directories(
                  ${LAMMPS_SOURCE_DIR}/KSPACE ${LAMMPS_SOURCE_DIR}/EXTRA-FIX)
 target_compile_definitions(
   lammps PRIVATE "LAMMPS_VERSION_NUMBER=${LAMMPS_VERSION_NUMBER}")
+
+# register styles
+registerstyles(${CMAKE_CURRENT_LIST_DIR})
+generatestyleheaders(${LAMMPS_STYLE_HEADERS_DIR})

From a905817f77a59306e2a393e91f34ee4e62f611c6 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 2 Jan 2024 21:41:46 -0500
Subject: [PATCH 60/97] Add pairwise tabulation as an independent model (#3101)

Add pairwise tabulation as an independent model, which can be summed
with DP (DP + PairTab) via the linear model, as an alternative to interpolation.
PairTab can be used for any pairwise potentials, e.g., d3, LJ, ZBL, etc.

Fix #3099.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 README.md                          |   2 +-
 deepmd/model/model.py              |   5 +
 deepmd/model/pairtab.py            | 288 +++++++++++++++++++++++++++++
 deepmd/utils/argcheck.py           |  21 +++
 doc/model/index.md                 |   2 +-
 doc/model/pairtab.md               |  55 +++++-
 examples/water/d3/README.md        |  11 ++
 examples/water/d3/dftd3.txt        | 100 ++++++++++
 examples/water/d3/input.json       |  95 ++++++++++
 source/tests/test_model_pairtab.py | 127 +++++++++++++
 10 files changed, 703 insertions(+), 3 deletions(-)
 create mode 100644 deepmd/model/pairtab.py
 create mode 100644 examples/water/d3/README.md
 create mode 100644 examples/water/d3/dftd3.txt
 create mode 100644 examples/water/d3/input.json
 create mode 100644 source/tests/test_model_pairtab.py

diff --git a/README.md b/README.md
index a1e9c9484a..81fdead098 100644
--- a/README.md
+++ b/README.md
@@ -114,7 +114,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp
     - [Deep potential long-range](doc/model/dplr.md)
     - [Deep Potential - Range Correction (DPRc)](doc/model/dprc.md)
     - [Linear model](doc/model/linear.md)
-    - [Interpolation with a pairwise potential](doc/model/pairtab.md)
+    - [Interpolation or combination with a pairwise potential](doc/model/pairtab.md)
 - [Training](doc/train/index.md)
     - [Training a model](doc/train/training.md)
     - [Advanced options](doc/train/training-advanced.md)
diff --git a/deepmd/model/model.py b/deepmd/model/model.py
index dd439056b4..6117b4942d 100644
--- a/deepmd/model/model.py
+++ b/deepmd/model/model.py
@@ -97,6 +97,9 @@ def get_class_by_input(cls, input: dict):
         from deepmd.model.multi import (
             MultiModel,
         )
+        from deepmd.model.pairtab import (
+            PairTabModel,
+        )
         from deepmd.model.pairwise_dprc import (
             PairwiseDPRc,
         )
@@ -112,6 +115,8 @@ def get_class_by_input(cls, input: dict):
             return FrozenModel
         elif model_type == "linear_ener":
             return LinearEnergyModel
+        elif model_type == "pairtab":
+            return PairTabModel
         else:
             raise ValueError(f"unknown model type: {model_type}")
 
diff --git a/deepmd/model/pairtab.py b/deepmd/model/pairtab.py
new file mode 100644
index 0000000000..38934818e6
--- /dev/null
+++ b/deepmd/model/pairtab.py
@@ -0,0 +1,288 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from enum import (
+    Enum,
+)
+from typing import (
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.env import (
+    GLOBAL_TF_FLOAT_PRECISION,
+    MODEL_VERSION,
+    global_cvt_2_ener_float,
+    op_module,
+    tf,
+)
+from deepmd.fit.fitting import (
+    Fitting,
+)
+from deepmd.loss.loss import (
+    Loss,
+)
+from deepmd.model.model import (
+    Model,
+)
+from deepmd.utils.pair_tab import (
+    PairTab,
+)
+
+
+class PairTabModel(Model):
+    """Pairwise tabulation energy model.
+
+    This model can be used to tabulate the pairwise energy between atoms for either
+    short-range or long-range interactions, such as D3, LJ, ZBL, etc. It should not
+    be used alone, but rather as one submodel of a linear (sum) model, such as
+    DP+D3.
+
+    Do not place this model first in a linear model, since the linear
+    model fetches the type map from its first model.
+
+    At this moment, the model does not smooth the energy at the cutoff radius, so
+    one needs to make sure the energy has been smoothed to zero.
+
+    Parameters
+    ----------
+    tab_file : str
+        The path to the tabulation file.
+    rcut : float
+        The cutoff radius
+    sel : int or list[int]
+        The maximum number of atoms in the cut-off radius
+    """
+
+    model_type = "ener"
+
+    def __init__(
+        self, tab_file: str, rcut: float, sel: Union[int, List[int]], **kwargs
+    ):
+        super().__init__()
+        self.tab_file = tab_file
+        self.tab = PairTab(self.tab_file)
+        self.ntypes = self.tab.ntypes
+        self.rcut = rcut
+        if isinstance(sel, int):
+            self.sel = sel
+        elif isinstance(sel, list):
+            self.sel = sum(sel)
+        else:
+            raise TypeError("sel must be int or list[int]")
+
+    def build(
+        self,
+        coord_: tf.Tensor,
+        atype_: tf.Tensor,
+        natoms: tf.Tensor,
+        box: tf.Tensor,
+        mesh: tf.Tensor,
+        input_dict: dict,
+        frz_model: Optional[str] = None,
+        ckpt_meta: Optional[str] = None,
+        suffix: str = "",
+        reuse: Optional[Union[bool, Enum]] = None,
+    ):
+        """Build the model.
+
+        Parameters
+        ----------
+        coord_ : tf.Tensor
+            The coordinates of atoms
+        atype_ : tf.Tensor
+            The atom types of atoms
+        natoms : tf.Tensor
+            The number of atoms
+        box : tf.Tensor
+            The box vectors
+        mesh : tf.Tensor
+            The mesh vectors
+        input_dict : dict
+            The input dict
+        frz_model : str, optional
+            The path to the frozen model
+        ckpt_meta : str, optional
+            The path prefix of the checkpoint and meta files
+        suffix : str, optional
+            The suffix of the scope
+        reuse : bool or tf.AUTO_REUSE, optional
+            Whether to reuse the variables
+
+        Returns
+        -------
+        dict
+            The output dict
+        """
+        tab_info, tab_data = self.tab.get()
+        with tf.variable_scope("model_attr" + suffix, reuse=reuse):
+            self.tab_info = tf.get_variable(
+                "t_tab_info",
+                tab_info.shape,
+                dtype=tf.float64,
+                trainable=False,
+                initializer=tf.constant_initializer(tab_info, dtype=tf.float64),
+            )
+            self.tab_data = tf.get_variable(
+                "t_tab_data",
+                tab_data.shape,
+                dtype=tf.float64,
+                trainable=False,
+                initializer=tf.constant_initializer(tab_data, dtype=tf.float64),
+            )
+            t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string)
+            t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string)
+            t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string)
+
+        with tf.variable_scope("fitting_attr" + suffix, reuse=reuse):
+            t_dfparam = tf.constant(0, name="dfparam", dtype=tf.int32)
+            t_daparam = tf.constant(0, name="daparam", dtype=tf.int32)
+        with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse):
+            t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32)
+            t_rcut = tf.constant(
+                self.rcut, name="rcut", dtype=GLOBAL_TF_FLOAT_PRECISION
+            )
+        coord = tf.reshape(coord_, [-1, natoms[1] * 3])
+        atype = tf.reshape(atype_, [-1, natoms[1]])
+        box = tf.reshape(box, [-1, 9])
+        # perhaps we need a OP that only outputs rij and nlist
+        (
+            _,
+            _,
+            rij,
+            nlist,
+            _,
+            _,
+        ) = op_module.prod_env_mat_a_mix(
+            coord,
+            atype,
+            natoms,
+            box,
+            mesh,
+            np.zeros([self.ntypes, self.sel * 4]),
+            np.ones([self.ntypes, self.sel * 4]),
+            rcut_a=-1,
+            rcut_r=self.rcut,
+            rcut_r_smth=self.rcut,
+            sel_a=[self.sel],
+            sel_r=[0],
+        )
+        scale = tf.ones([tf.shape(coord)[0], natoms[0]], dtype=tf.float64)
+        tab_atom_ener, tab_force, tab_atom_virial = op_module.pair_tab(
+            self.tab_info,
+            self.tab_data,
+            atype,
+            rij,
+            nlist,
+            natoms,
+            scale,
+            sel_a=[self.sel],
+            sel_r=[0],
+        )
+        energy_raw = tf.reshape(
+            tab_atom_ener, [-1, natoms[0]], name="o_atom_energy" + suffix
+        )
+        energy = tf.reduce_sum(
+            global_cvt_2_ener_float(energy_raw), axis=1, name="o_energy" + suffix
+        )
+        force = tf.reshape(tab_force, [-1, 3 * natoms[1]], name="o_force" + suffix)
+        virial = tf.reshape(
+            tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis=1),
+            [-1, 9],
+            name="o_virial" + suffix,
+        )
+        atom_virial = tf.reshape(
+            tab_atom_virial, [-1, 9 * natoms[1]], name="o_atom_virial" + suffix
+        )
+        model_dict = {}
+        model_dict["energy"] = energy
+        model_dict["force"] = force
+        model_dict["virial"] = virial
+        model_dict["atom_ener"] = energy_raw
+        model_dict["atom_virial"] = atom_virial
+        model_dict["coord"] = coord
+        model_dict["atype"] = atype
+
+        return model_dict
+
+    def init_variables(
+        self,
+        graph: tf.Graph,
+        graph_def: tf.GraphDef,
+        model_type: str = "original_model",
+        suffix: str = "",
+    ) -> None:
+        """Init the embedding net variables with the given frozen model.
+
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        model_type : str
+            the type of the model
+        suffix : str
+            suffix to name scope
+        """
+        # skip. table can be initialized from the file
+
+    def get_fitting(self) -> Union[Fitting, dict]:
+        """Get the fitting(s)."""
+        # nothing needs to do
+        return {}
+
+    def get_loss(self, loss: dict, lr) -> Optional[Union[Loss, dict]]:
+        """Get the loss function(s)."""
+        # nothing needs to be done
+        return
+
+    def get_rcut(self) -> float:
+        """Get cutoff radius of the model."""
+        return self.rcut
+
+    def get_ntypes(self) -> int:
+        """Get the number of types."""
+        return self.ntypes
+
+    def data_stat(self, data: dict):
+        """Data statistics."""
+        # nothing needs to do
+
+    def enable_compression(self, suffix: str = "") -> None:
+        """Enable compression.
+
+        Parameters
+        ----------
+        suffix : str
+            suffix to name scope
+        """
+        # nothing needs to do
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict:
+        """Update the selection and perform neighbor statistics.
+
+        Notes
+        -----
+        Do not modify the input data without copying it.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+
+        Returns
+        -------
+        dict
+            The updated local data
+        """
+        from deepmd.entrypoints.train import (
+            update_one_sel,
+        )
+
+        local_jdata_cpy = local_jdata.copy()
+        return update_one_sel(global_jdata, local_jdata_cpy, True)
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 8d09d25577..2c1d235801 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -927,6 +927,7 @@ def model_args(exclude_hybrid=False):
                     standard_model_args(),
                     multi_model_args(),
                     frozen_model_args(),
+                    pairtab_model_args(),
                     *hybrid_models,
                 ],
                 optional=True,
@@ -1013,6 +1014,26 @@ def frozen_model_args() -> Argument:
     return ca
 
 
+def pairtab_model_args() -> Argument:
+    doc_tab_file = "Path to the tabulation file."
+    doc_rcut = "The cut-off radius."
+    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
+    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    ca = Argument(
+        "pairtab",
+        dict,
+        [
+            Argument("tab_file", str, optional=False, doc=doc_tab_file),
+            Argument("rcut", float, optional=False, doc=doc_rcut),
+            Argument("sel", [int, List[int], str], optional=False, doc=doc_sel),
+        ],
+        doc="Pairwise tabulation energy model.",
+    )
+    return ca
+
+
 def linear_ener_model_args() -> Argument:
     doc_weights = (
         "If the type is list of float, a list of weights for each model. "
diff --git a/doc/model/index.md b/doc/model/index.md
index 6c128028a6..589b39b2b5 100644
--- a/doc/model/index.md
+++ b/doc/model/index.md
@@ -17,4 +17,4 @@
 - [Deep potential long-range](dplr.md)
 - [Deep Potential - Range Correction (DPRc)](dprc.md)
 - [Linear model](linear.md)
-- [Interpolation with a pairwise potential](pairtab.md)
+- [Interpolation or combination with a pairwise potential](pairtab.md)
diff --git a/doc/model/pairtab.md b/doc/model/pairtab.md
index e3f0118f2c..115345796a 100644
--- a/doc/model/pairtab.md
+++ b/doc/model/pairtab.md
@@ -1,4 +1,4 @@
-# Interpolation with a pairwise potential
+# Interpolation or combination with a pairwise potential
 
 ## Theory
 In applications like the radiation damage simulation, the interatomic distance may become too close, so that the DFT calculations fail.
@@ -33,3 +33,56 @@ where the scale $\alpha_s$ is a tunable scale of the interatomic distance $r_{ij
 The pairwise potential $u^{\textrm{pair}}(r)$ is defined by a user-defined table that provides the value of $u^{\textrm{pair}}$ on an evenly discretized grid from 0 to the cutoff distance.[^1]
 
 [^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+DeePMD-kit also supports combination with a pairwise potential:
+
+```math
+  E_i = E_i^{\mathrm{DP}} + E_i^{\mathrm{pair}},
+```
+
+## Table file
+
+The table file should be a text file that can be read by {py:meth}`numpy.loadtxt`.
+The first column is the distance between two atoms, whose upper bound should be larger than the cutoff radius.
+Other columns are two-body interaction energies for pairs of certain types,
+in the order of Type_0-Type_0, Type_0-Type_1, ..., Type_0-Type_N, Type_1-Type_1, ..., Type_1-Type_N, ..., and Type_N-Type_N.
+
+The interaction should be smooth at the cut-off distance.
+
+## Interpolation with a short-range pairwise potential
+
+```json
+"model": {
+  "use_srtab": "H2O_tab_potential.txt",
+  "smin_alpha": 0.1,
+  "sw_rmin": 0.8,
+  "sw_rmax": 1.0,
+  "_comment": "Below uses a normal DP model"
+}
+```
+
+{ref}`sw_rmin <model/sw_rmin>` and {ref}`sw_rmax <model/sw_rmax>` must be smaller than the cutoff radius of the DP model.
+
+## Combination with a pairwise potential
+
+To combine with a pairwise potential, use the [linear model](./linear.md):
+
+```json
+"model": {
+  "type": "linear_ener",
+  "weights": "sum",
+  "models": [
+    {
+      "_comment": "Here uses a normal DP model"
+    },
+    {
+      "type": "pairtab",
+      "tab_file": "dftd3.txt",
+      "rcut": 10.0,
+      "sel": 534
+    }
+  ]
+}
+```
+
+The {ref}`rcut <model[pairtab]/rcut>` can be larger than that of the DP model.
diff --git a/examples/water/d3/README.md b/examples/water/d3/README.md
new file mode 100644
index 0000000000..bd75960010
--- /dev/null
+++ b/examples/water/d3/README.md
@@ -0,0 +1,11 @@
+# DPD3
+
+`dftd3.txt` tabulates D3 dispersion for each pair of types (O-O, O-H, H-H).
+It can be generated by [simple-dftd3](https://github.com/dftd3/simple-dftd3).
+
+## Note
+
+This example is for demonstration only and should not be used in production:
+
+- To keep the file small in the repository, the distance interval in the tabulation is only 0.1.
+- The example training data does not contain dispersion interaction.
diff --git a/examples/water/d3/dftd3.txt b/examples/water/d3/dftd3.txt
new file mode 100644
index 0000000000..bbc9726134
--- /dev/null
+++ b/examples/water/d3/dftd3.txt
@@ -0,0 +1,100 @@
+1.000000000000000056e-01 -5.836993924755046366e-03 -3.207255698139210940e-03 -1.843064837882633228e-03
+2.000000000000000111e-01 -5.836993806911452108e-03 -3.207255613696154226e-03 -1.843064776130543892e-03
+3.000000000000000444e-01 -5.836992560106194113e-03 -3.207254720510349828e-03 -1.843064123123401392e-03
+4.000000000000000222e-01 -5.836986225627246658e-03 -3.207250184384043221e-03 -1.843060811677158526e-03
+5.000000000000000000e-01 -5.836964436915091821e-03 -3.207234589497737730e-03 -1.843052788205641135e-03
+5.999999999999999778e-01 -5.836905460107320170e-03 -3.207192410957825698e-03 -1.843338972660025360e-03
+7.000000000000000666e-01 -5.836769626930583300e-03 -3.207096085246822614e-03 -1.851839876215982238e-03
+8.000000000000000444e-01 -5.836491030513121618e-03 -3.206924889333430135e-03 -2.035200426069873857e-03
+9.000000000000000222e-01 -5.835967602710929840e-03 -3.206999537190755728e-03 -3.724418810291191088e-03
+1.000000000000000000e+00 -5.835053775792304297e-03 -3.210477055685919626e-03 -4.311009958284344433e-03
+1.100000000000000089e+00 -5.833591489567684953e-03 -3.237527828601436623e-03 -4.381510573223419171e-03
+1.200000000000000178e+00 -5.831652981781070173e-03 -3.454845258034439960e-03 -4.394419437232751843e-03
+1.300000000000000266e+00 -5.830520601296543433e-03 -4.478070067533340692e-03 -4.394683688871586433e-03
+1.400000000000000133e+00 -5.835353622834494637e-03 -5.097530655625692915e-03 -4.389691198859401421e-03
+1.500000000000000222e+00 -5.863290690264541874e-03 -5.215500241204417201e-03 -4.380686516072217034e-03
+1.600000000000000089e+00 -6.007605076700822840e-03 -5.234994618743306349e-03 -4.367337507268855175e-03
+1.700000000000000178e+00 -6.481613230242359684e-03 -5.228094160806716871e-03 -4.348706108547779198e-03
+1.800000000000000266e+00 -6.814114687600298335e-03 -5.208252365588400719e-03 -4.323505520547227775e-03
+1.900000000000000133e+00 -6.876286379079538276e-03 -5.177988357772074675e-03 -4.290186895355558444e-03
+2.000000000000000000e+00 -6.858440816799354217e-03 -5.136887568332395605e-03 -4.246989919717190920e-03
+2.100000000000000089e+00 -6.810730159155128395e-03 -5.083475665301987606e-03 -4.192000168715152505e-03
+2.200000000000000178e+00 -6.742330737387775344e-03 -5.015815334399144516e-03 -4.123231519970332187e-03
+2.300000000000000266e+00 -6.653841351238824232e-03 -4.931782661310191510e-03 -4.038743210125123918e-03
+2.400000000000000355e+00 -6.543651317938833402e-03 -4.829269294496830317e-03 -3.936795390727530070e-03
+2.500000000000000444e+00 -6.409559281498313811e-03 -4.706385522261587705e-03 -3.816040239463167755e-03
+2.600000000000000089e+00 -6.249406635892575460e-03 -4.561685215972477100e-03 -3.675736338668155346e-03
+2.700000000000000178e+00 -6.061478463281754457e-03 -4.394408172892586353e-03 -3.515962176363645990e-03
+2.800000000000000266e+00 -5.844844934626365965e-03 -4.204716954930251029e-03 -3.337792190764940319e-03
+2.900000000000000355e+00 -5.599669004675433479e-03 -3.993889719587391009e-03 -3.143390268473208755e-03
+3.000000000000000444e+00 -5.327453506642119106e-03 -3.764420755089863558e-03 -2.935977648106832729e-03
+3.100000000000000089e+00 -5.031178000843260223e-03 -3.519982860915751074e-03 -2.719650568099894056e-03
+3.200000000000000178e+00 -4.715273672783852794e-03 -3.265225882759082918e-03 -2.499057451653833965e-03
+3.300000000000000266e+00 -4.385404785641488362e-03 -3.005422601424333727e-03 -2.278985743812388717e-03
+3.400000000000000355e+00 -4.048065433713449700e-03 -2.746015696661484231e-03 -2.063937321866260270e-03
+3.500000000000000444e+00 -3.710048572169818114e-03 -2.492149763588673555e-03 -1.857774171128685628e-03
+3.600000000000000089e+00 -3.377881092113224713e-03 -2.248275746149775312e-03 -1.663491260531681313e-03
+3.700000000000000178e+00 -3.057327225182689644e-03 -2.017890114824574810e-03 -1.483133951195727196e-03
+3.800000000000000266e+00 -2.753038981057491941e-03 -1.803430168074075671e-03 -1.317840750738439540e-03
+3.900000000000000355e+00 -2.468388171389931940e-03 -1.606308000309067743e-03 -1.167971059502070875e-03
+4.000000000000000000e+00 -2.205469013267805957e-03 -1.427041871266797194e-03 -1.033273795673775699e-03
+4.099999999999999645e+00 -1.965228953751702902e-03 -1.265437879541002862e-03 -9.130610310879381641e-04
+4.200000000000000178e+00 -1.747673832278765806e-03 -1.120782158543769547e-03 -8.063636493380576522e-04
+4.299999999999999822e+00 -1.552098284175109895e-03 -9.920168984562682292e-04 -7.120580835032176920e-04
+4.399999999999999467e+00 -1.377305748647780163e-03 -8.778864597897169646e-04 -6.289618864203703032e-04
+4.500000000000000000e+00 -1.221797526507303194e-03 -7.770496638083513111e-04 -5.559009474092405914e-04
+4.599999999999999645e+00 -1.083922782809847944e-03 -6.881603844395511003e-04 -4.917533939693695443e-04
+4.700000000000000178e+00 -9.619897379282633162e-04 -6.099214740721333600e-04 -4.354756390957214944e-04
+4.799999999999999822e+00 -8.543428352989788704e-04 -5.411178648690499965e-04 -3.861155118068372257e-04
+4.900000000000000355e+00 -7.594124385866309881e-04 -4.806343247547230249e-04 -3.428165131289927659e-04
+5.000000000000000000e+00 -6.757436744162991990e-04 -4.274624687438948085e-04 -3.048162971647301774e-04
+5.099999999999999645e+00 -6.020102408497160842e-04 -3.807006248475114439e-04 -2.714416410742632600e-04
+5.200000000000000178e+00 -5.370178955485286568e-04 -3.395492294862310413e-04 -2.421014916366724180e-04
+5.299999999999999822e+00 -4.797012289428498875e-04 -3.033036596191310643e-04 -2.162791601488694472e-04
+5.400000000000000355e+00 -4.291163603974148220e-04 -2.713458112672340397e-04 -1.935243599692976007e-04
+5.500000000000000000e+00 -3.844314156775488251e-04 -2.431352896687036106e-04 -1.734455139070628909e-04
+5.599999999999999645e+00 -3.449160478270333653e-04 -2.182007570692257958e-04 -1.557025751017268144e-04
+5.700000000000000178e+00 -3.099308250478081581e-04 -1.961317615550248216e-04 -1.400004825033046053e-04
+5.799999999999999822e+00 -2.789169965744946232e-04 -1.765712194195623135e-04 -1.260832928664179986e-04
+5.900000000000000355e+00 -2.513869308376957498e-04 -1.592086242469989389e-04 -1.137289820430557137e-04
+6.000000000000000000e+00 -2.269153740910770769e-04 -1.437739928526920498e-04 -1.027448796326863424e-04
+6.099999999999999645e+00 -2.051315821421489645e-04 -1.300325201124615134e-04 -9.296368585686617101e-05
+6.200000000000000178e+00 -1.857123177371916057e-04 -1.177798933810950252e-04 -8.424001307773229020e-05
+6.299999999999999822e+00 -1.683756703844696025e-04 -1.068382068924710331e-04 -7.644739339328402133e-05
+6.400000000000000355e+00 -1.528756359693242027e-04 -9.705241326038571551e-05 -6.947569600606938272e-05
+6.500000000000000000e+00 -1.389973847900246836e-04 -8.828725024164000819e-05 -6.322890211399061677e-05
+6.599999999999999645e+00 -1.265531447216864910e-04 -8.042458445906290783e-05 -5.762318996708282935e-05
+6.700000000000000178e+00 -1.153786284350462083e-04 -7.336111861455703732e-05 -5.258528787829301387e-05
+6.799999999999999822e+00 -1.053299381724164837e-04 -6.700641408290249014e-05 -4.805105801105378322e-05
+6.900000000000000355e+00 -9.628088734156424651e-05 -6.128118618925484863e-05 -4.396427848897285724e-05
+7.000000000000000000e+00 -8.812068437769617318e-05 -5.611583465513190913e-05 -4.027559568117881135e-05
+7.099999999999999645e+00 -8.075193047879847589e-05 -5.144917649553730292e-05 -3.694162237238345173e-05
+7.200000000000000178e+00 -7.408888866698059216e-05 -4.722735299269381154e-05 -3.392416093003276399e-05
+7.299999999999999822e+00 -6.805598702152939358e-05 -4.340288624040664528e-05 -3.118953355495664799e-05
+7.400000000000000355e+00 -6.258652380321327402e-05 -3.993386415929820209e-05 -2.870800428147100793e-05
+7.500000000000000000e+00 -5.762154653038724025e-05 -3.678323585657680581e-05 -2.645327961777798337e-05
+7.599999999999999645e+00 -5.310888089285451013e-05 -3.391820178292012157e-05 -2.440207662848582849e-05
+7.700000000000000178e+00 -4.900228873380196631e-05 -3.130968536501008690e-05 -2.253374889733842244e-05
+7.799999999999999822e+00 -4.526073723647751752e-05 -2.893187470658392955e-05 -2.082996220611386151e-05
+7.900000000000000355e+00 -4.184776396661089387e-05 -2.676182459276817884e-05 -1.927441295794754013e-05
+8.000000000000000000e+00 -3.873092458939377268e-05 -2.477911043795883125e-05 -1.785258338919504348e-05
+8.099999999999999645e+00 -3.588131194417033489e-05 -2.296552701898519263e-05 -1.655152847892165657e-05
+8.199999999999999289e+00 -3.327313676038550535e-05 -2.130482586144845337e-05 -1.535969020138178571e-05
+8.300000000000000711e+00 -3.088336167038252842e-05 -1.978248602307972753e-05 -1.426673539356727330e-05
+8.400000000000000355e+00 -2.869138134992016182e-05 -1.838551376555334571e-05 -1.326341404347894562e-05
+8.500000000000000000e+00 -2.667874262351516647e-05 -1.710226724425689568e-05 -1.234143525923654349e-05
+8.599999999999999645e+00 -2.482889923305170253e-05 -1.592230289025118773e-05 -1.149335856644029766e-05
+8.699999999999999289e+00 -2.312699670543422217e-05 -1.483624062390156494e-05 -1.071249851148625846e-05
+8.800000000000000711e+00 -2.155968338640409072e-05 -1.383564543725925445e-05 -9.992840830439041262e-06
+8.900000000000000355e+00 -2.011494424844594071e-05 -1.291292322228744002e-05 -9.328968683880578804e-06
+9.000000000000000000e+00 -1.878195454422273937e-05 -1.206122901303710759e-05 -8.715997664073560640e-06
+9.099999999999999645e+00 -1.755095077450199208e-05 -1.127438605916122122e-05 -8.149518457037649769e-06
+9.199999999999999289e+00 -1.641311678073074286e-05 -1.054681436190059537e-05 -7.625546193173260002e-06
+9.300000000000000711e+00 -1.536048306550537418e-05 -9.873467487129955082e-06 -7.140475649634374041e-06
+9.400000000000000355e+00 -1.438583769617946587e-05 -9.249776627676899564e-06 -6.691041578929334717e-06
+9.500000000000000000e+00 -1.348264736372320039e-05 -8.671601022702781346e-06 -6.274283533910064576e-06
+9.599999999999999645e+00 -1.264498735578012246e-05 -8.135183958678718279e-06 -5.887514641681122086e-06
+9.700000000000001066e+00 -1.186747936398473687e-05 -7.637113677130612127e-06 -5.528293849956352819e-06
+9.800000000000000711e+00 -1.114523618469756001e-05 -7.174288601187318493e-06 -5.194401230658985063e-06
+9.900000000000000355e+00 -1.047381249252528874e-05 -6.743886368019750717e-06 -4.883815978498405921e-06
+1.000000000000000000e+01  0.000000000000000000e+00  0.000000000000000000e+00  0.000000000000000000e+00
diff --git a/examples/water/d3/input.json b/examples/water/d3/input.json
new file mode 100644
index 0000000000..bbe7a2c8a9
--- /dev/null
+++ b/examples/water/d3/input.json
@@ -0,0 +1,95 @@
+{
+  "_comment1": " model parameters",
+  "model": {
+    "type": "linear_ener",
+    "weights": "sum",
+    "models": [
+      {
+        "type_map": [
+          "O",
+          "H"
+        ],
+        "descriptor": {
+          "type": "se_e2_a",
+          "sel": [
+            46,
+            92
+          ],
+          "rcut_smth": 0.50,
+          "rcut": 6.00,
+          "neuron": [
+            25,
+            50,
+            100
+          ],
+          "resnet_dt": false,
+          "axis_neuron": 16,
+          "precision": "float64",
+          "seed": 1,
+          "_comment2": " that's all"
+        },
+        "fitting_net": {
+          "neuron": [
+            240,
+            240,
+            240
+          ],
+          "resnet_dt": true,
+          "precision": "float64",
+          "seed": 1,
+          "_comment3": " that's all"
+        },
+        "_comment4": " that's all"
+      },
+      {
+        "type": "pairtab",
+        "tab_file": "dftd3.txt",
+        "rcut": 10.0,
+        "sel": 534
+      }
+    ]
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment5": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment6": " that's all"
+  },
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment7": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment8": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment9": "that's all"
+  },
+  "_comment10": "that's all"
+}
diff --git a/source/tests/test_model_pairtab.py b/source/tests/test_model_pairtab.py
new file mode 100644
index 0000000000..fd678894b5
--- /dev/null
+++ b/source/tests/test_model_pairtab.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+import scipy.spatial.distance
+from common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.model.model import (
+    Model,
+)
+
+GLOBAL_ENER_FLOAT_PRECISION = tf.float64
+GLOBAL_TF_FLOAT_PRECISION = tf.float64
+GLOBAL_NP_FLOAT_PRECISION = np.float64
+
+
+class TestModel(tf.test.TestCase):
+    def setUp(self):
+        gen_data()
+
+    def test_model(self):
+        jfile = "water.json"
+        jdata = j_loader(jfile)
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = 1
+        test_size = 1
+
+        tab_filename = "test_pairtab_tab.txt"
+        jdata["model"] = {
+            "type": "pairtab",
+            "tab_file": tab_filename,
+            "rcut": 6,
+            "sel": [6],
+        }
+        rcut = j_must_have(jdata["model"], "rcut")
+
+        def pair_pot(r: float):
+            # Lennard-Jones (LJ) potential, as an example
+            return 4 * (1 / r**12 - 1 / r**6)
+
+        dx = 1e-4
+        d = np.arange(dx, rcut + dx, dx)
+        tab = np.array(
+            [
+                d,
+                pair_pot(d),
+                pair_pot(d),
+                pair_pot(d),
+            ]
+        ).T
+        np.savetxt(tab_filename, tab)
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
+        numb_test = 1
+
+        model = Model(
+            **jdata["model"],
+        )
+
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
+        t_fparam = None
+
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="test_pairtab",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: [],  # nopbc
+            is_training: False,
+        }
+
+        with self.cached_session() as sess:
+            sess.run(tf.global_variables_initializer())
+            [e, _, _] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
+
+        e = e.reshape([-1])
+
+        coord = test_data["coord"][0, :].reshape(-1, 3)
+        distance = scipy.spatial.distance.cdist(coord, coord).ravel()
+        refe = [np.sum(pair_pot(distance[np.nonzero(distance)])) / 2]
+
+        refe = np.reshape(refe, [-1])
+
+        places = 10
+        np.testing.assert_almost_equal(e, refe, places)

From 674ea1784dc162fb301fadc3e43d448b806003ff Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Fri, 5 Jan 2024 09:41:42 +0800
Subject: [PATCH 61/97] rm rcut from DeepmdDataSystem (#3106)

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd/utils/data_system.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
index bf05b5faa7..65cfdc053f 100644
--- a/deepmd/utils/data_system.py
+++ b/deepmd/utils/data_system.py
@@ -37,7 +37,7 @@ def __init__(
         systems: List[str],
         batch_size: int,
         test_size: int,
-        rcut: float,
+        rcut: Optional[float] = None,
         set_prefix: str = "set",
         shuffle_test: bool = True,
         type_map: Optional[List[str]] = None,
@@ -59,7 +59,7 @@ def __init__(
         test_size
             The size of test data
         rcut
-            The cut-off radius
+            The cut-off radius. Not used.
         set_prefix
             Prefix for the directories of different sets
         shuffle_test
@@ -91,7 +91,7 @@ def __init__(
             descriptors except mixed types.
         """
         # init data
-        self.rcut = rcut
+        del rcut
         self.system_dirs = systems
         self.nsystems = len(self.system_dirs)
         self.data_systems = []

From 61ee4f22770d737b50db6e6f1631aada410aac27 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Fri, 5 Jan 2024 00:15:39 -0500
Subject: [PATCH 62/97] fix segfault in ~Region (#3108)

When debugging #3103, I noticed a segfault in `~Region` that prevents the
actual error message from being shown. `_norm_copy_coord_gpu` replaces `boxt`
and `rec_boxt` of `Region` with GPU pointers, runs `deepmd::copy_coord_gpu`,
and finally restores the original pointers so that they can be deleted.
However, `deepmd::copy_coord_gpu` might throw CUDA errors for any reason, in
which case the pointers are not restored. `~Region` then tries to delete a
pointer that it doesn't own, causing the segfault. The CUDA error message is
not visible due to the segfault. The segfault in #2895 may also be caused by it.
This PR adds a new constructor to `Region` that accepts external
pointers. `~Region` deletes `boxt` and `rec_boxt` only when the
pointers are not external.
We still need to figure out the reason for the error of `copy_coord_gpu`
behind the segfault.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/lib/include/region.h                |  4 ++
 source/lib/src/region.cc                   | 14 +++++-
 source/lib/tests/test_coord.cc             | 56 +++-------------------
 source/lib/tests/test_simulation_region.cc |  8 +---
 source/op/prod_env_mat_multi_device.cc     |  8 +---
 source/tests/test_auto_batch_size.py       |  4 +-
 6 files changed, 28 insertions(+), 66 deletions(-)

diff --git a/source/lib/include/region.h b/source/lib/include/region.h
index 2f6dbbf4e0..ee11b5b8ac 100644
--- a/source/lib/include/region.h
+++ b/source/lib/include/region.h
@@ -8,7 +8,11 @@ struct Region {
   FPTYPE* boxt;
   FPTYPE* rec_boxt;
   Region();
+  Region(FPTYPE* extern_boxt, FPTYPE* extern_rec_boxt);
   ~Region();
+
+ private:
+  bool self_allocated;
 };
 
 template <typename FPTYPE>
diff --git a/source/lib/src/region.cc b/source/lib/src/region.cc
index 36a739d90a..6c5f5493e6 100644
--- a/source/lib/src/region.cc
+++ b/source/lib/src/region.cc
@@ -14,12 +14,22 @@ template <typename FPTYPE>
 Region<FPTYPE>::Region() {
   boxt = new FPTYPE[BOXT_DIM];
   rec_boxt = new FPTYPE[BOXT_DIM];
+  self_allocated = true;
+}
+
+template <typename FPTYPE>
+Region<FPTYPE>::Region(FPTYPE* extern_boxt, FPTYPE* extern_rec_boxt) {
+  boxt = extern_boxt;
+  rec_boxt = extern_rec_boxt;
+  self_allocated = false;
 }
 
 template <typename FPTYPE>
 Region<FPTYPE>::~Region() {
-  delete[] boxt;
-  delete[] rec_boxt;
+  if (self_allocated) {
+    delete[] boxt;
+    delete[] rec_boxt;
+  }
 }
 
 template struct deepmd::Region<double>;
diff --git a/source/lib/tests/test_coord.cc b/source/lib/tests/test_coord.cc
index af320ca3f7..c939dd6fa6 100644
--- a/source/lib/tests/test_coord.cc
+++ b/source/lib/tests/test_coord.cc
@@ -62,9 +62,6 @@ TEST_F(TestNormCoord, cpu_case2) {
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 TEST_F(TestNormCoord, gpu_case0) {
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector<double> box_info;
   box_info.resize(18);
@@ -75,11 +72,8 @@ TEST_F(TestNormCoord, gpu_case0) {
   std::vector<double> out_c(r0);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<double> region_dev(box_info_dev, box_info_dev + 9);
   deepmd::normalize_coord_gpu(out_c_dev, natoms, region_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
@@ -90,9 +84,6 @@ TEST_F(TestNormCoord, gpu_case0) {
 
 TEST_F(TestNormCoord, gpu_case1) {
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector<double> box_info;
   box_info.resize(18);
@@ -103,11 +94,8 @@ TEST_F(TestNormCoord, gpu_case1) {
   std::vector<double> out_c(r1);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<double> region_dev(box_info_dev, box_info_dev + 9);
   deepmd::normalize_coord_gpu(out_c_dev, natoms, region_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
@@ -118,9 +106,6 @@ TEST_F(TestNormCoord, gpu_case1) {
 
 TEST_F(TestNormCoord, gpu_case2) {
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector<double> box_info;
   box_info.resize(18);
@@ -131,11 +116,8 @@ TEST_F(TestNormCoord, gpu_case2) {
   std::vector<double> out_c(r2);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<double> region_dev(box_info_dev, box_info_dev + 9);
   deepmd::normalize_coord_gpu(out_c_dev, natoms, region_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
@@ -298,9 +280,6 @@ TEST_F(TestCopyCoord, gpu) {
   std::vector<int> cell_info;
   cell_info.resize(23);
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector<double> box_info;
@@ -325,14 +304,11 @@ TEST_F(TestCopyCoord, gpu) {
       int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
                         total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
                         1 + nloc);
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<double> region_dev(box_info_dev, box_info_dev + 9);
   int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
                                    int_data_dev, in_c_dev, in_t_dev, nloc,
                                    mem_size, loc_cellnum, total_cellnum,
                                    cell_info_dev, region_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::memcpy_device_to_host(out_t_dev, out_t);
   deepmd::memcpy_device_to_host(mapping_dev, mapping);
@@ -373,9 +349,6 @@ TEST_F(TestCopyCoord, gpu_lessmem) {
   std::vector<int> cell_info;
   cell_info.resize(23);
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector<double> box_info;
@@ -400,14 +373,11 @@ TEST_F(TestCopyCoord, gpu_lessmem) {
       int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
                         total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
                         1 + nloc);
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<double> region_dev(box_info_dev, box_info_dev + 9);
   int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
                                    int_data_dev, in_c_dev, in_t_dev, nloc,
                                    mem_size, loc_cellnum, total_cellnum,
                                    cell_info_dev, region_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::memcpy_device_to_host(out_t_dev, out_t);
   deepmd::memcpy_device_to_host(mapping_dev, mapping);
@@ -544,9 +514,6 @@ TEST_F(TestCopyCoordMoreCell, gpu) {
   std::vector<int> cell_info;
   cell_info.resize(23);
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector<double> box_info;
@@ -571,14 +538,11 @@ TEST_F(TestCopyCoordMoreCell, gpu) {
       int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
                         total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
                         1 + nloc);
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<double> region_dev(box_info_dev, box_info_dev + 9);
   int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
                                    int_data_dev, in_c_dev, in_t_dev, nloc,
                                    mem_size, loc_cellnum, total_cellnum,
                                    cell_info_dev, region_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::memcpy_device_to_host(out_t_dev, out_t);
   deepmd::memcpy_device_to_host(mapping_dev, mapping);
@@ -619,9 +583,6 @@ TEST_F(TestCopyCoordMoreCell, gpu_lessmem) {
   std::vector<int> cell_info;
   cell_info.resize(23);
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector<double> box_info;
@@ -646,14 +607,11 @@ TEST_F(TestCopyCoordMoreCell, gpu_lessmem) {
       int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
                         total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
                         1 + nloc);
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<double> region_dev(box_info_dev, box_info_dev + 9);
   int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
                                    int_data_dev, in_c_dev, in_t_dev, nloc,
                                    mem_size, loc_cellnum, total_cellnum,
                                    cell_info_dev, region_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::memcpy_device_to_host(out_t_dev, out_t);
   deepmd::memcpy_device_to_host(mapping_dev, mapping);
diff --git a/source/lib/tests/test_simulation_region.cc b/source/lib/tests/test_simulation_region.cc
index 98da9ec350..5f64d3f531 100644
--- a/source/lib/tests/test_simulation_region.cc
+++ b/source/lib/tests/test_simulation_region.cc
@@ -77,9 +77,6 @@ TEST_F(TestRegion, cpu) {
 TEST_F(TestRegion, gpu) {
   // check rec_box
   deepmd::Region<double> region;
-  deepmd::Region<double> region_dev;
-  double* new_boxt = region_dev.boxt;
-  double* new_rec_boxt = region_dev.rec_boxt;
   double *boxt_dev = NULL, *rec_boxt_dev = NULL;
   double *ref_rp_dev = NULL, *ref_ri_dev = NULL;
   init_region_cpu(region, &ref_boxt[0]);
@@ -90,8 +87,7 @@ TEST_F(TestRegion, gpu) {
   deepmd::malloc_device_memory_sync(rec_boxt_dev, region.rec_boxt, 9);
   deepmd::malloc_device_memory_sync(ref_rp_dev, ref_rp);
   deepmd::malloc_device_memory_sync(ref_ri_dev, ref_ri);
-  region_dev.boxt = boxt_dev;
-  region_dev.rec_boxt = rec_boxt_dev;
+  deepmd::Region<double> region_dev(boxt_dev, rec_boxt_dev);
   // check volume
   double vol[1];
   double* vol_dev = NULL;
@@ -141,8 +137,6 @@ TEST_F(TestRegion, gpu) {
   deepmd::delete_device_memory(rp2_dev);
   deepmd::delete_device_memory(rp_dev);
   deepmd::delete_device_memory(ri2_dev);
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc
index 048237e042..22654b5f3a 100644
--- a/source/op/prod_env_mat_multi_device.cc
+++ b/source/op/prod_env_mat_multi_device.cc
@@ -1496,11 +1496,7 @@ static int _norm_copy_coord_gpu(OpKernelContext* context,
   int* int_data_dev = cell_info_dev + 23;
   deepmd::memcpy_host_to_device(box_info_dev, box_info, 18);
   deepmd::memcpy_host_to_device(cell_info_dev, cell_info, 23);
-  deepmd::Region<FPTYPE> region_dev;
-  FPTYPE* new_boxt = region_dev.boxt;
-  FPTYPE* new_rec_boxt = region_dev.rec_boxt;
-  region_dev.boxt = box_info_dev;
-  region_dev.rec_boxt = box_info_dev + 9;
+  deepmd::Region<FPTYPE> region_dev(box_info_dev, box_info_dev + 9);
   deepmd::normalize_coord_gpu(tmp_coord, nall, region_dev);
   int tt;
   for (tt = 0; tt < max_cpy_trial; ++tt) {
@@ -1531,8 +1527,6 @@ static int _norm_copy_coord_gpu(OpKernelContext* context,
       }
     }
   }
-  region_dev.boxt = new_boxt;
-  region_dev.rec_boxt = new_rec_boxt;
   return (tt != max_cpy_trial);
 }
 
diff --git a/source/tests/test_auto_batch_size.py b/source/tests/test_auto_batch_size.py
index 93a96c9c29..5a349f70b9 100644
--- a/source/tests/test_auto_batch_size.py
+++ b/source/tests/test_auto_batch_size.py
@@ -45,8 +45,10 @@ def test_execute_oom_gpu(self, mock_is_gpu_available):
         self.assertEqual(result.shape, (256, 2))
 
     @unittest.mock.patch("tensorflow.compat.v1.test.is_gpu_available")
-    def test_execute_oom_cpu(self, mock_is_gpu_available):
+    @unittest.mock.patch("tensorflow.compat.v1.config.experimental.get_visible_devices")
+    def test_execute_oom_cpu(self, mock_is_gpu_available, mock_get_visible_devices):
         mock_is_gpu_available.return_value = False
+        mock_get_visible_devices.return_value = []
         # initial batch size 256 = 128 * 2, nb is always 128
         auto_batch_size = AutoBatchSize(256, 2.0)
         nb, result = auto_batch_size.execute(self.oom, 1, 2)

From db22812de47f4300ccb2a3a6e14e40334bad9f63 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Fri, 5 Jan 2024 00:16:09 -0500
Subject: [PATCH 63/97] add activation_function and resnet arguments and NumPy
 implementation to NativeLayer (#3109)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd_utils/model_format.py            | 94 +++++++++++++++++++++++--
 source/tests/test_model_format_utils.py | 52 ++++++++++----
 2 files changed, 129 insertions(+), 17 deletions(-)

diff --git a/deepmd_utils/model_format.py b/deepmd_utils/model_format.py
index 68a6d4045b..0b67131c4d 100644
--- a/deepmd_utils/model_format.py
+++ b/deepmd_utils/model_format.py
@@ -4,6 +4,9 @@
 See issue #2982 for more information.
 """
 import json
+from abc import (
+    ABC,
+)
 from typing import (
     List,
     Optional,
@@ -121,7 +124,15 @@ def load_dp_model(filename: str) -> dict:
     return model_dict
 
 
-class NativeLayer:
+class NativeOP(ABC):
+    """The unit operation of a native model."""
+
+    def call(self, *args, **kwargs):
+        """Forward pass in NumPy implementation."""
+        raise NotImplementedError
+
+
+class NativeLayer(NativeOP):
     """Native representation of a layer.
 
     Parameters
@@ -132,6 +143,10 @@ class NativeLayer:
         The biases of the layer.
     idt : np.ndarray, optional
         The identity matrix of the layer.
+    activation_function : str, optional
+        The activation function of the layer.
+    resnet : bool, optional
+        Whether the layer is a residual layer.
     """
 
     def __init__(
@@ -139,10 +154,14 @@ def __init__(
         w: Optional[np.ndarray] = None,
         b: Optional[np.ndarray] = None,
         idt: Optional[np.ndarray] = None,
+        activation_function: Optional[str] = None,
+        resnet: bool = False,
     ) -> None:
         self.w = w
         self.b = b
         self.idt = idt
+        self.activation_function = activation_function
+        self.resnet = resnet
 
     def serialize(self) -> dict:
         """Serialize the layer to a dict.
@@ -158,7 +177,11 @@ def serialize(self) -> dict:
         }
         if self.idt is not None:
             data["idt"] = self.idt
-        return data
+        return {
+            "activation_function": self.activation_function,
+            "resnet": self.resnet,
+            "@variables": data,
+        }
 
     @classmethod
     def deserialize(cls, data: dict) -> "NativeLayer":
@@ -169,7 +192,13 @@ def deserialize(cls, data: dict) -> "NativeLayer":
         data : dict
             The dict to deserialize from.
         """
-        return cls(data["w"], data["b"], data.get("idt", None))
+        return cls(
+            w=data["@variables"]["w"],
+            b=data["@variables"]["b"],
+            idt=data.get("idt", None),
+            activation_function=data["activation_function"],
+            resnet=data.get("resnet", False),
+        )
 
     def __setitem__(self, key, value):
         if key in ("w", "matrix"):
@@ -178,6 +207,10 @@ def __setitem__(self, key, value):
             self.b = value
         elif key == "idt":
             self.idt = value
+        elif key == "activation_function":
+            self.activation_function = value
+        elif key == "resnet":
+            self.resnet = value
         else:
             raise KeyError(key)
 
@@ -188,11 +221,47 @@ def __getitem__(self, key):
             return self.b
         elif key == "idt":
             return self.idt
+        elif key == "activation_function":
+            return self.activation_function
+        elif key == "resnet":
+            return self.resnet
         else:
             raise KeyError(key)
 
+    def call(self, x: np.ndarray) -> np.ndarray:
+        """Forward pass.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            The input.
+
+        Returns
+        -------
+        np.ndarray
+            The output.
+        """
+        if self.w is None or self.b is None or self.activation_function is None:
+            raise ValueError("w, b, and activation_function must be set")
+        if self.activation_function == "tanh":
+            fn = np.tanh
+        elif self.activation_function.lower() == "none":
+
+            def fn(x):
+                return x
+        else:
+            raise NotImplementedError(self.activation_function)
+        y = fn(np.matmul(x, self.w) + self.b)
+        if self.idt is not None:
+            y *= self.idt
+        if self.resnet and self.w.shape[1] == self.w.shape[0]:
+            y += x
+        elif self.resnet and self.w.shape[1] == 2 * self.w.shape[0]:
+            y += np.concatenate([x, x], axis=1)
+        return y
+
 
-class NativeNet:
+class NativeNet(NativeOP):
     """Native representation of a neural network.
 
     Parameters
@@ -238,3 +307,20 @@ def __setitem__(self, key, value):
         if len(self.layers) <= key:
             self.layers.extend([NativeLayer()] * (key - len(self.layers) + 1))
         self.layers[key] = value
+
+    def call(self, x: np.ndarray) -> np.ndarray:
+        """Forward pass.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            The input.
+
+        Returns
+        -------
+        np.ndarray
+            The output.
+        """
+        for layer in self.layers:
+            x = layer.call(x)
+        return x
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index b959ace3f6..3b2aa5d8d4 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -25,25 +25,45 @@ def test_serialize(self):
         network[1]["b"] = self.b
         network[0]["w"] = self.w
         network[0]["b"] = self.b
+        network[1]["activation_function"] = "tanh"
+        network[0]["activation_function"] = "tanh"
+        network[1]["resnet"] = True
+        network[0]["resnet"] = True
         jdata = network.serialize()
-        np.testing.assert_array_equal(jdata["layers"][0]["w"], self.w)
-        np.testing.assert_array_equal(jdata["layers"][0]["b"], self.b)
-        np.testing.assert_array_equal(jdata["layers"][1]["w"], self.w)
-        np.testing.assert_array_equal(jdata["layers"][1]["b"], self.b)
+        np.testing.assert_array_equal(jdata["layers"][0]["@variables"]["w"], self.w)
+        np.testing.assert_array_equal(jdata["layers"][0]["@variables"]["b"], self.b)
+        np.testing.assert_array_equal(jdata["layers"][1]["@variables"]["w"], self.w)
+        np.testing.assert_array_equal(jdata["layers"][1]["@variables"]["b"], self.b)
+        np.testing.assert_array_equal(jdata["layers"][0]["activation_function"], "tanh")
+        np.testing.assert_array_equal(jdata["layers"][1]["activation_function"], "tanh")
+        np.testing.assert_array_equal(jdata["layers"][0]["resnet"], True)
+        np.testing.assert_array_equal(jdata["layers"][1]["resnet"], True)
 
     def test_deserialize(self):
         network = NativeNet.deserialize(
             {
                 "layers": [
-                    {"w": self.w, "b": self.b},
-                    {"w": self.w, "b": self.b},
-                ]
+                    {
+                        "activation_function": "tanh",
+                        "resnet": True,
+                        "@variables": {"w": self.w, "b": self.b},
+                    },
+                    {
+                        "activation_function": "tanh",
+                        "resnet": True,
+                        "@variables": {"w": self.w, "b": self.b},
+                    },
+                ],
             }
         )
         np.testing.assert_array_equal(network[0]["w"], self.w)
         np.testing.assert_array_equal(network[0]["b"], self.b)
         np.testing.assert_array_equal(network[1]["w"], self.w)
         np.testing.assert_array_equal(network[1]["b"], self.b)
+        np.testing.assert_array_equal(network[0]["activation_function"], "tanh")
+        np.testing.assert_array_equal(network[1]["activation_function"], "tanh")
+        np.testing.assert_array_equal(network[0]["resnet"], True)
+        np.testing.assert_array_equal(network[1]["resnet"], True)
 
 
 class TestDPModel(unittest.TestCase):
@@ -52,12 +72,18 @@ def setUp(self) -> None:
         self.b = np.full((3,), 4.0)
         self.model_dict = {
             "type": "some_type",
-            "@variables": {
-                "layers": [
-                    {"w": self.w, "b": self.b},
-                    {"w": self.w, "b": self.b},
-                ]
-            },
+            "layers": [
+                {
+                    "activation_function": "tanh",
+                    "resnet": True,
+                    "@variables": {"w": self.w, "b": self.b},
+                },
+                {
+                    "activation_function": "tanh",
+                    "resnet": True,
+                    "@variables": {"w": self.w, "b": self.b},
+                },
+            ],
         }
         self.filename = "test_dp_model_format.dp"
 

From c4b7baae636c1d0bf263403126f8dd58fbdadd61 Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Fri, 5 Jan 2024 16:52:01 +0800
Subject: [PATCH 64/97] NativeLayer: support None bias.  (#3111)

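- fix bugs: `NativeLayer.deserialize` now reads `idt` from `@variables`,
  which is where `serialize` stores it, instead of from the top-level dict.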
- add UT for the `NativeLayer`

---------

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd_utils/model_format.py            | 13 +++++++++----
 source/tests/test_model_format_utils.py | 21 ++++++++++++++++++++-
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/deepmd_utils/model_format.py b/deepmd_utils/model_format.py
index 0b67131c4d..83e6ac11fc 100644
--- a/deepmd_utils/model_format.py
+++ b/deepmd_utils/model_format.py
@@ -194,8 +194,8 @@ def deserialize(cls, data: dict) -> "NativeLayer":
         """
         return cls(
             w=data["@variables"]["w"],
-            b=data["@variables"]["b"],
-            idt=data.get("idt", None),
+            b=data["@variables"].get("b", None),
+            idt=data["@variables"].get("idt", None),
             activation_function=data["activation_function"],
             resnet=data.get("resnet", False),
         )
@@ -241,7 +241,7 @@ def call(self, x: np.ndarray) -> np.ndarray:
         np.ndarray
             The output.
         """
-        if self.w is None or self.b is None or self.activation_function is None:
+        if self.w is None or self.activation_function is None:
             raise ValueError("w, b, and activation_function must be set")
         if self.activation_function == "tanh":
             fn = np.tanh
@@ -251,7 +251,12 @@ def fn(x):
                 return x
         else:
             raise NotImplementedError(self.activation_function)
-        y = fn(np.matmul(x, self.w) + self.b)
+        y = (
+            np.matmul(x, self.w) + self.b
+            if self.b is not None
+            else np.matmul(x, self.w)
+        )
+        y = fn(y)
         if self.idt is not None:
             y *= self.idt
         if self.resnet and self.w.shape[1] == self.w.shape[0]:
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index 3b2aa5d8d4..2fef4e1922 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
 import os
 import unittest
 from copy import (
@@ -8,16 +9,34 @@
 import numpy as np
 
 from deepmd_utils.model_format import (
+    NativeLayer,
     NativeNet,
     load_dp_model,
     save_dp_model,
 )
 
 
+class TestNativeLayer(unittest.TestCase):
+    def setUp(self) -> None:
+        self.w = np.full((2, 3), 3.0)
+        self.b = np.full((3,), 4.0)
+        self.idt = np.full((3,), 5.0)
+
+    def test_serialize_deserize(self):
+        for ww, bb, idt, activation_function, resnet in itertools.product(
+            [self.w], [self.b, None], [self.idt, None], ["tanh", "none"], [True, False]
+        ):
+            nl0 = NativeLayer(ww, bb, idt, activation_function, resnet)
+            nl1 = NativeLayer.deserialize(nl0.serialize())
+            inp = np.arange(self.w.shape[0])
+            np.testing.assert_allclose(nl0.call(inp), nl1.call(inp))
+
+
 class TestNativeNet(unittest.TestCase):
     def setUp(self) -> None:
-        self.w = np.full((3, 2), 3.0)
+        self.w = np.full((2, 3), 3.0)
         self.b = np.full((3,), 4.0)
+        self.idt = np.full((3,), 5.0)
 
     def test_serialize(self):
         network = NativeNet()

From 7b3c3c01af32391f673947bf94af831acff03c49 Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Sat, 6 Jan 2024 07:46:43 +0800
Subject: [PATCH 65/97] fix native layer concat bug.  (#3112)

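Concatenate the input along the last axis (`axis=-1`) in the resnet branch of
`NativeLayer.call`, so that inputs with extra leading dimensions are handled.
Add a UT covering a range of input and output shapes.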

---------

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd_utils/model_format.py            |  2 +-
 source/tests/test_model_format_utils.py | 22 ++++++++++++++--------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/deepmd_utils/model_format.py b/deepmd_utils/model_format.py
index 83e6ac11fc..131be93121 100644
--- a/deepmd_utils/model_format.py
+++ b/deepmd_utils/model_format.py
@@ -262,7 +262,7 @@ def fn(x):
         if self.resnet and self.w.shape[1] == self.w.shape[0]:
             y += x
         elif self.resnet and self.w.shape[1] == 2 * self.w.shape[0]:
-            y += np.concatenate([x, x], axis=1)
+            y += np.concatenate([x, x], axis=-1)
         return y
 
 
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index 2fef4e1922..af8c4361c8 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -17,18 +17,24 @@
 
 
 class TestNativeLayer(unittest.TestCase):
-    def setUp(self) -> None:
-        self.w = np.full((2, 3), 3.0)
-        self.b = np.full((3,), 4.0)
-        self.idt = np.full((3,), 5.0)
-
     def test_serialize_deserize(self):
-        for ww, bb, idt, activation_function, resnet in itertools.product(
-            [self.w], [self.b, None], [self.idt, None], ["tanh", "none"], [True, False]
+        for (ni, no), bias, ut, activation_function, resnet, ashp in itertools.product(
+            [(5, 5), (5, 10), (5, 9), (9, 5)],
+            [True, False],
+            [True, False],
+            ["tanh", "none"],
+            [True, False],
+            [None, [4], [3, 2]],
         ):
+            ww = np.full((ni, no), 3.0)
+            bb = np.full((no,), 4.0) if bias else None
+            idt = np.full((no,), 5.0) if ut else None
             nl0 = NativeLayer(ww, bb, idt, activation_function, resnet)
             nl1 = NativeLayer.deserialize(nl0.serialize())
-            inp = np.arange(self.w.shape[0])
+            inp_shap = [ww.shape[0]]
+            if ashp is not None:
+                inp_shap = ashp + inp_shap
+            inp = np.arange(np.prod(inp_shap)).reshape(inp_shap)
             np.testing.assert_allclose(nl0.call(inp), nl1.call(inp))
 
 
From f181a3084a04f8e797f5474e14015d46e597e98a Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Mon, 8 Jan 2024 13:45:39 +0800
Subject: [PATCH 66/97] model format for the embedding net (#3113)

- refactor the directory structure of `model_format`
- change the input of `NativeNet` from `List[NativeLayer]` to
  `List[dict]`

---------

Signed-off-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd_utils/model_format/__init__.py         | 18 +++++
 .../network.py}                               | 66 ++++++++++++++++++-
 source/tests/test_model_format_utils.py       | 12 ++++
 3 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 deepmd_utils/model_format/__init__.py
 rename deepmd_utils/{model_format.py => model_format/network.py} (81%)

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
new file mode 100644
index 0000000000..4b33aa0151
--- /dev/null
+++ b/deepmd_utils/model_format/__init__.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .network import (
+    EmbeddingNet,
+    NativeLayer,
+    NativeNet,
+    load_dp_model,
+    save_dp_model,
+    traverse_model_dict,
+)
+
+__all__ = [
+    "EmbeddingNet",
+    "NativeLayer",
+    "NativeNet",
+    "load_dp_model",
+    "save_dp_model",
+    "traverse_model_dict",
+]
diff --git a/deepmd_utils/model_format.py b/deepmd_utils/model_format/network.py
similarity index 81%
rename from deepmd_utils/model_format.py
rename to deepmd_utils/model_format/network.py
index 131be93121..04aaa75534 100644
--- a/deepmd_utils/model_format.py
+++ b/deepmd_utils/model_format/network.py
@@ -275,10 +275,10 @@ class NativeNet(NativeOP):
         The layers of the network.
     """
 
-    def __init__(self, layers: Optional[List[NativeLayer]] = None) -> None:
+    def __init__(self, layers: Optional[List[dict]] = None) -> None:
         if layers is None:
             layers = []
-        self.layers = layers
+        self.layers = [NativeLayer.deserialize(layer) for layer in layers]
 
     def serialize(self) -> dict:
         """Serialize the network to a dict.
@@ -299,7 +299,7 @@ def deserialize(cls, data: dict) -> "NativeNet":
         data : dict
             The dict to deserialize from.
         """
-        return cls([NativeLayer.deserialize(layer) for layer in data["layers"]])
+        return cls(data["layers"])
 
     def __getitem__(self, key):
         assert isinstance(key, int)
@@ -329,3 +329,63 @@ def call(self, x: np.ndarray) -> np.ndarray:
         for layer in self.layers:
             x = layer.call(x)
         return x
+
+
+class EmbeddingNet(NativeNet):
+    def __init__(
+        self,
+        in_dim,
+        neuron: List[int] = [24, 48, 96],
+        activation_function: str = "tanh",
+        resnet_dt: bool = False,
+    ):
+        layers = []
+        i_in = in_dim
+        rng = np.random.default_rng()
+        for idx, ii in enumerate(neuron):
+            i_ot = ii
+            layers.append(
+                NativeLayer(
+                    rng.normal(size=(i_in, i_ot)),
+                    b=rng.normal(size=(ii)),
+                    idt=rng.normal(size=(ii)) if resnet_dt else None,
+                    activation_function=activation_function,
+                    resnet=True,
+                ).serialize()
+            )
+            i_in = i_ot
+        super().__init__(layers)
+        self.in_dim = in_dim
+        self.neuron = neuron
+        self.activation_function = activation_function
+        self.resnet_dt = resnet_dt
+
+    def serialize(self) -> dict:
+        """Serialize the network to a dict.
+
+        Returns
+        -------
+        dict
+            The serialized network.
+        """
+        return {
+            "in_dim": self.in_dim,
+            "neuron": self.neuron.copy(),
+            "activation_function": self.activation_function,
+            "resnet_dt": self.resnet_dt,
+            "layers": [layer.serialize() for layer in self.layers],
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "EmbeddingNet":
+        """Deserialize the network from a dict.
+
+        Parameters
+        ----------
+        data : dict
+            The dict to deserialize from.
+        """
+        layers = data.pop("layers")
+        obj = cls(**data)
+        super(EmbeddingNet, obj).__init__(layers)
+        return obj
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index af8c4361c8..f26ebbaa8d 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -9,6 +9,7 @@
 import numpy as np
 
 from deepmd_utils.model_format import (
+    EmbeddingNet,
     NativeLayer,
     NativeNet,
     load_dp_model,
@@ -90,6 +91,17 @@ def test_deserialize(self):
         np.testing.assert_array_equal(network[0]["resnet"], True)
         np.testing.assert_array_equal(network[1]["resnet"], True)
 
+    def test_embedding_net(self):
+        for ni, idt, act in itertools.product(
+            [1, 10],
+            [True, False],
+            ["tanh", "none"],
+        ):
+            en0 = EmbeddingNet(ni)
+            en1 = EmbeddingNet.deserialize(en0.serialize())
+            inp = np.ones([ni])
+            np.testing.assert_allclose(en0.call(inp), en1.call(inp))
+
 
 class TestDPModel(unittest.TestCase):
     def setUp(self) -> None:

From d1c00749f1a392466a5468ab36e23cd6b2c4588f Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Tue, 9 Jan 2024 08:40:17 +0800
Subject: [PATCH 67/97] support numerical precision and env_mat (#3114)

- support changing the numerical precision (e.g. the new `precision`
  argument of `NativeLayer`)
- add `EnvMat`, a NumPy implementation of the environment matrix

---------

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd_utils/model_format/__init__.py   |   8 ++
 deepmd_utils/model_format/common.py     |  24 +++++
 deepmd_utils/model_format/env_mat.py    | 129 ++++++++++++++++++++++++
 deepmd_utils/model_format/network.py    |  48 ++++++---
 source/tests/test_model_format_utils.py |  53 +++++++++-
 5 files changed, 246 insertions(+), 16 deletions(-)
 create mode 100644 deepmd_utils/model_format/common.py
 create mode 100644 deepmd_utils/model_format/env_mat.py

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index 4b33aa0151..533dd9ffff 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -1,4 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from .common import (
+    PRECISION_DICT,
+)
+from .env_mat import (
+    EnvMat,
+)
 from .network import (
     EmbeddingNet,
     NativeLayer,
@@ -9,10 +15,12 @@
 )
 
 __all__ = [
+    "EnvMat",
     "EmbeddingNet",
     "NativeLayer",
     "NativeNet",
     "load_dp_model",
     "save_dp_model",
     "traverse_model_dict",
+    "PRECISION_DICT",
 ]
diff --git a/deepmd_utils/model_format/common.py b/deepmd_utils/model_format/common.py
new file mode 100644
index 0000000000..82beb969c2
--- /dev/null
+++ b/deepmd_utils/model_format/common.py
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+)
+
+import numpy as np
+
+PRECISION_DICT = {
+    "float16": np.float16,
+    "float32": np.float32,
+    "float64": np.float64,
+    "half": np.float16,
+    "single": np.float32,
+    "double": np.float64,
+}
+DEFAULT_PRECISION = "float64"
+
+
+class NativeOP(ABC):
+    """The unit operation of a native model."""
+
+    def call(self, *args, **kwargs):
+        """Forward pass in NumPy implementation."""
+        raise NotImplementedError
diff --git a/deepmd_utils/model_format/env_mat.py b/deepmd_utils/model_format/env_mat.py
new file mode 100644
index 0000000000..84771135a6
--- /dev/null
+++ b/deepmd_utils/model_format/env_mat.py
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+    Union,
+)
+
+import numpy as np
+
+from .common import (
+    NativeOP,
+)
+
+
+def compute_smooth_weight(
+    distance: np.ndarray,
+    rmin: float,
+    rmax: float,
+):
+    """Compute smooth weight for descriptor elements."""
+    min_mask = distance <= rmin
+    max_mask = distance >= rmax
+    mid_mask = np.logical_not(np.logical_or(min_mask, max_mask))
+    uu = (distance - rmin) / (rmax - rmin)
+    vv = uu * uu * uu * (-6.0 * uu * uu + 15.0 * uu - 10.0) + 1.0
+    return vv * mid_mask + min_mask
+
+
+def _make_env_mat(
+    nlist,
+    coord,
+    rcut: float,
+    ruct_smth: float,
+):
+    """Make smooth environment matrix."""
+    nf, nloc, nnei = nlist.shape
+    # nf x nall x 3
+    coord = coord.reshape(nf, -1, 3)
+    mask = nlist >= 0
+    nlist = nlist * mask
+    # nf x (nloc x nnei) x 3
+    index = np.tile(nlist.reshape(nf, -1, 1), (1, 1, 3))
+    coord_r = np.take_along_axis(coord, index, 1)
+    # nf x nloc x nnei x 3
+    coord_r = coord_r.reshape(nf, nloc, nnei, 3)
+    # nf x nloc x 1 x 3
+    coord_l = coord[:, :nloc].reshape(nf, -1, 1, 3)
+    # nf x nloc x nnei x 3
+    diff = coord_r - coord_l
+    # nf x nloc x nnei
+    length = np.linalg.norm(diff, axis=-1, keepdims=True)
+    # for index 0 nloc atom
+    length = length + ~np.expand_dims(mask, -1)
+    t0 = 1 / length
+    t1 = diff / length**2
+    weight = compute_smooth_weight(length, ruct_smth, rcut)
+    env_mat_se_a = np.concatenate([t0, t1], axis=-1) * weight * np.expand_dims(mask, -1)
+    return env_mat_se_a, diff * np.expand_dims(mask, -1), weight
+
+
+class EnvMat(NativeOP):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+    ):
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+
+    def call(
+        self,
+        nlist: np.ndarray,
+        coord_ext: np.ndarray,
+        atype_ext: np.ndarray,
+        davg: Optional[np.ndarray] = None,
+        dstd: Optional[np.ndarray] = None,
+    ) -> Union[np.ndarray, np.ndarray]:
+        """Compute the environment matrix.
+
+        Parameters
+        ----------
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        davg
+            The data avg. shape: nt x nnei x 4
+        dstd
+            The inverse of data std. shape: nt x nnei x 4
+
+        Returns
+        -------
+        env_mat
+            The environment matrix. shape: nf x nloc x nnei x 4
+        switch
+            The value of switch function. shape: nf x nloc x nnei
+        """
+        em, sw = self._call(nlist, coord_ext)
+        nf, nloc, nnei = nlist.shape
+        atype = atype_ext[:, :nloc]
+        if davg is not None:
+            em -= davg[atype]
+        if dstd is not None:
+            em /= dstd[atype]
+        return em, sw
+
+    def _call(
+        self,
+        nlist,
+        coord_ext,
+    ):
+        em, diff, ww = _make_env_mat(nlist, coord_ext, self.rcut, self.rcut_smth)
+        return em, ww
+
+    def serialize(
+        self,
+    ) -> dict:
+        return {
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+        }
+
+    @classmethod
+    def deserialize(
+        cls,
+        data: dict,
+    ) -> "EnvMat":
+        return cls(**data)
diff --git a/deepmd_utils/model_format/network.py b/deepmd_utils/model_format/network.py
index 04aaa75534..98c35636fa 100644
--- a/deepmd_utils/model_format/network.py
+++ b/deepmd_utils/model_format/network.py
@@ -4,9 +4,6 @@
 See issue #2982 for more information.
 """
 import json
-from abc import (
-    ABC,
-)
 from typing import (
     List,
     Optional,
@@ -20,6 +17,12 @@
 except ImportError:
     __version__ = "unknown"
 
+from .common import (
+    DEFAULT_PRECISION,
+    PRECISION_DICT,
+    NativeOP,
+)
+
 
 def traverse_model_dict(model_obj, callback: callable, is_variable: bool = False):
     """Traverse a model dict and call callback on each variable.
@@ -124,14 +127,6 @@ def load_dp_model(filename: str) -> dict:
     return model_dict
 
 
-class NativeOP(ABC):
-    """The unit operation of a native model."""
-
-    def call(self, *args, **kwargs):
-        """Forward pass in NumPy implementation."""
-        raise NotImplementedError
-
-
 class NativeLayer(NativeOP):
     """Native representation of a layer.
 
@@ -156,12 +151,16 @@ def __init__(
         idt: Optional[np.ndarray] = None,
         activation_function: Optional[str] = None,
         resnet: bool = False,
+        precision: str = DEFAULT_PRECISION,
     ) -> None:
-        self.w = w
-        self.b = b
-        self.idt = idt
+        prec = PRECISION_DICT[precision.lower()]
+        self.precision = precision
+        self.w = w.astype(prec) if w is not None else None
+        self.b = b.astype(prec) if b is not None else None
+        self.idt = idt.astype(prec) if idt is not None else None
         self.activation_function = activation_function
         self.resnet = resnet
+        self.check_type_consistency()
 
     def serialize(self) -> dict:
         """Serialize the layer to a dict.
@@ -180,6 +179,7 @@ def serialize(self) -> dict:
         return {
             "activation_function": self.activation_function,
             "resnet": self.resnet,
+            "precision": self.precision,
             "@variables": data,
         }
 
@@ -192,14 +192,28 @@ def deserialize(cls, data: dict) -> "NativeLayer":
         data : dict
             The dict to deserialize from.
         """
+        precision = data.get("precision", DEFAULT_PRECISION)
         return cls(
             w=data["@variables"]["w"],
             b=data["@variables"].get("b", None),
             idt=data["@variables"].get("idt", None),
             activation_function=data["activation_function"],
             resnet=data.get("resnet", False),
+            precision=precision,
         )
 
+    def check_type_consistency(self):
+        precision = self.precision
+
+        def check_var(var):
+            if var is not None:
+                # assertion "float64" == "double" would fail
+                assert PRECISION_DICT[var.dtype.name] is PRECISION_DICT[precision]
+
+        check_var(self.w)
+        check_var(self.b)
+        check_var(self.idt)
+
     def __setitem__(self, key, value):
         if key in ("w", "matrix"):
             self.w = value
@@ -211,6 +225,8 @@ def __setitem__(self, key, value):
             self.activation_function = value
         elif key == "resnet":
             self.resnet = value
+        elif key == "precision":
+            self.precision = value
         else:
             raise KeyError(key)
 
@@ -225,6 +241,8 @@ def __getitem__(self, key):
             return self.activation_function
         elif key == "resnet":
             return self.resnet
+        elif key == "precision":
+            return self.precision
         else:
             raise KeyError(key)
 
@@ -338,6 +356,7 @@ def __init__(
         neuron: List[int] = [24, 48, 96],
         activation_function: str = "tanh",
         resnet_dt: bool = False,
+        precision: str = DEFAULT_PRECISION,
     ):
         layers = []
         i_in = in_dim
@@ -351,6 +370,7 @@ def __init__(
                     idt=rng.normal(size=(ii)) if resnet_dt else None,
                     activation_function=activation_function,
                     resnet=True,
+                    precision=precision,
                 ).serialize()
             )
             i_in = i_ot
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index f26ebbaa8d..aeb717060d 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -10,6 +10,7 @@
 
 from deepmd_utils.model_format import (
     EmbeddingNet,
+    EnvMat,
     NativeLayer,
     NativeNet,
     load_dp_model,
@@ -19,18 +20,22 @@
 
 class TestNativeLayer(unittest.TestCase):
     def test_serialize_deserize(self):
-        for (ni, no), bias, ut, activation_function, resnet, ashp in itertools.product(
+        for (
+            ni,
+            no,
+        ), bias, ut, activation_function, resnet, ashp, prec in itertools.product(
             [(5, 5), (5, 10), (5, 9), (9, 5)],
             [True, False],
             [True, False],
             ["tanh", "none"],
             [True, False],
             [None, [4], [3, 2]],
+            ["float32", "float64", "single", "double"],
         ):
             ww = np.full((ni, no), 3.0)
             bb = np.full((no,), 4.0) if bias else None
             idt = np.full((no,), 5.0) if ut else None
-            nl0 = NativeLayer(ww, bb, idt, activation_function, resnet)
+            nl0 = NativeLayer(ww, bb, idt, activation_function, resnet, prec)
             nl1 = NativeLayer.deserialize(nl0.serialize())
             inp_shap = [ww.shape[0]]
             if ashp is not None:
@@ -134,3 +139,47 @@ def test_save_load_model(self):
     def tearDown(self) -> None:
         if os.path.exists(self.filename):
             os.remove(self.filename)
+
+
+class TestEnvMat(unittest.TestCase):
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nall = 4
+        self.nf, self.nt = 1, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall * 3])
+        self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        # sel = [5, 2]
+        self.nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, 0, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, 7])
+        self.rcut = 0.4
+        self.rcut_smth = 2.2
+
+    def test_self_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 4))
+        dstd = rng.normal(size=(self.nt, nnei, 4))
+        dstd = 0.1 + np.abs(dstd)
+        em0 = EnvMat(self.rcut, self.rcut_smth)
+        em1 = EnvMat.deserialize(em0.serialize())
+        mm0, ww0 = em0.call(self.nlist, self.coord_ext, self.atype_ext, davg, dstd)
+        mm1, ww1 = em1.call(self.nlist, self.coord_ext, self.atype_ext, davg, dstd)
+        np.testing.assert_allclose(mm0, mm1)
+        np.testing.assert_allclose(ww0, ww1)

From 25cdd1981a76aef357a0c219d5b5dc2db60d91eb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 9 Jan 2024 13:16:16 +0800
Subject: [PATCH 68/97] Bump docker/metadata-action from 5.4.0 to 5.5.0 (#3115)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps
[docker/metadata-action](https://github.com/docker/metadata-action) from
5.4.0 to 5.5.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/docker/metadata-action/releases">docker/metadata-action's
releases</a>.</em></p>
<blockquote>
<h2>v5.5.0</h2>
<ul>
<li>Set <code>cwd://</code> prefix for bake files path by <a
href="https://github.com/crazy-max"><code>@​crazy-max</code></a> in <a
href="https://redirect.github.com/docker/metadata-action/pull/370">docker/metadata-action#370</a></li>
<li>Bump <code>@​docker/actions-toolkit</code> from 0.16.0 to 0.16.1 in
<a
href="https://redirect.github.com/docker/metadata-action/pull/371">docker/metadata-action#371</a></li>
<li>Bump moment from 2.29.4 to 2.30.1 in <a
href="https://redirect.github.com/docker/metadata-action/pull/373">docker/metadata-action#373</a></li>
<li>Bump moment-timezone from 0.5.43 to 0.5.44 in <a
href="https://redirect.github.com/docker/metadata-action/pull/374">docker/metadata-action#374</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/docker/metadata-action/compare/v5.4.0...v5.5.0">https://github.com/docker/metadata-action/compare/v5.4.0...v5.5.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/docker/metadata-action/commit/dbef88086f6cef02e264edb7dbf63250c17cef6c"><code>dbef880</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/metadata-action/issues/374">#374</a>
from docker/dependabot/npm_and_yarn/moment-timezone-0...</li>
<li><a
href="https://github.com/docker/metadata-action/commit/b73e7a71ace0f6ec603f2c5052d8304f33e5dbff"><code>b73e7a7</code></a>
chore: update generated content</li>
<li><a
href="https://github.com/docker/metadata-action/commit/b9fba690eb63fbb6eee7f71bd710b75d4f0128c8"><code>b9fba69</code></a>
chore(deps): Bump moment-timezone from 0.5.43 to 0.5.44</li>
<li><a
href="https://github.com/docker/metadata-action/commit/ac82374ba6046fefd36df45f67cec12fa6ebeb75"><code>ac82374</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/metadata-action/issues/373">#373</a>
from docker/dependabot/npm_and_yarn/moment-2.30.1</li>
<li><a
href="https://github.com/docker/metadata-action/commit/c92519a44ee7b643d904a587e47563f49f24b387"><code>c92519a</code></a>
chore: update generated content</li>
<li><a
href="https://github.com/docker/metadata-action/commit/3b4179d34de6b2ba21a306f387c50672dbbe8612"><code>3b4179d</code></a>
chore(deps): Bump moment from 2.29.4 to 2.30.1</li>
<li><a
href="https://github.com/docker/metadata-action/commit/0784993ef80dda711a73b98a352e2b73c7feaf32"><code>0784993</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/metadata-action/issues/371">#371</a>
from docker/dependabot/npm_and_yarn/docker/actions-to...</li>
<li><a
href="https://github.com/docker/metadata-action/commit/52c3e9e4100b351cb25a8ec886015c375515f619"><code>52c3e9e</code></a>
chore: update generated content</li>
<li><a
href="https://github.com/docker/metadata-action/commit/82a5e6726cc813b23b85149bf897c55710d45705"><code>82a5e67</code></a>
chore(deps): Bump <code>@​docker/actions-toolkit</code> from 0.16.0 to
0.16.1</li>
<li><a
href="https://github.com/docker/metadata-action/commit/41e1fe3437fd5c8d22c24a824307673223ead782"><code>41e1fe3</code></a>
Merge pull request <a
href="https://redirect.github.com/docker/metadata-action/issues/370">#370</a>
from crazy-max/bake-cwd</li>
<li>Additional commits viewable in <a
href="https://github.com/docker/metadata-action/compare/9dc751fe249ad99385a2583ee0d084c400eee04e...dbef88086f6cef02e264edb7dbf63250c17cef6c">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/metadata-action&package-manager=github_actions&previous-version=5.4.0&new-version=5.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index e47f14b1f4..23076e9bf5 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -144,7 +144,7 @@ jobs:
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@9dc751fe249ad99385a2583ee0d084c400eee04e
+        uses: docker/metadata-action@dbef88086f6cef02e264edb7dbf63250c17cef6c
         with:
           images: ghcr.io/deepmodeling/deepmd-kit
 

From a971d928b0fe266913565dd51003fd876efc88a9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 9 Jan 2024 13:17:25 +0800
Subject: [PATCH 69/97] [pre-commit.ci] pre-commit autoupdate (#3116)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.9 →
v0.1.11](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.9...v0.1.11)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index efa2bc1675..5b6beb1dba 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,7 +30,7 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.9
+    rev: v0.1.11
     hooks:
     - id: ruff
       args: ["--fix"]

From 438bc78fdb48004729fc5e76e5b1515600604d3d Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Wed, 10 Jan 2024 10:00:47 +0800
Subject: [PATCH 70/97] Add dp model format sea (#3123)

- add precision test for embedding net

Limitations
- only support `type_one_side`
- does not support type embedding and `stripped_type_embedding`
- does not support `exclude_types`
- does not support spin

---------

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd_utils/model_format/__init__.py   |   6 +
 deepmd_utils/model_format/network.py    |   2 +
 deepmd_utils/model_format/se_e2_a.py    | 195 ++++++++++++++++++++++++
 source/tests/test_model_format_utils.py |  45 +++++-
 4 files changed, 243 insertions(+), 5 deletions(-)
 create mode 100644 deepmd_utils/model_format/se_e2_a.py

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index 533dd9ffff..40769f187d 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from .common import (
+    DEFAULT_PRECISION,
     PRECISION_DICT,
 )
 from .env_mat import (
@@ -13,8 +14,12 @@
     save_dp_model,
     traverse_model_dict,
 )
+from .se_e2_a import (
+    DescrptSeA,
+)
 
 __all__ = [
+    "DescrptSeA",
     "EnvMat",
     "EmbeddingNet",
     "NativeLayer",
@@ -23,4 +28,5 @@
     "save_dp_model",
     "traverse_model_dict",
     "PRECISION_DICT",
+    "DEFAULT_PRECISION",
 ]
diff --git a/deepmd_utils/model_format/network.py b/deepmd_utils/model_format/network.py
index 98c35636fa..682a349476 100644
--- a/deepmd_utils/model_format/network.py
+++ b/deepmd_utils/model_format/network.py
@@ -379,6 +379,7 @@ def __init__(
         self.neuron = neuron
         self.activation_function = activation_function
         self.resnet_dt = resnet_dt
+        self.precision = precision
 
     def serialize(self) -> dict:
         """Serialize the network to a dict.
@@ -393,6 +394,7 @@ def serialize(self) -> dict:
             "neuron": self.neuron.copy(),
             "activation_function": self.activation_function,
             "resnet_dt": self.resnet_dt,
+            "precision": self.precision,
             "layers": [layer.serialize() for layer in self.layers],
         }
 
diff --git a/deepmd_utils/model_format/se_e2_a.py b/deepmd_utils/model_format/se_e2_a.py
new file mode 100644
index 0000000000..114f9df915
--- /dev/null
+++ b/deepmd_utils/model_format/se_e2_a.py
@@ -0,0 +1,195 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+try:
+    from deepmd_utils._version import version as __version__
+except ImportError:
+    __version__ = "unknown"
+
+from typing import (
+    Any,
+    List,
+    Optional,
+)
+
+from .common import (
+    DEFAULT_PRECISION,
+    NativeOP,
+)
+from .env_mat import (
+    EnvMat,
+)
+from .network import (
+    EmbeddingNet,
+)
+
+
+class DescrptSeA(NativeOP):
+    def __init__(
+        self,
+        rcut: float,
+        rcut_smth: float,
+        sel: List[str],
+        neuron: List[int] = [24, 48, 96],
+        axis_neuron: int = 8,
+        resnet_dt: bool = False,
+        trainable: bool = True,
+        type_one_side: bool = True,
+        exclude_types: List[List[int]] = [],
+        set_davg_zero: bool = False,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        spin: Optional[Any] = None,
+        stripped_type_embedding: bool = False,
+    ) -> None:
+        ## seed, uniform_seed, multi_task, not included.
+        if not type_one_side:
+            raise NotImplementedError("type_one_side == False not implemented")
+        if stripped_type_embedding:
+            raise NotImplementedError("stripped_type_embedding is not implemented")
+        if exclude_types != []:
+            raise NotImplementedError("exclude_types is not implemented")
+        if spin is not None:
+            raise NotImplementedError("spin is not implemented")
+
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.sel = sel
+        self.ntypes = len(self.sel)
+        self.neuron = neuron
+        self.axis_neuron = axis_neuron
+        self.resnet_dt = resnet_dt
+        self.trainable = trainable
+        self.type_one_side = type_one_side
+        self.exclude_types = exclude_types
+        self.set_davg_zero = set_davg_zero
+        self.activation_function = activation_function
+        self.precision = precision
+        self.spin = spin
+        self.stripped_type_embedding = stripped_type_embedding
+
+        in_dim = 1  # not considiering type embedding
+        self.embeddings = []
+        for ii in range(self.ntypes):
+            self.embeddings.append(
+                EmbeddingNet(
+                    in_dim,
+                    self.neuron,
+                    self.activation_function,
+                    self.resnet_dt,
+                    self.precision,
+                )
+            )
+        self.env_mat = EnvMat(self.rcut, self.rcut_smth)
+        self.nnei = np.sum(self.sel)
+        self.nneix4 = self.nnei * 4
+        self.davg = np.zeros([self.ntypes, self.nneix4])
+        self.dstd = np.ones([self.ntypes, self.nneix4])
+        self.orig_sel = self.sel
+
+    def __setitem__(self, key, value):
+        if key in ("avg", "data_avg", "davg"):
+            self.davg = value
+        elif key in ("std", "data_std", "dstd"):
+            self.dstd = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ("avg", "data_avg", "davg"):
+            return self.davg
+        elif key in ("std", "data_std", "dstd"):
+            return self.dstd
+        else:
+            raise KeyError(key)
+
+    def cal_g(
+        self,
+        ss,
+        ll,
+    ):
+        nf, nloc, nnei = ss.shape[0:3]
+        ss = ss.reshape(nf, nloc, nnei, 1)
+        # nf x nloc x nnei x ng
+        gg = self.embeddings[ll].call(ss)
+        return gg
+
+    def call(
+        self,
+        coord_ext,
+        atype_ext,
+        nlist,
+    ):
+        """Compute the environment matrix.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended atom types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x ng x axis_neuron
+        """
+        # nf x nloc x nnei x 4
+        rr, ww = self.env_mat.call(nlist, coord_ext, atype_ext, self.davg, self.dstd)
+        nf, nloc, nnei, _ = rr.shape
+        sec = np.append([0], np.cumsum(self.sel))
+
+        ng = self.neuron[-1]
+        gr = np.zeros([nf, nloc, ng, 4])
+        for tt in range(self.ntypes):
+            tr = rr[:, :, sec[tt] : sec[tt + 1], :]
+            ss = tr[..., 0:1]
+            gg = self.cal_g(ss, tt)
+            # nf x nloc x ng x 4
+            gr += np.einsum("flni,flnj->flij", gg, tr)
+        gr /= self.nnei
+        gr1 = gr[:, :, : self.axis_neuron, :]
+        # nf x nloc x ng x ng1
+        grrg = np.einsum("flid,fljd->flij", gr, gr1)
+        # nf x nloc x (ng x ng1)
+        grrg = grrg.reshape(nf, nloc, ng * self.axis_neuron)
+        return grrg
+
+    def serialize(self) -> dict:
+        return {
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel,
+            "neuron": self.neuron,
+            "axis_neuron": self.axis_neuron,
+            "resnet_dt": self.resnet_dt,
+            "trainable": self.trainable,
+            "type_one_side": self.type_one_side,
+            "exclude_types": self.exclude_types,
+            "set_davg_zero": self.set_davg_zero,
+            "activation_function": self.activation_function,
+            "precision": self.precision,
+            "spin": self.spin,
+            "stripped_type_embedding": self.stripped_type_embedding,
+            "env_mat": self.env_mat.serialize(),
+            "embeddings": [ii.serialize() for ii in self.embeddings],
+            "@variables": {
+                "davg": self.davg,
+                "dstd": self.dstd,
+            },
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "DescrptSeA":
+        variables = data.pop("@variables")
+        embeddings = data.pop("embeddings")
+        env_mat = data.pop("env_mat")
+        obj = cls(**data)
+
+        obj["davg"] = variables["davg"]
+        obj["dstd"] = variables["dstd"]
+        obj.embeddings = [EmbeddingNet.deserialize(dd) for dd in embeddings]
+        obj.env_mat = EnvMat.deserialize(env_mat)
+        return obj
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index aeb717060d..7fd93c1366 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -9,6 +9,7 @@
 import numpy as np
 
 from deepmd_utils.model_format import (
+    DescrptSeA,
     EmbeddingNet,
     EnvMat,
     NativeLayer,
@@ -97,12 +98,18 @@ def test_deserialize(self):
         np.testing.assert_array_equal(network[1]["resnet"], True)
 
     def test_embedding_net(self):
-        for ni, idt, act in itertools.product(
+        for ni, act, idt, prec in itertools.product(
             [1, 10],
-            [True, False],
             ["tanh", "none"],
+            [True, False],
+            ["double", "single"],
         ):
-            en0 = EmbeddingNet(ni)
+            en0 = EmbeddingNet(
+                ni,
+                activation_function=act,
+                precision=prec,
+                resnet_dt=idt,
+            )
             en1 = EmbeddingNet.deserialize(en0.serialize())
             inp = np.ones([ni])
             np.testing.assert_allclose(en0.call(inp), en1.call(inp))
@@ -141,7 +148,7 @@ def tearDown(self) -> None:
             os.remove(self.filename)
 
 
-class TestEnvMat(unittest.TestCase):
+class TestCaseSingleFrameWithNlist:
     def setUp(self):
         # nloc == 3, nall == 4
         self.nloc = 3
@@ -158,6 +165,7 @@ def setUp(self):
         ).reshape([1, self.nall * 3])
         self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall])
         # sel = [5, 2]
+        self.sel = [5, 2]
         self.nlist = np.array(
             [
                 [1, 3, -1, -1, -1, 2, -1],
@@ -165,10 +173,15 @@ def setUp(self):
                 [0, 1, -1, -1, -1, 0, -1],
             ],
             dtype=int,
-        ).reshape([1, self.nloc, 7])
+        ).reshape([1, self.nloc, sum(self.sel)])
         self.rcut = 0.4
         self.rcut_smth = 2.2
 
+
+class TestEnvMat(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
     def test_self_consistency(
         self,
     ):
@@ -183,3 +196,25 @@ def test_self_consistency(
         mm1, ww1 = em1.call(self.nlist, self.coord_ext, self.atype_ext, davg, dstd)
         np.testing.assert_allclose(mm0, mm1)
         np.testing.assert_allclose(ww0, ww1)
+
+
+class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 4))
+        dstd = rng.normal(size=(self.nt, nnei, 4))
+        dstd = 0.1 + np.abs(dstd)
+
+        em0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel)
+        em0.davg = davg
+        em0.dstd = dstd
+        em1 = DescrptSeA.deserialize(em0.serialize())
+        mm0 = em0.call(self.coord_ext, self.atype_ext, self.nlist)
+        mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist)
+        np.testing.assert_allclose(mm0, mm1)

From dac64cf9987d079db789e034ed0e75bddda0c6a5 Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Wed, 10 Jan 2024 11:27:49 +0800
Subject: [PATCH 71/97] input order of env_mat changed to be consistent with
 descriptor (#3125)

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd_utils/model_format/env_mat.py    | 2 +-
 deepmd_utils/model_format/se_e2_a.py    | 2 +-
 source/tests/test_model_format_utils.py | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/deepmd_utils/model_format/env_mat.py b/deepmd_utils/model_format/env_mat.py
index 84771135a6..7822bd7d0c 100644
--- a/deepmd_utils/model_format/env_mat.py
+++ b/deepmd_utils/model_format/env_mat.py
@@ -68,9 +68,9 @@ def __init__(
 
     def call(
         self,
-        nlist: np.ndarray,
         coord_ext: np.ndarray,
         atype_ext: np.ndarray,
+        nlist: np.ndarray,
         davg: Optional[np.ndarray] = None,
         dstd: Optional[np.ndarray] = None,
     ) -> Union[np.ndarray, np.ndarray]:
diff --git a/deepmd_utils/model_format/se_e2_a.py b/deepmd_utils/model_format/se_e2_a.py
index 114f9df915..5a4fe15a2d 100644
--- a/deepmd_utils/model_format/se_e2_a.py
+++ b/deepmd_utils/model_format/se_e2_a.py
@@ -137,7 +137,7 @@ def call(
             The descriptor. shape: nf x nloc x ng x axis_neuron
         """
         # nf x nloc x nnei x 4
-        rr, ww = self.env_mat.call(nlist, coord_ext, atype_ext, self.davg, self.dstd)
+        rr, ww = self.env_mat.call(coord_ext, atype_ext, nlist, self.davg, self.dstd)
         nf, nloc, nnei, _ = rr.shape
         sec = np.append([0], np.cumsum(self.sel))
 
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index 7fd93c1366..ac37120e83 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -192,8 +192,8 @@ def test_self_consistency(
         dstd = 0.1 + np.abs(dstd)
         em0 = EnvMat(self.rcut, self.rcut_smth)
         em1 = EnvMat.deserialize(em0.serialize())
-        mm0, ww0 = em0.call(self.nlist, self.coord_ext, self.atype_ext, davg, dstd)
-        mm1, ww1 = em1.call(self.nlist, self.coord_ext, self.atype_ext, davg, dstd)
+        mm0, ww0 = em0.call(self.coord_ext, self.atype_ext, self.nlist, davg, dstd)
+        mm1, ww1 = em1.call(self.coord_ext, self.atype_ext, self.nlist, davg, dstd)
         np.testing.assert_allclose(mm0, mm1)
         np.testing.assert_allclose(ww0, ww1)
 

From 398f037834e8f3b082edd8916218d16b9a5f8cec Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Wed, 10 Jan 2024 11:28:19 +0800
Subject: [PATCH 72/97] doc string for dp model format descriptor se_e2_a
 (#3124)

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd_utils/model_format/se_e2_a.py | 96 ++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 6 deletions(-)

diff --git a/deepmd_utils/model_format/se_e2_a.py b/deepmd_utils/model_format/se_e2_a.py
index 5a4fe15a2d..624f30c122 100644
--- a/deepmd_utils/model_format/se_e2_a.py
+++ b/deepmd_utils/model_format/se_e2_a.py
@@ -25,6 +25,95 @@
 
 
 class DescrptSeA(NativeOP):
+    r"""DeepPot-SE constructed from all information (both angular and radial) of
+    atomic configurations. The embedding takes the distance between atoms as input.
+
+    The descriptor :math:`\mathcal{D}^i \in \mathcal{R}^{M_1 \times M_2}` is given by [1]_
+
+    .. math::
+        \mathcal{D}^i = (\mathcal{G}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \mathcal{G}^i_<
+
+    where :math:`\mathcal{R}^i \in \mathbb{R}^{N \times 4}` is the coordinate
+    matrix, and each row of :math:`\mathcal{R}^i` can be constructed as follows
+
+    .. math::
+        (\mathcal{R}^i)_j = [
+        \begin{array}{cccc}
+            s(r_{ji}) & \frac{s(r_{ji})x_{ji}}{r_{ji}} & \frac{s(r_{ji})y_{ji}}{r_{ji}} & \frac{s(r_{ji})z_{ji}}{r_{ji}}
+        \end{array}
+        ]
+
+    where :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is
+    the relative coordinate and :math:`r_{ji}=\lVert \mathbf{R}_{ji} \rVert` is its norm.
+    The switching function :math:`s(r)` is defined as:
+
+    .. math::
+        s(r)=
+        \begin{cases}
+        \frac{1}{r}, & r<r_s \\
+        \frac{1}{r} \{ {(\frac{r - r_s}{ r_c - r_s})}^3 (-6 {(\frac{r - r_s}{ r_c - r_s})}^2 +15 \frac{r - r_s}{ r_c - r_s} -10) +1 \}, & r_s \leq r<r_c \\
+        0, & r \geq r_c
+        \end{cases}
+
+    Each row of the embedding matrix  :math:`\mathcal{G}^i \in \mathbb{R}^{N \times M_1}` consists of outputs
+    of a embedding network :math:`\mathcal{N}` of :math:`s(r_{ji})`:
+
+    .. math::
+        (\mathcal{G}^i)_j = \mathcal{N}(s(r_{ji}))
+
+    :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of
+    :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at
+    :meth:`deepmd.utils.network.embedding_net`.
+
+    Parameters
+    ----------
+    rcut
+            The cut-off radius :math:`r_c`
+    rcut_smth
+            From where the environment matrix should be smoothed :math:`r_s`
+    sel : list[str]
+            sel[i] specifies the maximum number of type i atoms in the cut-off radius
+    neuron : list[int]
+            Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
+    axis_neuron
+            Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix)
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            y = x + dt * \phi (Wx + b)
+    trainable
+            If the weights of embedding net are trainable.
+    type_one_side
+            Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
+    exclude_types : List[List[int]]
+            The excluded pairs of types which have no interaction with each other.
+            For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+    set_davg_zero
+            Set the shift of embedding net input to zero.
+    activation_function
+            The activation function in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    multi_task
+            If the model has multi fitting nets to train.
+    spin
+            The deepspin object.
+
+    Limitations
+    -----------
+    The current implementation does not support the following features
+
+    1. type_one_side == False
+    2. exclude_types != []
+    3. spin is not None
+
+    References
+    ----------
+    .. [1] Linfeng Zhang, Jiequn Han, Han Wang, Wissam A. Saidi, Roberto Car, and E. Weinan. 2018.
+       End-to-end symmetry preserving inter-atomic potential energy model for finite and extended
+       systems. In Proceedings of the 32nd International Conference on Neural Information Processing
+       Systems (NIPS'18). Curran Associates Inc., Red Hook, NY, USA, 4441-4451.
+    """
+
     def __init__(
         self,
         rcut: float,
@@ -40,13 +129,10 @@ def __init__(
         activation_function: str = "tanh",
         precision: str = DEFAULT_PRECISION,
         spin: Optional[Any] = None,
-        stripped_type_embedding: bool = False,
     ) -> None:
         ## seed, uniform_seed, multi_task, not included.
         if not type_one_side:
             raise NotImplementedError("type_one_side == False not implemented")
-        if stripped_type_embedding:
-            raise NotImplementedError("stripped_type_embedding is not implemented")
         if exclude_types != []:
             raise NotImplementedError("exclude_types is not implemented")
         if spin is not None:
@@ -66,7 +152,6 @@ def __init__(
         self.activation_function = activation_function
         self.precision = precision
         self.spin = spin
-        self.stripped_type_embedding = stripped_type_embedding
 
         in_dim = 1  # not considiering type embedding
         self.embeddings = []
@@ -120,7 +205,7 @@ def call(
         atype_ext,
         nlist,
     ):
-        """Compute the environment matrix.
+        """Compute the descriptor.
 
         Parameters
         ----------
@@ -172,7 +257,6 @@ def serialize(self) -> dict:
             "activation_function": self.activation_function,
             "precision": self.precision,
             "spin": self.spin,
-            "stripped_type_embedding": self.stripped_type_embedding,
             "env_mat": self.env_mat.serialize(),
             "embeddings": [ii.serialize() for ii in self.embeddings],
             "@variables": {

From 43f96398d0be51c6226c694945ecd181c6ee1015 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 10 Jan 2024 01:07:41 -0500
Subject: [PATCH 73/97] add native Networks for multiple Network classes (#3117)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd_utils/model_format/__init__.py   |   2 +
 deepmd_utils/model_format/network.py    | 112 ++++++++++++++++++++++++
 deepmd_utils/model_format/se_e2_a.py    |  27 +++---
 source/tests/test_model_format_utils.py |  65 ++++++++++++++
 4 files changed, 194 insertions(+), 12 deletions(-)

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index 40769f187d..d814f56b1a 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -10,6 +10,7 @@
     EmbeddingNet,
     NativeLayer,
     NativeNet,
+    NetworkCollection,
     load_dp_model,
     save_dp_model,
     traverse_model_dict,
@@ -24,6 +25,7 @@
     "EmbeddingNet",
     "NativeLayer",
     "NativeNet",
+    "NetworkCollection",
     "load_dp_model",
     "save_dp_model",
     "traverse_model_dict",
diff --git a/deepmd_utils/model_format/network.py b/deepmd_utils/model_format/network.py
index 682a349476..c587b08cf6 100644
--- a/deepmd_utils/model_format/network.py
+++ b/deepmd_utils/model_format/network.py
@@ -3,10 +3,14 @@
 
 See issue #2982 for more information.
 """
+import itertools
 import json
 from typing import (
+    ClassVar,
+    Dict,
     List,
     Optional,
+    Union,
 )
 
 import h5py
@@ -411,3 +415,111 @@ def deserialize(cls, data: dict) -> "EmbeddingNet":
         obj = cls(**data)
         super(EmbeddingNet, obj).__init__(layers)
         return obj
+
+
+class NetworkCollection:
+    """A collection of networks for multiple elements.
+
+    The number of dimensions for types might be 0, 1, or 2.
+    - 0: embedding or fitting with type embedding, in ()
+    - 1: embedding with type_one_side, or fitting, in (type_i)
+    - 2: embedding without type_one_side, in (type_i, type_j)
+
+    Parameters
+    ----------
+    ndim : int
+        The number of dimensions.
+    network_type : str, optional
+        The type of the network.
+    networks : dict, optional
+        The networks to initialize with.
+    """
+
+    # subclass may override this
+    NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = {
+        "network": NativeNet,
+        "embedding_network": EmbeddingNet,
+    }
+
+    def __init__(
+        self,
+        ndim: int,
+        ntypes: int,
+        network_type: str = "network",
+        networks: List[Union[NativeNet, dict]] = [],
+    ):
+        self.ndim = ndim
+        self.ntypes = ntypes
+        self.network_type = self.NETWORK_TYPE_MAP[network_type]
+        self._networks = [None for ii in range(ntypes**ndim)]
+        for ii, network in enumerate(networks):
+            self[ii] = network
+        if len(networks):
+            self.check_completeness()
+
+    def check_completeness(self):
+        """Check whether the collection is complete.
+
+        Raises
+        ------
+        RuntimeError
+            If the collection is incomplete.
+        """
+        for tt in itertools.product(range(self.ntypes), repeat=self.ndim):
+            if self[tuple(tt)] is None:
+                raise RuntimeError(f"network for {tt} not found")
+
+    def _convert_key(self, key):
+        if isinstance(key, int):
+            idx = key
+        else:
+            if isinstance(key, tuple):
+                pass
+            elif isinstance(key, str):
+                key = tuple([int(tt) for tt in key.split("_")[1:]])
+            else:
+                raise TypeError(key)
+            assert isinstance(key, tuple)
+            assert len(key) == self.ndim
+            idx = sum([tt * self.ntypes**ii for ii, tt in enumerate(key)])
+        return idx
+
+    def __getitem__(self, key):
+        return self._networks[self._convert_key(key)]
+
+    def __setitem__(self, key, value):
+        if isinstance(value, self.network_type):
+            pass
+        elif isinstance(value, dict):
+            value = self.network_type.deserialize(value)
+        else:
+            raise TypeError(value)
+        self._networks[self._convert_key(key)] = value
+
+    def serialize(self) -> dict:
+        """Serialize the networks to a dict.
+
+        Returns
+        -------
+        dict
+            The serialized networks.
+        """
+        network_type_map_inv = {v: k for k, v in self.NETWORK_TYPE_MAP.items()}
+        network_type_name = network_type_map_inv[self.network_type]
+        return {
+            "ndim": self.ndim,
+            "ntypes": self.ntypes,
+            "network_type": network_type_name,
+            "networks": [nn.serialize() for nn in self._networks],
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "NetworkCollection":
+        """Deserialize the networks from a dict.
+
+        Parameters
+        ----------
+        data : dict
+            The dict to deserialize from.
+        """
+        return cls(**data)
diff --git a/deepmd_utils/model_format/se_e2_a.py b/deepmd_utils/model_format/se_e2_a.py
index 624f30c122..a34694a882 100644
--- a/deepmd_utils/model_format/se_e2_a.py
+++ b/deepmd_utils/model_format/se_e2_a.py
@@ -21,6 +21,7 @@
 )
 from .network import (
     EmbeddingNet,
+    NetworkCollection,
 )
 
 
@@ -154,16 +155,18 @@ def __init__(
         self.spin = spin
 
         in_dim = 1  # not considiering type embedding
-        self.embeddings = []
+        self.embeddings = NetworkCollection(
+            ntypes=self.ntypes,
+            ndim=(1 if self.type_one_side else 2),
+            network_type="embedding_network",
+        )
         for ii in range(self.ntypes):
-            self.embeddings.append(
-                EmbeddingNet(
-                    in_dim,
-                    self.neuron,
-                    self.activation_function,
-                    self.resnet_dt,
-                    self.precision,
-                )
+            self.embeddings[(ii,)] = EmbeddingNet(
+                in_dim,
+                self.neuron,
+                self.activation_function,
+                self.resnet_dt,
+                self.precision,
             )
         self.env_mat = EnvMat(self.rcut, self.rcut_smth)
         self.nnei = np.sum(self.sel)
@@ -196,7 +199,7 @@ def cal_g(
         nf, nloc, nnei = ss.shape[0:3]
         ss = ss.reshape(nf, nloc, nnei, 1)
         # nf x nloc x nnei x ng
-        gg = self.embeddings[ll].call(ss)
+        gg = self.embeddings[(ll,)].call(ss)
         return gg
 
     def call(
@@ -258,7 +261,7 @@ def serialize(self) -> dict:
             "precision": self.precision,
             "spin": self.spin,
             "env_mat": self.env_mat.serialize(),
-            "embeddings": [ii.serialize() for ii in self.embeddings],
+            "embeddings": self.embeddings.serialize(),
             "@variables": {
                 "davg": self.davg,
                 "dstd": self.dstd,
@@ -274,6 +277,6 @@ def deserialize(cls, data: dict) -> "DescrptSeA":
 
         obj["davg"] = variables["davg"]
         obj["dstd"] = variables["dstd"]
-        obj.embeddings = [EmbeddingNet.deserialize(dd) for dd in embeddings]
+        obj.embeddings = NetworkCollection.deserialize(embeddings)
         obj.env_mat = EnvMat.deserialize(env_mat)
         return obj
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index ac37120e83..98138fee17 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -14,6 +14,7 @@
     EnvMat,
     NativeLayer,
     NativeNet,
+    NetworkCollection,
     load_dp_model,
     save_dp_model,
 )
@@ -115,6 +116,70 @@ def test_embedding_net(self):
             np.testing.assert_allclose(en0.call(inp), en1.call(inp))
 
 
+class TestNetworkCollection(unittest.TestCase):
+    def setUp(self) -> None:
+        w = np.full((2, 3), 3.0)
+        b = np.full((3,), 4.0)
+        self.network = {
+            "layers": [
+                {
+                    "activation_function": "tanh",
+                    "resnet": True,
+                    "@variables": {"w": w, "b": b},
+                },
+                {
+                    "activation_function": "tanh",
+                    "resnet": True,
+                    "@variables": {"w": w, "b": b},
+                },
+            ],
+        }
+
+    def test_two_dim(self):
+        networks = NetworkCollection(ndim=2, ntypes=2)
+        networks[(0, 0)] = self.network
+        networks[(1, 1)] = self.network
+        networks[(0, 1)] = self.network
+        with self.assertRaises(RuntimeError):
+            networks.check_completeness()
+        networks[(1, 0)] = self.network
+        networks.check_completeness()
+        np.testing.assert_equal(
+            networks.serialize(),
+            NetworkCollection.deserialize(networks.serialize()).serialize(),
+        )
+        np.testing.assert_equal(
+            networks[(0, 0)].serialize(), networks.serialize()["networks"][0]
+        )
+
+    def test_one_dim(self):
+        networks = NetworkCollection(ndim=1, ntypes=2)
+        networks[(0,)] = self.network
+        with self.assertRaises(RuntimeError):
+            networks.check_completeness()
+        networks[(1,)] = self.network
+        networks.check_completeness()
+        np.testing.assert_equal(
+            networks.serialize(),
+            NetworkCollection.deserialize(networks.serialize()).serialize(),
+        )
+        np.testing.assert_equal(
+            networks[(0,)].serialize(), networks.serialize()["networks"][0]
+        )
+
+    def test_zero_dim(self):
+        networks = NetworkCollection(ndim=0, ntypes=2)
+        networks[()] = self.network
+        networks.check_completeness()
+        np.testing.assert_equal(
+            networks.serialize(),
+            NetworkCollection.deserialize(networks.serialize()).serialize(),
+        )
+        np.testing.assert_equal(
+            networks[()].serialize(), networks.serialize()["networks"][0]
+        )
+
+
 class TestDPModel(unittest.TestCase):
     def setUp(self) -> None:
         self.w = np.full((3, 2), 3.0)

From ff5845646c72e59c28e6325ecebfec5f9ce7b50a Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Fri, 12 Jan 2024 09:10:22 +0800
Subject: [PATCH 74/97] add definition for the output of fitting and model
 (#3128)

Signed-off-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
Co-authored-by: CodiumAI-Agent <137281646+CodiumAI-Agent@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd_utils/model_format/__init__.py   |  14 ++
 deepmd_utils/model_format/output_def.py | 278 ++++++++++++++++++++++++
 source/tests/test_output_def.py         | 241 ++++++++++++++++++++
 3 files changed, 533 insertions(+)
 create mode 100644 deepmd_utils/model_format/output_def.py
 create mode 100644 source/tests/test_output_def.py

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index d814f56b1a..356eaaf4fa 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -15,6 +15,14 @@
     save_dp_model,
     traverse_model_dict,
 )
+from .output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
+    VariableDef,
+    fitting_check_output,
+    model_check_output,
+)
 from .se_e2_a import (
     DescrptSeA,
 )
@@ -31,4 +39,10 @@
     "traverse_model_dict",
     "PRECISION_DICT",
     "DEFAULT_PRECISION",
+    "ModelOutputDef",
+    "FittingOutputDef",
+    "OutputVariableDef",
+    "VariableDef",
+    "model_check_output",
+    "fitting_check_output",
 ]
diff --git a/deepmd_utils/model_format/output_def.py b/deepmd_utils/model_format/output_def.py
new file mode 100644
index 0000000000..f4fcdce3ca
--- /dev/null
+++ b/deepmd_utils/model_format/output_def.py
@@ -0,0 +1,278 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Tuple,
+    Union,
+)
+
+
+def check_var(var, var_def):
+    if var_def.atomic:
+        # var.shape == [nf, nloc, *var_def.shape]
+        if len(var.shape) != len(var_def.shape) + 2:
+            raise ValueError(f"{var.shape[2:]} length not matching def {var_def.shape}")
+        if list(var.shape[2:]) != var_def.shape:
+            raise ValueError(f"{var.shape[2:]} not matching def {var_def.shape}")
+    else:
+        # var.shape == [nf, *var_def.shape]
+        if len(var.shape) != len(var_def.shape) + 1:
+            raise ValueError(f"{var.shape[1:]} length not matching def {var_def.shape}")
+        if list(var.shape[1:]) != var_def.shape:
+            raise ValueError(f"{var.shape[1:]} not matching def {var_def.shape}")
+
+
+def model_check_output(cls):
+    """Check if the output of the Model is consistent with the definition.
+
+    Two methods are assumed to be provided by the Model:
+    1. Model.output_def that gives the output definition.
+    2. Model.forward that defines the forward path of the model.
+
+    """
+
+    class wrapper(cls):
+        def __init__(
+            self,
+            *args,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            self.md = cls.output_def(self)
+
+        def forward(
+            self,
+            *args,
+            **kwargs,
+        ):
+            ret = cls.forward(self, *args, **kwargs)
+            for kk in self.md.keys_outp():
+                dd = self.md[kk]
+                check_var(ret[kk], dd)
+                if dd.reduciable:
+                    rk = get_reduce_name(kk)
+                    check_var(ret[rk], self.md[rk])
+                if dd.differentiable:
+                    dnr, dnc = get_deriv_name(kk)
+                    check_var(ret[dnr], self.md[dnr])
+                    check_var(ret[dnc], self.md[dnc])
+            return ret
+
+    return wrapper
+
+
+def fitting_check_output(cls):
+    """Check if the output of the Fitting is consistent with the definition.
+
+    Two methods are assumed to be provided by the Fitting:
+    1. Fitting.output_def that gives the output definition.
+    2. Fitting.forward defines the forward path of the fitting.
+
+    """
+
+    class wrapper(cls):
+        def __init__(
+            self,
+            *args,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            self.md = cls.output_def(self)
+
+        def forward(
+            self,
+            *args,
+            **kwargs,
+        ):
+            ret = cls.forward(self, *args, **kwargs)
+            for kk in self.md.keys():
+                dd = self.md[kk]
+                check_var(ret[kk], dd)
+            return ret
+
+    return wrapper
+
+
+class VariableDef:
+    """Defines the shape and other properties of a variable.
+
+    Parameters
+    ----------
+    name
+          Name of the output variable. Notice that the xxxx_redu,
+          xxxx_derv_c, xxxx_derv_r are reserved names that should
+          not be used to define variables.
+    shape
+          The shape of the variable. e.g. energy should be [1],
+          dipole should be [3], polarizability should be [3,3].
+    atomic
+          If the variable is defined for each atom.
+
+    """
+
+    def __init__(
+        self,
+        name: str,
+        shape: Union[List[int], Tuple[int]],
+        atomic: bool = True,
+    ):
+        self.name = name
+        self.shape = list(shape)
+        self.atomic = atomic
+
+
+class OutputVariableDef(VariableDef):
+    """Defines the shape and other properties of the one output variable.
+
+    It is assumed that the fitting network outputs variables for each
+    local atom. This class defines one output variable, including its
+    name, shape, reducibility and differentiability.
+
+    Parameters
+    ----------
+    name
+          Name of the output variable. Notice that the xxxx_redu,
+          xxxx_derv_c, xxxx_derv_r are reserved names that should
+          not be used to define variables.
+    shape
+          The shape of the variable. e.g. energy should be [1],
+          dipole should be [3], polarizability should be [3,3].
+    reduciable
+          If the variable is reduced.
+    differentiable
+          If the variable is differentiated with respect to coordinates
+          of atoms and cell tensor (pbc case). Only reduciable variables
+          are differentiable.
+
+    """
+
+    def __init__(
+        self,
+        name: str,
+        shape: Union[List[int], Tuple[int]],
+        reduciable: bool = False,
+        differentiable: bool = False,
+    ):
+        # fitting output must be atomic
+        super().__init__(name, shape, atomic=True)
+        self.reduciable = reduciable
+        self.differentiable = differentiable
+        if not self.reduciable and self.differentiable:
+            raise ValueError("only reduciable variable are differentiable")
+
+
+class FittingOutputDef:
+    """Defines the shapes and other properties of the fitting network outputs.
+
+    It is assumed that the fitting network outputs variables for each
+    local atom. This class defines all the outputs.
+
+    Parameters
+    ----------
+    var_defs
+          List of output variable definitions.
+
+    """
+
+    def __init__(
+        self,
+        var_defs: List[OutputVariableDef] = [],
+    ):
+        self.var_defs = {vv.name: vv for vv in var_defs}
+
+    def __getitem__(
+        self,
+        key,
+    ) -> OutputVariableDef:
+        return self.var_defs[key]
+
+    def get_data(self) -> Dict[str, OutputVariableDef]:
+        return self.var_defs
+
+    def keys(self):
+        return self.var_defs.keys()
+
+
+class ModelOutputDef:
+    """Defines the shapes and other properties of the model outputs.
+
+    The model reduces and differentiates fitting outputs if applicable.
+    If a variable is named by foo, then the reduced variable is called
+    foo_redu, the derivative w.r.t. coordinates is called foo_derv_r
+    and the derivative w.r.t. cell is called foo_derv_c.
+
+    Parameters
+    ----------
+    fit_defs
+          Definition for the fitting net output
+
+    """
+
+    def __init__(
+        self,
+        fit_defs: FittingOutputDef,
+    ):
+        self.def_outp = fit_defs
+        self.def_redu = do_reduce(self.def_outp)
+        self.def_derv_r, self.def_derv_c = do_derivative(self.def_outp)
+        self.var_defs = {}
+        for ii in [
+            self.def_outp.get_data(),
+            self.def_redu,
+            self.def_derv_c,
+            self.def_derv_r,
+        ]:
+            self.var_defs.update(ii)
+
+    def __getitem__(self, key) -> VariableDef:
+        return self.var_defs[key]
+
+    def get_data(self, key) -> Dict[str, VariableDef]:
+        return self.var_defs
+
+    def keys(self):
+        return self.var_defs.keys()
+
+    def keys_outp(self):
+        return self.def_outp.keys()
+
+    def keys_redu(self):
+        return self.def_redu.keys()
+
+    def keys_derv_r(self):
+        return self.def_derv_r.keys()
+
+    def keys_derv_c(self):
+        return self.def_derv_c.keys()
+
+
+def get_reduce_name(name):
+    return name + "_redu"
+
+
+def get_deriv_name(name):
+    return name + "_derv_r", name + "_derv_c"
+
+
+def do_reduce(
+    def_outp,
+):
+    def_redu = {}
+    for kk, vv in def_outp.get_data().items():
+        if vv.reduciable:
+            rk = get_reduce_name(kk)
+            def_redu[rk] = VariableDef(rk, vv.shape, atomic=False)
+    return def_redu
+
+
+def do_derivative(
+    def_outp,
+):
+    def_derv_r = {}
+    def_derv_c = {}
+    for kk, vv in def_outp.get_data().items():
+        if vv.differentiable:
+            rkr, rkc = get_deriv_name(kk)
+            def_derv_r[rkr] = VariableDef(rkr, [*vv.shape, 3], atomic=True)
+            def_derv_c[rkc] = VariableDef(rkc, [*vv.shape, 3, 3], atomic=False)
+    return def_derv_r, def_derv_c
diff --git a/source/tests/test_output_def.py b/source/tests/test_output_def.py
new file mode 100644
index 0000000000..7f5404ee31
--- /dev/null
+++ b/source/tests/test_output_def.py
@@ -0,0 +1,241 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd_utils.model_format import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+    model_check_output,
+)
+
+
+class TestDef(unittest.TestCase):
+    def test_model_output_def(self):
+        defs = [
+            OutputVariableDef("energy", [1], True, True),
+            OutputVariableDef("dos", [10], True, False),
+            OutputVariableDef("foo", [3], False, False),
+        ]
+        # fitting definition
+        fd = FittingOutputDef(defs)
+        expected_keys = ["energy", "dos", "foo"]
+        self.assertEqual(
+            set(expected_keys),
+            set(fd.keys()),
+        )
+        # shape
+        self.assertEqual(fd["energy"].shape, [1])
+        self.assertEqual(fd["dos"].shape, [10])
+        self.assertEqual(fd["foo"].shape, [3])
+        # atomic
+        self.assertEqual(fd["energy"].atomic, True)
+        self.assertEqual(fd["dos"].atomic, True)
+        self.assertEqual(fd["foo"].atomic, True)
+        # reduce
+        self.assertEqual(fd["energy"].reduciable, True)
+        self.assertEqual(fd["dos"].reduciable, True)
+        self.assertEqual(fd["foo"].reduciable, False)
+        # derivative
+        self.assertEqual(fd["energy"].differentiable, True)
+        self.assertEqual(fd["dos"].differentiable, False)
+        self.assertEqual(fd["foo"].differentiable, False)
+        # model definition
+        md = ModelOutputDef(fd)
+        expected_keys = [
+            "energy",
+            "dos",
+            "foo",
+            "energy_redu",
+            "energy_derv_r",
+            "energy_derv_c",
+            "dos_redu",
+        ]
+        self.assertEqual(
+            set(expected_keys),
+            set(md.keys()),
+        )
+        for kk in expected_keys:
+            self.assertEqual(md[kk].name, kk)
+        # reduce
+        self.assertEqual(md["energy"].reduciable, True)
+        self.assertEqual(md["dos"].reduciable, True)
+        self.assertEqual(md["foo"].reduciable, False)
+        # derivative
+        self.assertEqual(md["energy"].differentiable, True)
+        self.assertEqual(md["dos"].differentiable, False)
+        self.assertEqual(md["foo"].differentiable, False)
+        # shape
+        self.assertEqual(md["energy"].shape, [1])
+        self.assertEqual(md["dos"].shape, [10])
+        self.assertEqual(md["foo"].shape, [3])
+        self.assertEqual(md["energy_redu"].shape, [1])
+        self.assertEqual(md["energy_derv_r"].shape, [1, 3])
+        self.assertEqual(md["energy_derv_c"].shape, [1, 3, 3])
+        # atomic
+        self.assertEqual(md["energy"].atomic, True)
+        self.assertEqual(md["dos"].atomic, True)
+        self.assertEqual(md["foo"].atomic, True)
+        self.assertEqual(md["energy_redu"].atomic, False)
+        self.assertEqual(md["energy_derv_r"].atomic, True)
+        self.assertEqual(md["energy_derv_c"].atomic, False)
+
+    def test_raise_no_redu_deriv(self):
+        with self.assertRaises(ValueError) as context:
+            (OutputVariableDef("energy", [1], False, True),)
+
+    def test_model_decorator(self):
+        nf = 2
+        nloc = 3
+
+        @model_check_output
+        class Foo:
+            def output_def(self):
+                defs = [
+                    OutputVariableDef("energy", [1], True, True),
+                ]
+                return ModelOutputDef(FittingOutputDef(defs))
+
+            def forward(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                    "energy_redu": np.zeros([nf, 1]),
+                    "energy_derv_r": np.zeros([nf, nloc, 1, 3]),
+                    "energy_derv_c": np.zeros([nf, 1, 3, 3]),
+                }
+
+        ff = Foo()
+        ff.forward()
+
+    def test_model_decorator_keyerror(self):
+        nf = 2
+        nloc = 3
+
+        @model_check_output
+        class Foo:
+            def output_def(self):
+                defs = [
+                    OutputVariableDef("energy", [1], True, True),
+                ]
+                return ModelOutputDef(FittingOutputDef(defs))
+
+            def forward(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                    "energy_redu": np.zeros([nf, 1]),
+                    "energy_derv_c": np.zeros([nf, 1, 3, 3]),
+                }
+
+        ff = Foo()
+        with self.assertRaises(KeyError) as context:
+            ff.forward()
+            self.assertIn("energy_derv_r", context.exception)
+
+    def test_model_decorator_shapeerror(self):
+        nf = 2
+        nloc = 3
+
+        @model_check_output
+        class Foo:
+            def __init__(
+                self,
+                shape_rd=[nf, 1],
+                shape_dr=[nf, nloc, 1, 3],
+            ):
+                self.shape_rd, self.shape_dr = shape_rd, shape_dr
+
+            def output_def(self):
+                defs = [
+                    OutputVariableDef("energy", [1], True, True),
+                ]
+                return ModelOutputDef(FittingOutputDef(defs))
+
+            def forward(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                    "energy_redu": np.zeros(self.shape_rd),
+                    "energy_derv_r": np.zeros(self.shape_dr),
+                    "energy_derv_c": np.zeros([nf, 1, 3, 3]),
+                }
+
+        ff = Foo()
+        ff.forward()
+        # shape of reduced energy
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_rd=[nf, nloc, 1])
+            ff.forward()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_rd=[nf, 2])
+            ff.forward()
+            self.assertIn("not matching", context.exception)
+        # shape of dr
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_dr=[nf, nloc, 1])
+            ff.forward()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_dr=[nf, nloc, 1, 3, 3])
+            ff.forward()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_dr=[nf, nloc, 1, 4])
+            ff.forward()
+            self.assertIn("not matching", context.exception)
+
+    def test_fitting_decorator(self):
+        nf = 2
+        nloc = 3
+
+        @fitting_check_output
+        class Foo:
+            def output_def(self):
+                defs = [
+                    OutputVariableDef("energy", [1], True, True),
+                ]
+                return FittingOutputDef(defs)
+
+            def forward(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                }
+
+        ff = Foo()
+        ff.forward()
+
+    def test_fitting_decorator_shapeerror(self):
+        nf = 2
+        nloc = 3
+
+        @fitting_check_output
+        class Foo:
+            def __init__(
+                self,
+                shape=[nf, nloc, 1],
+            ):
+                self.shape = shape
+
+            def output_def(self):
+                defs = [
+                    OutputVariableDef("energy", [1], True, True),
+                ]
+                return FittingOutputDef(defs)
+
+            def forward(self):
+                return {
+                    "energy": np.zeros(self.shape),
+                }
+
+        ff = Foo()
+        ff.forward()
+        # shape of the fitting output energy
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape=[nf, 1])
+            ff.forward()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape=[nf, nloc, 2])
+            ff.forward()
+            self.assertIn("not matching", context.exception)

From 04f07effcfe9c3add9aa110741bf714f7709d1f7 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 11 Jan 2024 20:55:24 -0500
Subject: [PATCH 75/97] lmp: fix evflag initialization (#3133)

Fix #3126.

See
https://docs.lammps.org/Developer_updating.html#use-ev-init-to-initialize-variables-derived-from-eflag-and-vflag
---
 source/lmp/pair_deepmd.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 533e3538fe..3a6c1c8bbf 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -450,9 +450,9 @@ void PairDeepMD::compute(int eflag, int vflag) {
   if (numb_models == 0) {
     return;
   }
-  if (eflag || vflag) {
-    ev_setup(eflag, vflag);
-  }
+  // See
+  // https://docs.lammps.org/Developer_updating.html#use-ev-init-to-initialize-variables-derived-from-eflag-and-vflag
+  ev_init(eflag, vflag);
   if (vflag_atom) {
     error->all(FLERR,
                "6-element atomic virial is not supported. Use compute "

From 828df66cba04d208f2412e7883a5c9987e4aed0f Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 11 Jan 2024 23:29:30 -0500
Subject: [PATCH 76/97] cc: refactor DeepPotModelDevi, making it
 framework-independent (#3134)

Refactor `DeepPotModelDevi` as a step of #3122. Now, it is just a
wrapper of multiple `DeepPot` classes. Models can have different
behaviors inside different `DeepPot`.

One may argue that the new class needs to prepare the input multiple
times. However, merely copying the memory is not expensive. Also, during
simulations, this class is usually invoked only once every 100 steps.
---
 source/api_cc/include/DeepPot.h |  41 +----
 source/api_cc/src/DeepPot.cc    | 312 ++------------------------------
 2 files changed, 22 insertions(+), 331 deletions(-)

diff --git a/source/api_cc/include/DeepPot.h b/source/api_cc/include/DeepPot.h
index 7c4a0afe10..0e61b03ce9 100644
--- a/source/api_cc/include/DeepPot.h
+++ b/source/api_cc/include/DeepPot.h
@@ -480,7 +480,7 @@ class DeepPotModelDevi {
    **/
   double cutoff() const {
     assert(inited);
-    return rcut;
+    return dps[0].cutoff();
   };
   /**
    * @brief Get the number of types.
@@ -488,7 +488,7 @@ class DeepPotModelDevi {
    **/
   int numb_types() const {
     assert(inited);
-    return ntypes;
+    return dps[0].numb_types();
   };
   /**
    * @brief Get the number of types with spin.
@@ -496,7 +496,7 @@ class DeepPotModelDevi {
    **/
   int numb_types_spin() const {
     assert(inited);
-    return ntypes_spin;
+    return dps[0].numb_types_spin();
   };
   /**
    * @brief Get the dimension of the frame parameter.
@@ -504,7 +504,7 @@ class DeepPotModelDevi {
    **/
   int dim_fparam() const {
     assert(inited);
-    return dfparam;
+    return dps[0].dim_fparam();
   };
   /**
    * @brief Get the dimension of the atomic parameter.
@@ -512,7 +512,7 @@ class DeepPotModelDevi {
    **/
   int dim_aparam() const {
     assert(inited);
-    return daparam;
+    return dps[0].dim_aparam();
   };
   /**
    * @brief Compute the average energy.
@@ -590,39 +590,12 @@ class DeepPotModelDevi {
    **/
   bool is_aparam_nall() const {
     assert(inited);
-    return aparam_nall;
+    return dps[0].is_aparam_nall();
   };
 
  private:
   unsigned numb_models;
-  std::vector<tensorflow::Session*> sessions;
-  int num_intra_nthreads, num_inter_nthreads;
-  std::vector<tensorflow::GraphDef*> graph_defs;
+  std::vector<deepmd::DeepPot> dps;
   bool inited;
-  template <class VT>
-  VT get_scalar(const std::string name) const;
-  // VALUETYPE get_rcut () const;
-  // int get_ntypes () const;
-  double rcut;
-  double cell_size;
-  int dtype;
-  std::string model_type;
-  std::string model_version;
-  int ntypes;
-  int ntypes_spin;
-  int dfparam;
-  int daparam;
-  bool aparam_nall;
-  template <typename VALUETYPE>
-  void validate_fparam_aparam(const int& nloc,
-                              const std::vector<VALUETYPE>& fparam,
-                              const std::vector<VALUETYPE>& aparam) const;
-
-  // copy neighbor list info from host
-  bool init_nbor;
-  std::vector<std::vector<int> > sec;
-  deepmd::AtomMap atommap;
-  NeighborListData nlist_data;
-  InputNlist nlist;
 };
 }  // namespace deepmd
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index bb39a9fd60..cd1b571153 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -1234,30 +1234,17 @@ void DeepPot::get_type_map(std::string& type_map) {
   type_map = get_scalar<STRINGTYPE>("model_attr/tmap");
 }
 
-DeepPotModelDevi::DeepPotModelDevi()
-    : inited(false), init_nbor(false), numb_models(0) {}
+DeepPotModelDevi::DeepPotModelDevi() : inited(false), numb_models(0) {}
 
 DeepPotModelDevi::DeepPotModelDevi(
     const std::vector<std::string>& models,
     const int& gpu_rank,
     const std::vector<std::string>& file_contents)
-    : inited(false), init_nbor(false), numb_models(0) {
-  try {
-    init(models, gpu_rank, file_contents);
-  } catch (...) {
-    // Clean up and rethrow, as the destructor will not be called
-    for (unsigned ii = 0; ii < numb_models; ++ii) {
-      delete graph_defs[ii];
-    }
-    throw;
-  }
+    : inited(false), numb_models(0) {
+  init(models, gpu_rank, file_contents);
 }
 
-DeepPotModelDevi::~DeepPotModelDevi() {
-  for (unsigned ii = 0; ii < numb_models; ++ii) {
-    delete graph_defs[ii];
-  }
-}
+DeepPotModelDevi::~DeepPotModelDevi() {}
 
 void DeepPotModelDevi::init(const std::vector<std::string>& models,
                             const int& gpu_rank,
@@ -1269,188 +1256,17 @@ void DeepPotModelDevi::init(const std::vector<std::string>& models,
     return;
   }
   numb_models = models.size();
-  sessions.resize(numb_models);
-  graph_defs.resize(numb_models);
-
-  int gpu_num = -1;
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  DPGetDeviceCount(gpu_num);
-#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-
-  SessionOptions options;
-  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
-  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
-  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
-  for (unsigned ii = 0; ii < numb_models; ++ii) {
-    graph_defs[ii] = new GraphDef();
-    if (file_contents.size() == 0) {
-      check_status(ReadBinaryProto(Env::Default(), models[ii], graph_defs[ii]));
-    } else {
-      (*graph_defs[ii]).ParseFromString(file_contents[ii]);
-    }
-  }
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  if (gpu_num > 0) {
-    options.config.set_allow_soft_placement(true);
-    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
-        0.9);
-    options.config.mutable_gpu_options()->set_allow_growth(true);
-    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
-  }
-#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-
-  for (unsigned ii = 0; ii < numb_models; ++ii) {
-    if (gpu_num > 0) {
-      std::string str = "/gpu:";
-      str += std::to_string(gpu_rank % gpu_num);
-      graph::SetDefaultDevice(str, &(*graph_defs[ii]));
-    }
-    check_status(NewSession(options, &(sessions[ii])));
-    check_status(sessions[ii]->Create(*graph_defs[ii]));
-  }
-  try {
-    model_version = get_scalar<STRINGTYPE>("model_attr/model_version");
-  } catch (deepmd::tf_exception& e) {
-    // no model version defined in old models
-    model_version = "0.0";
-  }
-  if (!model_compatable(model_version)) {
-    throw deepmd::deepmd_exception(
-        "incompatable model: version " + model_version +
-        " in graph, but version " + global_model_version +
-        " supported. "
-        "See https://deepmd.rtfd.io/compatability/ for details.");
-  }
-  dtype = session_get_dtype(sessions[0], "descrpt_attr/rcut");
-  if (dtype == tensorflow::DT_DOUBLE) {
-    rcut = get_scalar<double>("descrpt_attr/rcut");
-  } else {
-    rcut = get_scalar<float>("descrpt_attr/rcut");
-  }
-  cell_size = rcut;
-  ntypes = get_scalar<int>("descrpt_attr/ntypes");
-  try {
-    ntypes_spin = get_scalar<int>("spin_attr/ntypes_spin");
-  } catch (const deepmd::deepmd_exception) {
-    ntypes_spin = 0;
-  }
-  dfparam = get_scalar<int>("fitting_attr/dfparam");
-  daparam = get_scalar<int>("fitting_attr/daparam");
-  if (dfparam < 0) {
-    dfparam = 0;
+  if (numb_models == 0) {
+    throw deepmd::deepmd_exception("no model is specified");
   }
-  if (daparam < 0) {
-    daparam = 0;
+  dps.resize(numb_models);
+  for (unsigned int ii = 0; ii < numb_models; ++ii) {
+    dps[ii].init(models[ii], gpu_rank,
+                 file_contents.size() > ii ? file_contents[ii] : "");
   }
-  if (daparam > 0) {
-    try {
-      aparam_nall = get_scalar<bool>("fitting_attr/aparam_nall");
-    } catch (const deepmd::deepmd_exception) {
-      aparam_nall = false;
-    }
-  } else {
-    aparam_nall = false;
-  }
-  model_type = get_scalar<STRINGTYPE>("model_attr/model_type");
-  // rcut = get_rcut();
-  // cell_size = rcut;
-  // ntypes = get_ntypes();
   inited = true;
-
-  init_nbor = false;
-}
-
-template <class VT>
-VT DeepPotModelDevi::get_scalar(const std::string name) const {
-  VT myrcut;
-  for (unsigned ii = 0; ii < numb_models; ++ii) {
-    VT ret = session_get_scalar<VT>(sessions[ii], name);
-    if (ii == 0) {
-      myrcut = ret;
-    } else {
-      assert(myrcut == ret);
-    }
-  }
-  return myrcut;
 }
 
-template <typename VALUETYPE>
-void DeepPotModelDevi::validate_fparam_aparam(
-    const int& nloc,
-    const std::vector<VALUETYPE>& fparam,
-    const std::vector<VALUETYPE>& aparam) const {
-  if (fparam.size() != dfparam) {
-    throw deepmd::deepmd_exception(
-        "the dim of frame parameter provided is not consistent with what the "
-        "model uses");
-  }
-  if (aparam.size() != daparam * nloc) {
-    throw deepmd::deepmd_exception(
-        "the dim of atom parameter provided is not consistent with what the "
-        "model uses");
-  }
-}
-
-template void DeepPotModelDevi::validate_fparam_aparam<double>(
-    const int& nloc,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam) const;
-
-template void DeepPotModelDevi::validate_fparam_aparam<float>(
-    const int& nloc,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam) const;
-
-// void
-// DeepPotModelDevi::
-// compute (ENERGYTYPE &			dener,
-// 	 std::vector<VALUETYPE> &	dforce_,
-// 	 std::vector<VALUETYPE> &	dvirial,
-// 	 std::vector<VALUETYPE> &	model_devi,
-// 	 const std::vector<VALUETYPE> &	dcoord_,
-// 	 const std::vector<int> &	datype_,
-// 	 const std::vector<VALUETYPE> &	dbox,
-// 	 const std::vector<VALUETYPE> &	fparam,
-// 	 const std::vector<VALUETYPE> &	aparam)
-// {
-//   if (numb_models == 0) return;
-
-//   atommap = AtomMap<VALUETYPE> (datype_.begin(), datype_.end());
-//   validate_fparam_aparam(atommap.get_type().size(), fparam, aparam);
-
-//   std::vector<std::pair<std::string, Tensor>> input_tensors;
-//   int nloc = session_input_tensors (input_tensors, dcoord_, ntypes, datype_,
-//   dbox, cell_size, fparam, aparam, atommap);
-
-//   std::vector<ENERGYTYPE > all_energy (numb_models);
-//   std::vector<std::vector<VALUETYPE > > all_force (numb_models);
-//   std::vector<std::vector<VALUETYPE > > all_virial (numb_models);
-
-//   for (unsigned ii = 0; ii < numb_models; ++ii){
-//     run_model (all_energy[ii], all_force[ii], all_virial[ii], sessions[ii],
-//     input_tensors, atommap);
-//   }
-
-//   dener = 0;
-//   for (unsigned ii = 0; ii < numb_models; ++ii){
-//     dener += all_energy[ii];
-//   }
-//   dener /= VALUETYPE(numb_models);
-//   compute_avg (dvirial, all_virial);
-//   compute_avg (dforce_, all_force);
-
-//   compute_std_f (model_devi, dforce_, all_force);
-
-//   // for (unsigned ii = 0; ii < numb_models; ++ii){
-//   //   cout << all_force[ii][573] << " " << all_force[ii][574] << " " <<
-//   all_force[ii][575] << endl;
-//   // }
-//   // cout << dforce_[573] << " "
-//   //      << dforce_[574] << " "
-//   //      << dforce_[575] << " "
-//   //      << model_devi[191] << endl;
-// }
-
 template <typename VALUETYPE>
 void DeepPotModelDevi::compute(std::vector<ENERGYTYPE>& all_energy,
                                std::vector<std::vector<VALUETYPE>>& all_force,
@@ -1466,57 +1282,12 @@ void DeepPotModelDevi::compute(std::vector<ENERGYTYPE>& all_energy,
   if (numb_models == 0) {
     return;
   }
-  int nall = dcoord_.size() / 3;
-  int nframes = 1;
-  int nloc = nall - nghost;
-  validate_fparam_aparam((aparam_nall ? nall : nloc), fparam, aparam_);
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  // select real atoms
-  std::vector<VALUETYPE> dcoord, dforce, aparam, datom_energy, datom_virial;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_,
-                          nghost, ntypes, nframes, daparam, nall, aparam_nall);
-
-  // agp == 0 means that the LAMMPS nbor list has been updated
-  if (ago == 0) {
-    atommap = AtomMap(datype.begin(), datype.begin() + nloc_real);
-    assert(nloc == atommap.get_type().size());
-
-    nlist_data.copy_from_nlist(lmp_list);
-    nlist_data.shuffle_exclude_empty(fwd_map);
-    nlist_data.shuffle(atommap);
-    nlist_data.make_inlist(nlist);
-  }
-  int ret;
-  if (dtype == tensorflow::DT_DOUBLE) {
-    ret = session_input_tensors<double>(input_tensors, dcoord, ntypes, datype,
-                                        dbox, nlist, fparam, aparam, atommap,
-                                        nghost_real, ago, "", aparam_nall);
-  } else {
-    ret = session_input_tensors<float>(input_tensors, dcoord, ntypes, datype,
-                                       dbox, nlist, fparam, aparam, atommap,
-                                       nghost_real, ago, "", aparam_nall);
-  }
   all_energy.resize(numb_models);
   all_force.resize(numb_models);
   all_virial.resize(numb_models);
-  assert(nloc == ret);
   for (unsigned ii = 0; ii < numb_models; ++ii) {
-    std::vector<VALUETYPE> dforce;
-    if (dtype == tensorflow::DT_DOUBLE) {
-      run_model<double>(all_energy[ii], dforce, all_virial[ii], sessions[ii],
-                        input_tensors, atommap, 1, nghost_real);
-    } else {
-      run_model<float>(all_energy[ii], dforce, all_virial[ii], sessions[ii],
-                       input_tensors, atommap, 1, nghost_real);
-    }
-    // bkw map
-    all_force[ii].resize(nframes * fwd_map.size() * 3);
-    select_map<VALUETYPE>(all_force[ii], dforce, bkw_map, 3, nframes,
-                          fwd_map.size(), nall_real);
+    dps[ii].compute(all_energy[ii], all_force[ii], all_virial[ii], dcoord_,
+                    datype_, dbox, nghost, lmp_list, ago, fparam, aparam_);
   }
 }
 
@@ -1564,68 +1335,15 @@ void DeepPotModelDevi::compute(
   if (numb_models == 0) {
     return;
   }
-  int nframes = 1;
-  int nall = dcoord_.size() / 3;
-  int nloc = nall - nghost;
-  validate_fparam_aparam((aparam_nall ? nall : nloc), fparam, aparam_);
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  // select real atoms
-  std::vector<VALUETYPE> dcoord, dforce, aparam, datom_energy, datom_virial;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_,
-                          nghost, ntypes, nframes, daparam, nall, aparam_nall);
-  // agp == 0 means that the LAMMPS nbor list has been updated
-
-  if (ago == 0) {
-    atommap = AtomMap(datype.begin(), datype.begin() + nloc_real);
-    assert(nloc == atommap.get_type().size());
-
-    nlist_data.copy_from_nlist(lmp_list);
-    nlist_data.shuffle_exclude_empty(fwd_map);
-    nlist_data.shuffle(atommap);
-    nlist_data.make_inlist(nlist);
-  }
-  int ret;
-  if (dtype == tensorflow::DT_DOUBLE) {
-    ret = session_input_tensors<double>(input_tensors, dcoord, ntypes, datype,
-                                        dbox, nlist, fparam, aparam, atommap,
-                                        nghost_real, ago, "", aparam_nall);
-  } else {
-    ret = session_input_tensors<float>(input_tensors, dcoord, ntypes, datype,
-                                       dbox, nlist, fparam, aparam, atommap,
-                                       nghost_real, ago, "", aparam_nall);
-  }
-
   all_energy.resize(numb_models);
   all_force.resize(numb_models);
   all_virial.resize(numb_models);
   all_atom_energy.resize(numb_models);
   all_atom_virial.resize(numb_models);
-  assert(nloc == ret);
   for (unsigned ii = 0; ii < numb_models; ++ii) {
-    std::vector<VALUETYPE> dforce, datom_energy, datom_virial;
-    if (dtype == tensorflow::DT_DOUBLE) {
-      run_model<double>(all_energy[ii], dforce, all_virial[ii], datom_energy,
-                        datom_virial, sessions[ii], input_tensors, atommap, 1,
-                        nghost_real);
-    } else {
-      run_model<float>(all_energy[ii], dforce, all_virial[ii], datom_energy,
-                       datom_virial, sessions[ii], input_tensors, atommap, 1,
-                       nghost_real);
-    }
-    // bkw map
-    all_force[ii].resize(nframes * fwd_map.size() * 3);
-    all_atom_energy[ii].resize(nframes * fwd_map.size());
-    all_atom_virial[ii].resize(nframes * fwd_map.size() * 9);
-    select_map<VALUETYPE>(all_force[ii], dforce, bkw_map, 3, nframes,
-                          fwd_map.size(), nall_real);
-    select_map<VALUETYPE>(all_atom_energy[ii], datom_energy, bkw_map, 1,
-                          nframes, fwd_map.size(), nall_real);
-    select_map<VALUETYPE>(all_atom_virial[ii], datom_virial, bkw_map, 9,
-                          nframes, fwd_map.size(), nall_real);
+    dps[ii].compute(all_energy[ii], all_force[ii], all_virial[ii],
+                    all_atom_energy[ii], all_atom_virial[ii], dcoord_, datype_,
+                    dbox, nghost, lmp_list, ago, fparam, aparam_);
   }
 }
 

From d5590a4b7a196dd8f65c63b332ecf6440f309f9d Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Fri, 12 Jan 2024 19:14:03 +0800
Subject: [PATCH 77/97] fix: model check assumes __call__ as the forward method
 (#3136)

- add `__call__` method for `NativeOP`.
- adapt UTs accordingly.

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd_utils/model_format/__init__.py   |  2 ++
 deepmd_utils/model_format/common.py     |  4 +++
 deepmd_utils/model_format/output_def.py | 12 +++----
 source/tests/test_output_def.py         | 48 +++++++++++++------------
 4 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index 356eaaf4fa..0d6972e9cf 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -2,6 +2,7 @@
 from .common import (
     DEFAULT_PRECISION,
     PRECISION_DICT,
+    NativeOP,
 )
 from .env_mat import (
     EnvMat,
@@ -34,6 +35,7 @@
     "NativeLayer",
     "NativeNet",
     "NetworkCollection",
+    "NativeOP",
     "load_dp_model",
     "save_dp_model",
     "traverse_model_dict",
diff --git a/deepmd_utils/model_format/common.py b/deepmd_utils/model_format/common.py
index 82beb969c2..d032e5d5df 100644
--- a/deepmd_utils/model_format/common.py
+++ b/deepmd_utils/model_format/common.py
@@ -22,3 +22,7 @@ class NativeOP(ABC):
     def call(self, *args, **kwargs):
         """Forward pass in NumPy implementation."""
         raise NotImplementedError
+
+    def __call__(self, *args, **kwargs):
+        """Forward pass in NumPy implementation."""
+        return self.call(*args, **kwargs)
diff --git a/deepmd_utils/model_format/output_def.py b/deepmd_utils/model_format/output_def.py
index f4fcdce3ca..7feb24a145 100644
--- a/deepmd_utils/model_format/output_def.py
+++ b/deepmd_utils/model_format/output_def.py
@@ -27,7 +27,7 @@ def model_check_output(cls):
 
     Two methods are assumed to be provided by the Model:
     1. Model.output_def that gives the output definition.
-    2. Model.forward that defines the forward path of the model.
+    2. Model.__call__ that defines the forward path of the model.
 
     """
 
@@ -40,12 +40,12 @@ def __init__(
             super().__init__(*args, **kwargs)
             self.md = cls.output_def(self)
 
-        def forward(
+        def __call__(
             self,
             *args,
             **kwargs,
         ):
-            ret = cls.forward(self, *args, **kwargs)
+            ret = cls.__call__(self, *args, **kwargs)
             for kk in self.md.keys_outp():
                 dd = self.md[kk]
                 check_var(ret[kk], dd)
@@ -66,7 +66,7 @@ def fitting_check_output(cls):
 
     Two methods are assumed to be provided by the Fitting:
     1. Fitting.output_def that gives the output definition.
-    2. Fitting.forward defines the forward path of the fitting.
+    2. Fitting.__call__ defines the forward path of the fitting.
 
     """
 
@@ -79,12 +79,12 @@ def __init__(
             super().__init__(*args, **kwargs)
             self.md = cls.output_def(self)
 
-        def forward(
+        def __call__(
             self,
             *args,
             **kwargs,
         ):
-            ret = cls.forward(self, *args, **kwargs)
+            ret = cls.__call__(self, *args, **kwargs)
             for kk in self.md.keys():
                 dd = self.md[kk]
                 check_var(ret[kk], dd)
diff --git a/source/tests/test_output_def.py b/source/tests/test_output_def.py
index 7f5404ee31..e0c56784da 100644
--- a/source/tests/test_output_def.py
+++ b/source/tests/test_output_def.py
@@ -6,6 +6,7 @@
 from deepmd_utils.model_format import (
     FittingOutputDef,
     ModelOutputDef,
+    NativeOP,
     OutputVariableDef,
     fitting_check_output,
     model_check_output,
@@ -91,14 +92,14 @@ def test_model_decorator(self):
         nloc = 3
 
         @model_check_output
-        class Foo:
+        class Foo(NativeOP):
             def output_def(self):
                 defs = [
                     OutputVariableDef("energy", [1], True, True),
                 ]
                 return ModelOutputDef(FittingOutputDef(defs))
 
-            def forward(self):
+            def call(self):
                 return {
                     "energy": np.zeros([nf, nloc, 1]),
                     "energy_redu": np.zeros([nf, 1]),
@@ -107,21 +108,24 @@ def forward(self):
                 }
 
         ff = Foo()
-        ff.forward()
+        ff()
 
     def test_model_decorator_keyerror(self):
         nf = 2
         nloc = 3
 
         @model_check_output
-        class Foo:
+        class Foo(NativeOP):
+            def __init__(self):
+                super().__init__()
+
             def output_def(self):
                 defs = [
                     OutputVariableDef("energy", [1], True, True),
                 ]
                 return ModelOutputDef(FittingOutputDef(defs))
 
-            def forward(self):
+            def call(self):
                 return {
                     "energy": np.zeros([nf, nloc, 1]),
                     "energy_redu": np.zeros([nf, 1]),
@@ -130,7 +134,7 @@ def forward(self):
 
         ff = Foo()
         with self.assertRaises(KeyError) as context:
-            ff.forward()
+            ff()
             self.assertIn("energy_derv_r", context.exception)
 
     def test_model_decorator_shapeerror(self):
@@ -138,7 +142,7 @@ def test_model_decorator_shapeerror(self):
         nloc = 3
 
         @model_check_output
-        class Foo:
+        class Foo(NativeOP):
             def __init__(
                 self,
                 shape_rd=[nf, 1],
@@ -152,7 +156,7 @@ def output_def(self):
                 ]
                 return ModelOutputDef(FittingOutputDef(defs))
 
-            def forward(self):
+            def call(self):
                 return {
                     "energy": np.zeros([nf, nloc, 1]),
                     "energy_redu": np.zeros(self.shape_rd),
@@ -161,28 +165,28 @@ def forward(self):
                 }
 
         ff = Foo()
-        ff.forward()
+        ff()
         # shape of reduced energy
         with self.assertRaises(ValueError) as context:
             ff = Foo(shape_rd=[nf, nloc, 1])
-            ff.forward()
+            ff()
             self.assertIn("not matching", context.exception)
         with self.assertRaises(ValueError) as context:
             ff = Foo(shape_rd=[nf, 2])
-            ff.forward()
+            ff()
             self.assertIn("not matching", context.exception)
         # shape of dr
         with self.assertRaises(ValueError) as context:
             ff = Foo(shape_dr=[nf, nloc, 1])
-            ff.forward()
+            ff()
             self.assertIn("not matching", context.exception)
         with self.assertRaises(ValueError) as context:
             ff = Foo(shape_dr=[nf, nloc, 1, 3, 3])
-            ff.forward()
+            ff()
             self.assertIn("not matching", context.exception)
         with self.assertRaises(ValueError) as context:
             ff = Foo(shape_dr=[nf, nloc, 1, 4])
-            ff.forward()
+            ff()
             self.assertIn("not matching", context.exception)
 
     def test_fitting_decorator(self):
@@ -190,27 +194,27 @@ def test_fitting_decorator(self):
         nloc = 3
 
         @fitting_check_output
-        class Foo:
+        class Foo(NativeOP):
             def output_def(self):
                 defs = [
                     OutputVariableDef("energy", [1], True, True),
                 ]
                 return FittingOutputDef(defs)
 
-            def forward(self):
+            def call(self):
                 return {
                     "energy": np.zeros([nf, nloc, 1]),
                 }
 
         ff = Foo()
-        ff.forward()
+        ff()
 
     def test_fitting_decorator_shapeerror(self):
         nf = 2
         nloc = 3
 
         @fitting_check_output
-        class Foo:
+        class Foo(NativeOP):
             def __init__(
                 self,
                 shape=[nf, nloc, 1],
@@ -223,19 +227,19 @@ def output_def(self):
                 ]
                 return FittingOutputDef(defs)
 
-            def forward(self):
+            def call(self):
                 return {
                     "energy": np.zeros(self.shape),
                 }
 
         ff = Foo()
-        ff.forward()
+        ff()
         # shape of reduced energy
         with self.assertRaises(ValueError) as context:
             ff = Foo(shape=[nf, 1])
-            ff.forward()
+            ff()
             self.assertIn("not matching", context.exception)
         with self.assertRaises(ValueError) as context:
             ff = Foo(shape=[nf, nloc, 2])
-            ff.forward()
+            ff()
             self.assertIn("not matching", context.exception)

From 308f97eeec89230ef3bf60ba3e10931ad72bf5de Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Fri, 12 Jan 2024 22:04:53 +0800
Subject: [PATCH 78/97] support fitting net (#3137)

- also add doc string for the embedding net

---------

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd_utils/model_format/__init__.py   |   2 +
 deepmd_utils/model_format/network.py    | 116 +++++++++++++++++++++++-
 source/tests/test_model_format_utils.py |  28 ++++++
 3 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index 0d6972e9cf..9d1fafe5c8 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -9,6 +9,7 @@
 )
 from .network import (
     EmbeddingNet,
+    FittingNet,
     NativeLayer,
     NativeNet,
     NetworkCollection,
@@ -32,6 +33,7 @@
     "DescrptSeA",
     "EnvMat",
     "EmbeddingNet",
+    "FittingNet",
     "NativeLayer",
     "NativeNet",
     "NetworkCollection",
diff --git a/deepmd_utils/model_format/network.py b/deepmd_utils/model_format/network.py
index c587b08cf6..c73e53f5cb 100644
--- a/deepmd_utils/model_format/network.py
+++ b/deepmd_utils/model_format/network.py
@@ -162,7 +162,9 @@ def __init__(
         self.w = w.astype(prec) if w is not None else None
         self.b = b.astype(prec) if b is not None else None
         self.idt = idt.astype(prec) if idt is not None else None
-        self.activation_function = activation_function
+        self.activation_function = (
+            activation_function if activation_function is not None else "none"
+        )
         self.resnet = resnet
         self.check_type_consistency()
 
@@ -354,6 +356,24 @@ def call(self, x: np.ndarray) -> np.ndarray:
 
 
 class EmbeddingNet(NativeNet):
+    """The embedding network.
+
+    Parameters
+    ----------
+    in_dim
+        Input dimension.
+    neuron
+        The number of neurons in each layer. The output dimension
+        is the same as the dimension of the last layer.
+    activation_function
+        The activation function.
+    resnet_dt
+        Use time step at the resnet architecture.
+    precision
+        Floating point precision for the model paramters.
+
+    """
+
     def __init__(
         self,
         in_dim,
@@ -370,8 +390,8 @@ def __init__(
             layers.append(
                 NativeLayer(
                     rng.normal(size=(i_in, i_ot)),
-                    b=rng.normal(size=(ii)),
-                    idt=rng.normal(size=(ii)) if resnet_dt else None,
+                    b=rng.normal(size=(i_ot)),
+                    idt=rng.normal(size=(i_ot)) if resnet_dt else None,
                     activation_function=activation_function,
                     resnet=True,
                     precision=precision,
@@ -417,6 +437,95 @@ def deserialize(cls, data: dict) -> "EmbeddingNet":
         return obj
 
 
+class FittingNet(EmbeddingNet):
+    """The fitting network. It may be implemented as an embedding
+    net connected with a linear output layer.
+
+    Parameters
+    ----------
+    in_dim
+        Input dimension.
+    out_dim
+        Output dimension
+    neuron
+        The number of neurons in each hidden layer.
+    activation_function
+        The activation function.
+    resnet_dt
+        Use time step at the resnet architecture.
+    precision
+        Floating point precision for the model paramters.
+    bias_out
+        The last linear layer has bias.
+
+    """
+
+    def __init__(
+        self,
+        in_dim,
+        out_dim,
+        neuron: List[int] = [24, 48, 96],
+        activation_function: str = "tanh",
+        resnet_dt: bool = False,
+        precision: str = DEFAULT_PRECISION,
+        bias_out: bool = True,
+    ):
+        super().__init__(
+            in_dim,
+            neuron=neuron,
+            activation_function=activation_function,
+            resnet_dt=resnet_dt,
+            precision=precision,
+        )
+        rng = np.random.default_rng()
+        i_in, i_ot = neuron[-1], out_dim
+        self.layers.append(
+            NativeLayer(
+                rng.normal(size=(i_in, i_ot)),
+                b=rng.normal(size=(i_ot)) if bias_out else None,
+                idt=None,
+                activation_function=None,
+                resnet=False,
+                precision=precision,
+            )
+        )
+        self.out_dim = out_dim
+        self.bias_out = bias_out
+
+    def serialize(self) -> dict:
+        """Serialize the network to a dict.
+
+        Returns
+        -------
+        dict
+            The serialized network.
+        """
+        return {
+            "in_dim": self.in_dim,
+            "out_dim": self.out_dim,
+            "neuron": self.neuron.copy(),
+            "activation_function": self.activation_function,
+            "resnet_dt": self.resnet_dt,
+            "precision": self.precision,
+            "bias_out": self.bias_out,
+            "layers": [layer.serialize() for layer in self.layers],
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "FittingNet":
+        """Deserialize the network from a dict.
+
+        Parameters
+        ----------
+        data : dict
+            The dict to deserialize from.
+        """
+        layers = data.pop("layers")
+        obj = cls(**data)
+        NativeNet.__init__(obj, layers)
+        return obj
+
+
 class NetworkCollection:
     """A collection of networks for multiple elements.
 
@@ -439,6 +548,7 @@ class NetworkCollection:
     NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = {
         "network": NativeNet,
         "embedding_network": EmbeddingNet,
+        "fitting_network": FittingNet,
     }
 
     def __init__(
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index 98138fee17..b08c3bcf52 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -12,6 +12,7 @@
     DescrptSeA,
     EmbeddingNet,
     EnvMat,
+    FittingNet,
     NativeLayer,
     NativeNet,
     NetworkCollection,
@@ -98,6 +99,8 @@ def test_deserialize(self):
         np.testing.assert_array_equal(network[0]["resnet"], True)
         np.testing.assert_array_equal(network[1]["resnet"], True)
 
+
+class TestEmbeddingNet(unittest.TestCase):
     def test_embedding_net(self):
         for ni, act, idt, prec in itertools.product(
             [1, 10],
@@ -116,6 +119,31 @@ def test_embedding_net(self):
             np.testing.assert_allclose(en0.call(inp), en1.call(inp))
 
 
+class TestFittingNet(unittest.TestCase):
+    def test_fitting_net(self):
+        for ni, no, act, idt, prec, bo in itertools.product(
+            [1, 10],
+            [1, 7],
+            ["tanh", "none"],
+            [True, False],
+            ["double", "single"],
+            [True, False],
+        ):
+            en0 = FittingNet(
+                ni,
+                no,
+                activation_function=act,
+                precision=prec,
+                resnet_dt=idt,
+                bias_out=bo,
+            )
+            en1 = FittingNet.deserialize(en0.serialize())
+            inp = np.ones([ni])
+            en0.call(inp)
+            en1.call(inp)
+            np.testing.assert_allclose(en0.call(inp), en1.call(inp))
+
+
 class TestNetworkCollection(unittest.TestCase):
     def setUp(self) -> None:
         w = np.full((2, 3), 3.0)

From 15117a023eee3bf08080f4d40e33dae20f367450 Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Sat, 13 Jan 2024 14:19:42 +0800
Subject: [PATCH 79/97] refactorize NativeLayer, interface does not rely on the
 platform (#3138)

- add parameter shape consistency check for layer
- add input-output shape consistency check for net

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd_utils/model_format/network.py    |  94 ++++++++++++++------
 deepmd_utils/model_format/se_e2_a.py    |   2 +
 source/tests/test_model_format_utils.py | 111 ++++++++++++++++++------
 3 files changed, 154 insertions(+), 53 deletions(-)

diff --git a/deepmd_utils/model_format/network.py b/deepmd_utils/model_format/network.py
index c73e53f5cb..d9071784ca 100644
--- a/deepmd_utils/model_format/network.py
+++ b/deepmd_utils/model_format/network.py
@@ -3,6 +3,7 @@
 
 See issue #2982 for more information.
 """
+import copy
 import itertools
 import json
 from typing import (
@@ -150,23 +151,26 @@ class NativeLayer(NativeOP):
 
     def __init__(
         self,
-        w: Optional[np.ndarray] = None,
-        b: Optional[np.ndarray] = None,
-        idt: Optional[np.ndarray] = None,
+        num_in,
+        num_out,
+        bias: bool = True,
+        use_timestep: bool = False,
         activation_function: Optional[str] = None,
         resnet: bool = False,
         precision: str = DEFAULT_PRECISION,
     ) -> None:
         prec = PRECISION_DICT[precision.lower()]
         self.precision = precision
-        self.w = w.astype(prec) if w is not None else None
-        self.b = b.astype(prec) if b is not None else None
-        self.idt = idt.astype(prec) if idt is not None else None
+        rng = np.random.default_rng()
+        self.w = rng.normal(size=(num_in, num_out)).astype(prec)
+        self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None
+        self.idt = rng.normal(size=(num_out,)).astype(prec) if use_timestep else None
         self.activation_function = (
             activation_function if activation_function is not None else "none"
         )
         self.resnet = resnet
         self.check_type_consistency()
+        self.check_shape_consistency()
 
     def serialize(self) -> dict:
         """Serialize the layer to a dict.
@@ -179,10 +183,11 @@ def serialize(self) -> dict:
         data = {
             "w": self.w,
             "b": self.b,
+            "idt": self.idt,
         }
-        if self.idt is not None:
-            data["idt"] = self.idt
         return {
+            "bias": self.b is not None,
+            "use_timestep": self.idt is not None,
             "activation_function": self.activation_function,
             "resnet": self.resnet,
             "precision": self.precision,
@@ -198,15 +203,34 @@ def deserialize(cls, data: dict) -> "NativeLayer":
         data : dict
             The dict to deserialize from.
         """
-        precision = data.get("precision", DEFAULT_PRECISION)
-        return cls(
-            w=data["@variables"]["w"],
-            b=data["@variables"].get("b", None),
-            idt=data["@variables"].get("idt", None),
-            activation_function=data["activation_function"],
-            resnet=data.get("resnet", False),
-            precision=precision,
+        data = copy.deepcopy(data)
+        variables = data.pop("@variables")
+        assert variables["w"] is not None and len(variables["w"].shape) == 2
+        num_in, num_out = variables["w"].shape
+        obj = cls(
+            num_in,
+            num_out,
+            **data,
         )
+        obj.w, obj.b, obj.idt = (
+            variables["w"],
+            variables.get("b", None),
+            variables.get("idt", None),
+        )
+        obj.check_shape_consistency()
+        return obj
+
+    def check_shape_consistency(self):
+        if self.b is not None and self.w.shape[1] != self.b.shape[0]:
+            raise ValueError(
+                f"dim 1 of w {self.w.shape[1]} is not equal to shape "
+                f"of b {self.b.shape[0]}",
+            )
+        if self.idt is not None and self.w.shape[1] != self.idt.shape[0]:
+            raise ValueError(
+                f"dim 1 of w {self.w.shape[1]} is not equal to shape "
+                f"of idt {self.idt.shape[0]}",
+            )
 
     def check_type_consistency(self):
         precision = self.precision
@@ -252,6 +276,14 @@ def __getitem__(self, key):
         else:
             raise KeyError(key)
 
+    @property
+    def dim_in(self) -> int:
+        return self.w.shape[0]
+
+    @property
+    def dim_out(self) -> int:
+        return self.w.shape[1]
+
     def call(self, x: np.ndarray) -> np.ndarray:
         """Forward pass.
 
@@ -303,6 +335,7 @@ def __init__(self, layers: Optional[List[dict]] = None) -> None:
         if layers is None:
             layers = []
         self.layers = [NativeLayer.deserialize(layer) for layer in layers]
+        self.check_shape_consistency()
 
     def serialize(self) -> dict:
         """Serialize the network to a dict.
@@ -327,16 +360,21 @@ def deserialize(cls, data: dict) -> "NativeNet":
 
     def __getitem__(self, key):
         assert isinstance(key, int)
-        if len(self.layers) <= key:
-            self.layers.extend([NativeLayer()] * (key - len(self.layers) + 1))
         return self.layers[key]
 
     def __setitem__(self, key, value):
         assert isinstance(key, int)
-        if len(self.layers) <= key:
-            self.layers.extend([NativeLayer()] * (key - len(self.layers) + 1))
         self.layers[key] = value
 
+    def check_shape_consistency(self):
+        for ii in range(len(self.layers) - 1):
+            if self.layers[ii].dim_out != self.layers[ii + 1].dim_in:
+                raise ValueError(
+                    f"the dim of layer {ii} output {self.layers[ii].dim_out} ",
+                    f"does not match the dim of layer {ii+1} ",
+                    f"output {self.layers[ii].dim_out}",
+                )
+
     def call(self, x: np.ndarray) -> np.ndarray:
         """Forward pass.
 
@@ -389,9 +427,10 @@ def __init__(
             i_ot = ii
             layers.append(
                 NativeLayer(
-                    rng.normal(size=(i_in, i_ot)),
-                    b=rng.normal(size=(i_ot)),
-                    idt=rng.normal(size=(i_ot)) if resnet_dt else None,
+                    i_in,
+                    i_ot,
+                    bias=True,
+                    use_timestep=resnet_dt,
                     activation_function=activation_function,
                     resnet=True,
                     precision=precision,
@@ -431,6 +470,7 @@ def deserialize(cls, data: dict) -> "EmbeddingNet":
         data : dict
             The dict to deserialize from.
         """
+        data = copy.deepcopy(data)
         layers = data.pop("layers")
         obj = cls(**data)
         super(EmbeddingNet, obj).__init__(layers)
@@ -481,9 +521,10 @@ def __init__(
         i_in, i_ot = neuron[-1], out_dim
         self.layers.append(
             NativeLayer(
-                rng.normal(size=(i_in, i_ot)),
-                b=rng.normal(size=(i_ot)) if bias_out else None,
-                idt=None,
+                i_in,
+                i_ot,
+                bias=bias_out,
+                use_timestep=False,
                 activation_function=None,
                 resnet=False,
                 precision=precision,
@@ -520,6 +561,7 @@ def deserialize(cls, data: dict) -> "FittingNet":
         data : dict
             The dict to deserialize from.
         """
+        data = copy.deepcopy(data)
         layers = data.pop("layers")
         obj = cls(**data)
         NativeNet.__init__(obj, layers)
diff --git a/deepmd_utils/model_format/se_e2_a.py b/deepmd_utils/model_format/se_e2_a.py
index a34694a882..b9143ee360 100644
--- a/deepmd_utils/model_format/se_e2_a.py
+++ b/deepmd_utils/model_format/se_e2_a.py
@@ -6,6 +6,7 @@
 except ImportError:
     __version__ = "unknown"
 
+import copy
 from typing import (
     Any,
     List,
@@ -270,6 +271,7 @@ def serialize(self) -> dict:
 
     @classmethod
     def deserialize(cls, data: dict) -> "DescrptSeA":
+        data = copy.deepcopy(data)
         variables = data.pop("@variables")
         embeddings = data.pop("embeddings")
         env_mat = data.pop("env_mat")
diff --git a/source/tests/test_model_format_utils.py b/source/tests/test_model_format_utils.py
index b08c3bcf52..f588647096 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/test_model_format_utils.py
@@ -35,39 +35,74 @@ def test_serialize_deserize(self):
             [None, [4], [3, 2]],
             ["float32", "float64", "single", "double"],
         ):
-            ww = np.full((ni, no), 3.0)
-            bb = np.full((no,), 4.0) if bias else None
-            idt = np.full((no,), 5.0) if ut else None
-            nl0 = NativeLayer(ww, bb, idt, activation_function, resnet, prec)
+            nl0 = NativeLayer(
+                ni,
+                no,
+                bias=bias,
+                use_timestep=ut,
+                activation_function=activation_function,
+                resnet=resnet,
+                precision=prec,
+            )
             nl1 = NativeLayer.deserialize(nl0.serialize())
-            inp_shap = [ww.shape[0]]
+            inp_shap = [ni]
             if ashp is not None:
                 inp_shap = ashp + inp_shap
             inp = np.arange(np.prod(inp_shap)).reshape(inp_shap)
             np.testing.assert_allclose(nl0.call(inp), nl1.call(inp))
 
+    def test_shape_error(self):
+        self.w0 = np.full((2, 3), 3.0)
+        self.b0 = np.full((2,), 4.0)
+        self.b1 = np.full((3,), 4.0)
+        self.idt0 = np.full((2,), 4.0)
+        with self.assertRaises(ValueError) as context:
+            network = NativeLayer.deserialize(
+                {
+                    "activation_function": "tanh",
+                    "resnet": True,
+                    "@variables": {"w": self.w0, "b": self.b0},
+                }
+            )
+            assert "not equalt to shape of b" in context.exception
+        with self.assertRaises(ValueError) as context:
+            network = NativeLayer.deserialize(
+                {
+                    "activation_function": "tanh",
+                    "resnet": True,
+                    "@variables": {"w": self.w0, "b": self.b1, "idt": self.idt0},
+                }
+            )
+            assert "not equalt to shape of idt" in context.exception
+
 
 class TestNativeNet(unittest.TestCase):
     def setUp(self) -> None:
-        self.w = np.full((2, 3), 3.0)
-        self.b = np.full((3,), 4.0)
-        self.idt = np.full((3,), 5.0)
+        self.w0 = np.full((2, 3), 3.0)
+        self.b0 = np.full((3,), 4.0)
+        self.w1 = np.full((3, 4), 3.0)
+        self.b1 = np.full((4,), 4.0)
 
     def test_serialize(self):
-        network = NativeNet()
-        network[1]["w"] = self.w
-        network[1]["b"] = self.b
-        network[0]["w"] = self.w
-        network[0]["b"] = self.b
+        network = NativeNet(
+            [
+                NativeLayer(2, 3).serialize(),
+                NativeLayer(3, 4).serialize(),
+            ]
+        )
+        network[1]["w"] = self.w1
+        network[1]["b"] = self.b1
+        network[0]["w"] = self.w0
+        network[0]["b"] = self.b0
         network[1]["activation_function"] = "tanh"
         network[0]["activation_function"] = "tanh"
         network[1]["resnet"] = True
         network[0]["resnet"] = True
         jdata = network.serialize()
-        np.testing.assert_array_equal(jdata["layers"][0]["@variables"]["w"], self.w)
-        np.testing.assert_array_equal(jdata["layers"][0]["@variables"]["b"], self.b)
-        np.testing.assert_array_equal(jdata["layers"][1]["@variables"]["w"], self.w)
-        np.testing.assert_array_equal(jdata["layers"][1]["@variables"]["b"], self.b)
+        np.testing.assert_array_equal(jdata["layers"][0]["@variables"]["w"], self.w0)
+        np.testing.assert_array_equal(jdata["layers"][0]["@variables"]["b"], self.b0)
+        np.testing.assert_array_equal(jdata["layers"][1]["@variables"]["w"], self.w1)
+        np.testing.assert_array_equal(jdata["layers"][1]["@variables"]["b"], self.b1)
         np.testing.assert_array_equal(jdata["layers"][0]["activation_function"], "tanh")
         np.testing.assert_array_equal(jdata["layers"][1]["activation_function"], "tanh")
         np.testing.assert_array_equal(jdata["layers"][0]["resnet"], True)
@@ -80,25 +115,45 @@ def test_deserialize(self):
                     {
                         "activation_function": "tanh",
                         "resnet": True,
-                        "@variables": {"w": self.w, "b": self.b},
+                        "@variables": {"w": self.w0, "b": self.b0},
                     },
                     {
                         "activation_function": "tanh",
                         "resnet": True,
-                        "@variables": {"w": self.w, "b": self.b},
+                        "@variables": {"w": self.w1, "b": self.b1},
                     },
                 ],
             }
         )
-        np.testing.assert_array_equal(network[0]["w"], self.w)
-        np.testing.assert_array_equal(network[0]["b"], self.b)
-        np.testing.assert_array_equal(network[1]["w"], self.w)
-        np.testing.assert_array_equal(network[1]["b"], self.b)
+        np.testing.assert_array_equal(network[0]["w"], self.w0)
+        np.testing.assert_array_equal(network[0]["b"], self.b0)
+        np.testing.assert_array_equal(network[1]["w"], self.w1)
+        np.testing.assert_array_equal(network[1]["b"], self.b1)
         np.testing.assert_array_equal(network[0]["activation_function"], "tanh")
         np.testing.assert_array_equal(network[1]["activation_function"], "tanh")
         np.testing.assert_array_equal(network[0]["resnet"], True)
         np.testing.assert_array_equal(network[1]["resnet"], True)
 
+    def test_shape_error(self):
+        with self.assertRaises(ValueError) as context:
+            network = NativeNet.deserialize(
+                {
+                    "layers": [
+                        {
+                            "activation_function": "tanh",
+                            "resnet": True,
+                            "@variables": {"w": self.w0, "b": self.b0},
+                        },
+                        {
+                            "activation_function": "tanh",
+                            "resnet": True,
+                            "@variables": {"w": self.w0, "b": self.b0},
+                        },
+                    ],
+                }
+            )
+            assert "does not match the dim of layer" in context.exception
+
 
 class TestEmbeddingNet(unittest.TestCase):
     def test_embedding_net(self):
@@ -146,19 +201,21 @@ def test_fitting_net(self):
 
 class TestNetworkCollection(unittest.TestCase):
     def setUp(self) -> None:
-        w = np.full((2, 3), 3.0)
-        b = np.full((3,), 4.0)
+        w0 = np.full((2, 3), 3.0)
+        b0 = np.full((3,), 4.0)
+        w1 = np.full((3, 4), 3.0)
+        b1 = np.full((4,), 4.0)
         self.network = {
             "layers": [
                 {
                     "activation_function": "tanh",
                     "resnet": True,
-                    "@variables": {"w": w, "b": b},
+                    "@variables": {"w": w0, "b": b0},
                 },
                 {
                     "activation_function": "tanh",
                     "resnet": True,
-                    "@variables": {"w": w, "b": b},
+                    "@variables": {"w": w1, "b": b1},
                 },
             ],
         }

From ae90498c04a77ea7eeea9e5bb1050f0d5f6295a5 Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Sat, 13 Jan 2024 15:04:42 +0800
Subject: [PATCH 80/97] refactor networks, now can be used cross platform
 (#3141)

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 deepmd_utils/model_format/__init__.py |   6 +
 deepmd_utils/model_format/network.py  | 469 +++++++++++++-------------
 2 files changed, 248 insertions(+), 227 deletions(-)

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index 9d1fafe5c8..72dd7b59ee 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -14,6 +14,9 @@
     NativeNet,
     NetworkCollection,
     load_dp_model,
+    make_embedding_network,
+    make_fitting_network,
+    make_multilayer_network,
     save_dp_model,
     traverse_model_dict,
 )
@@ -32,6 +35,9 @@
 __all__ = [
     "DescrptSeA",
     "EnvMat",
+    "make_multilayer_network",
+    "make_embedding_network",
+    "make_fitting_network",
     "EmbeddingNet",
     "FittingNet",
     "NativeLayer",
diff --git a/deepmd_utils/model_format/network.py b/deepmd_utils/model_format/network.py
index d9071784ca..71ed659787 100644
--- a/deepmd_utils/model_format/network.py
+++ b/deepmd_utils/model_format/network.py
@@ -276,11 +276,9 @@ def __getitem__(self, key):
         else:
             raise KeyError(key)
 
-    @property
     def dim_in(self) -> int:
         return self.w.shape[0]
 
-    @property
     def dim_out(self) -> int:
         return self.w.shape[1]
 
@@ -322,250 +320,267 @@ def fn(x):
         return y
 
 
-class NativeNet(NativeOP):
-    """Native representation of a neural network.
-
-    Parameters
-    ----------
-    layers : list[NativeLayer], optional
-        The layers of the network.
-    """
-
-    def __init__(self, layers: Optional[List[dict]] = None) -> None:
-        if layers is None:
-            layers = []
-        self.layers = [NativeLayer.deserialize(layer) for layer in layers]
-        self.check_shape_consistency()
-
-    def serialize(self) -> dict:
-        """Serialize the network to a dict.
-
-        Returns
-        -------
-        dict
-            The serialized network.
-        """
-        return {"layers": [layer.serialize() for layer in self.layers]}
-
-    @classmethod
-    def deserialize(cls, data: dict) -> "NativeNet":
-        """Deserialize the network from a dict.
+def make_multilayer_network(T_NetworkLayer, ModuleBase):
+    class NN(ModuleBase):
+        """Native representation of a neural network.
 
         Parameters
         ----------
-        data : dict
-            The dict to deserialize from.
+        layers : list[NativeLayer], optional
+            The layers of the network.
         """
-        return cls(data["layers"])
-
-    def __getitem__(self, key):
-        assert isinstance(key, int)
-        return self.layers[key]
-
-    def __setitem__(self, key, value):
-        assert isinstance(key, int)
-        self.layers[key] = value
 
-    def check_shape_consistency(self):
-        for ii in range(len(self.layers) - 1):
-            if self.layers[ii].dim_out != self.layers[ii + 1].dim_in:
-                raise ValueError(
-                    f"the dim of layer {ii} output {self.layers[ii].dim_out} ",
-                    f"does not match the dim of layer {ii+1} ",
-                    f"output {self.layers[ii].dim_out}",
-                )
-
-    def call(self, x: np.ndarray) -> np.ndarray:
-        """Forward pass.
+        def __init__(self, layers: Optional[List[dict]] = None) -> None:
+            super().__init__()
+            if layers is None:
+                layers = []
+            self.layers = [T_NetworkLayer.deserialize(layer) for layer in layers]
+            self.check_shape_consistency()
+
+        def serialize(self) -> dict:
+            """Serialize the network to a dict.
+
+            Returns
+            -------
+            dict
+                The serialized network.
+            """
+            return {"layers": [layer.serialize() for layer in self.layers]}
+
+        @classmethod
+        def deserialize(cls, data: dict) -> "NN":
+            """Deserialize the network from a dict.
+
+            Parameters
+            ----------
+            data : dict
+                The dict to deserialize from.
+            """
+            return cls(data["layers"])
+
+        def __getitem__(self, key):
+            assert isinstance(key, int)
+            return self.layers[key]
+
+        def __setitem__(self, key, value):
+            assert isinstance(key, int)
+            self.layers[key] = value
+
+        def check_shape_consistency(self):
+            for ii in range(len(self.layers) - 1):
+                if self.layers[ii].dim_out() != self.layers[ii + 1].dim_in():
+                    raise ValueError(
+                        f"the dim of layer {ii} output {self.layers[ii].dim_out} ",
+                        f"does not match the dim of layer {ii+1} ",
+                        f"output {self.layers[ii].dim_out}",
+                    )
+
+        def call(self, x):
+            """Forward pass.
+
+            Parameters
+            ----------
+            x : np.ndarray
+                The input.
+
+            Returns
+            -------
+            np.ndarray
+                The output.
+            """
+            for layer in self.layers:
+                x = layer(x)
+            return x
+
+    return NN
+
+
+NativeNet = make_multilayer_network(NativeLayer, NativeOP)
+
+
+def make_embedding_network(T_Network, T_NetworkLayer):
+    class EN(T_Network):
+        """The embedding network.
 
         Parameters
         ----------
-        x : np.ndarray
-            The input.
+        in_dim
+            Input dimension.
+        neuron
+            The number of neurons in each layer. The output dimension
+            is the same as the dimension of the last layer.
+        activation_function
+            The activation function.
+        resnet_dt
+            Use time step at the resnet architecture.
+        precision
+            Floating point precision for the model paramters.
 
-        Returns
-        -------
-        np.ndarray
-            The output.
         """
-        for layer in self.layers:
-            x = layer.call(x)
-        return x
-
-
-class EmbeddingNet(NativeNet):
-    """The embedding network.
-
-    Parameters
-    ----------
-    in_dim
-        Input dimension.
-    neuron
-        The number of neurons in each layer. The output dimension
-        is the same as the dimension of the last layer.
-    activation_function
-        The activation function.
-    resnet_dt
-        Use time step at the resnet architecture.
-    precision
-        Floating point precision for the model paramters.
-
-    """
-
-    def __init__(
-        self,
-        in_dim,
-        neuron: List[int] = [24, 48, 96],
-        activation_function: str = "tanh",
-        resnet_dt: bool = False,
-        precision: str = DEFAULT_PRECISION,
-    ):
-        layers = []
-        i_in = in_dim
-        rng = np.random.default_rng()
-        for idx, ii in enumerate(neuron):
-            i_ot = ii
-            layers.append(
-                NativeLayer(
-                    i_in,
-                    i_ot,
-                    bias=True,
-                    use_timestep=resnet_dt,
-                    activation_function=activation_function,
-                    resnet=True,
-                    precision=precision,
-                ).serialize()
-            )
-            i_in = i_ot
-        super().__init__(layers)
-        self.in_dim = in_dim
-        self.neuron = neuron
-        self.activation_function = activation_function
-        self.resnet_dt = resnet_dt
-        self.precision = precision
-
-    def serialize(self) -> dict:
-        """Serialize the network to a dict.
 
-        Returns
-        -------
-        dict
-            The serialized network.
-        """
-        return {
-            "in_dim": self.in_dim,
-            "neuron": self.neuron.copy(),
-            "activation_function": self.activation_function,
-            "resnet_dt": self.resnet_dt,
-            "precision": self.precision,
-            "layers": [layer.serialize() for layer in self.layers],
-        }
-
-    @classmethod
-    def deserialize(cls, data: dict) -> "EmbeddingNet":
-        """Deserialize the network from a dict.
+        def __init__(
+            self,
+            in_dim,
+            neuron: List[int] = [24, 48, 96],
+            activation_function: str = "tanh",
+            resnet_dt: bool = False,
+            precision: str = DEFAULT_PRECISION,
+        ):
+            layers = []
+            i_in = in_dim
+            for idx, ii in enumerate(neuron):
+                i_ot = ii
+                layers.append(
+                    T_NetworkLayer(
+                        i_in,
+                        i_ot,
+                        bias=True,
+                        use_timestep=resnet_dt,
+                        activation_function=activation_function,
+                        resnet=True,
+                        precision=precision,
+                    ).serialize()
+                )
+                i_in = i_ot
+            super().__init__(layers)
+            self.in_dim = in_dim
+            self.neuron = neuron
+            self.activation_function = activation_function
+            self.resnet_dt = resnet_dt
+            self.precision = precision
+
+        def serialize(self) -> dict:
+            """Serialize the network to a dict.
+
+            Returns
+            -------
+            dict
+                The serialized network.
+            """
+            return {
+                "in_dim": self.in_dim,
+                "neuron": self.neuron.copy(),
+                "activation_function": self.activation_function,
+                "resnet_dt": self.resnet_dt,
+                "precision": self.precision,
+                "layers": [layer.serialize() for layer in self.layers],
+            }
+
+        @classmethod
+        def deserialize(cls, data: dict) -> "EmbeddingNet":
+            """Deserialize the network from a dict.
+
+            Parameters
+            ----------
+            data : dict
+                The dict to deserialize from.
+            """
+            data = copy.deepcopy(data)
+            layers = data.pop("layers")
+            obj = cls(**data)
+            super(EN, obj).__init__(layers)
+            return obj
+
+    return EN
+
+
+EmbeddingNet = make_embedding_network(NativeNet, NativeLayer)
+
+
+def make_fitting_network(T_EmbeddingNet, T_Network, T_NetworkLayer):
+    class FN(T_EmbeddingNet):
+        """The fitting network. It may be implemented as an embedding
+        net connected with a linear output layer.
 
         Parameters
         ----------
-        data : dict
-            The dict to deserialize from.
-        """
-        data = copy.deepcopy(data)
-        layers = data.pop("layers")
-        obj = cls(**data)
-        super(EmbeddingNet, obj).__init__(layers)
-        return obj
-
+        in_dim
+            Input dimension.
+        out_dim
+            Output dimension
+        neuron
+            The number of neurons in each hidden layer.
+        activation_function
+            The activation function.
+        resnet_dt
+            Use time step at the resnet architecture.
+        precision
+            Floating point precision for the model paramters.
+        bias_out
+            The last linear layer has bias.
 
-class FittingNet(EmbeddingNet):
-    """The fitting network. It may be implemented as an embedding
-    net connected with a linear output layer.
-
-    Parameters
-    ----------
-    in_dim
-        Input dimension.
-    out_dim
-        Output dimension
-    neuron
-        The number of neurons in each hidden layer.
-    activation_function
-        The activation function.
-    resnet_dt
-        Use time step at the resnet architecture.
-    precision
-        Floating point precision for the model paramters.
-    bias_out
-        The last linear layer has bias.
-
-    """
+        """
 
-    def __init__(
-        self,
-        in_dim,
-        out_dim,
-        neuron: List[int] = [24, 48, 96],
-        activation_function: str = "tanh",
-        resnet_dt: bool = False,
-        precision: str = DEFAULT_PRECISION,
-        bias_out: bool = True,
-    ):
-        super().__init__(
+        def __init__(
+            self,
             in_dim,
-            neuron=neuron,
-            activation_function=activation_function,
-            resnet_dt=resnet_dt,
-            precision=precision,
-        )
-        rng = np.random.default_rng()
-        i_in, i_ot = neuron[-1], out_dim
-        self.layers.append(
-            NativeLayer(
-                i_in,
-                i_ot,
-                bias=bias_out,
-                use_timestep=False,
-                activation_function=None,
-                resnet=False,
+            out_dim,
+            neuron: List[int] = [24, 48, 96],
+            activation_function: str = "tanh",
+            resnet_dt: bool = False,
+            precision: str = DEFAULT_PRECISION,
+            bias_out: bool = True,
+        ):
+            super().__init__(
+                in_dim,
+                neuron=neuron,
+                activation_function=activation_function,
+                resnet_dt=resnet_dt,
                 precision=precision,
             )
-        )
-        self.out_dim = out_dim
-        self.bias_out = bias_out
-
-    def serialize(self) -> dict:
-        """Serialize the network to a dict.
-
-        Returns
-        -------
-        dict
-            The serialized network.
-        """
-        return {
-            "in_dim": self.in_dim,
-            "out_dim": self.out_dim,
-            "neuron": self.neuron.copy(),
-            "activation_function": self.activation_function,
-            "resnet_dt": self.resnet_dt,
-            "precision": self.precision,
-            "bias_out": self.bias_out,
-            "layers": [layer.serialize() for layer in self.layers],
-        }
-
-    @classmethod
-    def deserialize(cls, data: dict) -> "FittingNet":
-        """Deserialize the network from a dict.
-
-        Parameters
-        ----------
-        data : dict
-            The dict to deserialize from.
-        """
-        data = copy.deepcopy(data)
-        layers = data.pop("layers")
-        obj = cls(**data)
-        NativeNet.__init__(obj, layers)
-        return obj
+            i_in, i_ot = neuron[-1], out_dim
+            self.layers.append(
+                T_NetworkLayer(
+                    i_in,
+                    i_ot,
+                    bias=bias_out,
+                    use_timestep=False,
+                    activation_function=None,
+                    resnet=False,
+                    precision=precision,
+                )
+            )
+            self.out_dim = out_dim
+            self.bias_out = bias_out
+
+        def serialize(self) -> dict:
+            """Serialize the network to a dict.
+
+            Returns
+            -------
+            dict
+                The serialized network.
+            """
+            return {
+                "in_dim": self.in_dim,
+                "out_dim": self.out_dim,
+                "neuron": self.neuron.copy(),
+                "activation_function": self.activation_function,
+                "resnet_dt": self.resnet_dt,
+                "precision": self.precision,
+                "bias_out": self.bias_out,
+                "layers": [layer.serialize() for layer in self.layers],
+            }
+
+        @classmethod
+        def deserialize(cls, data: dict) -> "FittingNet":
+            """Deserialize the network from a dict.
+
+            Parameters
+            ----------
+            data : dict
+                The dict to deserialize from.
+            """
+            data = copy.deepcopy(data)
+            layers = data.pop("layers")
+            obj = cls(**data)
+            T_Network.__init__(obj, layers)
+            return obj
+
+    return FN
+
+
+FittingNet = make_fitting_network(EmbeddingNet, NativeNet, NativeLayer)
 
 
 class NetworkCollection:

From 2096b800d45176f1facc1d1ceca73f3f54f24f62 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 13 Jan 2024 07:18:25 -0500
Subject: [PATCH 81/97] move utility to `deepmd_utils` (without modification)
 (#3140)

Move framework-independent code to the `deepmd_utils` module without
modification, as a step towards
https://github.com/deepmodeling/deepmd-kit/issues/3118.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/common.py                     |  271 +---
 deepmd/env.py                        |   24 +-
 deepmd/loggers/__init__.py           |    4 +-
 deepmd/loggers/loggers.py            |  276 +---
 deepmd/model/model_stat.py           |   75 +-
 deepmd/nvnmd/utils/argcheck.py       |   73 +-
 deepmd/utils/argcheck.py             | 2041 +-------------------------
 deepmd/utils/compat.py               |  401 +----
 deepmd/utils/data.py                 |  617 +-------
 deepmd/utils/data_system.py          |  665 +--------
 deepmd/utils/pair_tab.py             |   94 +-
 deepmd/utils/path.py                 |  369 +----
 deepmd/utils/plugin.py               |  104 +-
 deepmd/utils/random.py               |   76 +-
 deepmd/utils/weight_avg.py           |   51 +-
 deepmd_utils/common.py               |  270 ++++
 deepmd_utils/env.py                  |   28 +
 deepmd_utils/loggers/__init__.py     |    8 +
 deepmd_utils/loggers/loggers.py      |  277 ++++
 deepmd_utils/utils/__init__.py       |    3 +
 deepmd_utils/utils/argcheck.py       | 2028 +++++++++++++++++++++++++
 deepmd_utils/utils/argcheck_nvnmd.py |   70 +
 deepmd_utils/utils/compat.py         |  392 +++++
 deepmd_utils/utils/data.py           |  614 ++++++++
 deepmd_utils/utils/data_system.py    |  654 +++++++++
 deepmd_utils/utils/model_stat.py     |   68 +
 deepmd_utils/utils/pair_tab.py       |   91 ++
 deepmd_utils/utils/path.py           |  358 +++++
 deepmd_utils/utils/plugin.py         |   95 ++
 deepmd_utils/utils/random.py         |   67 +
 deepmd_utils/utils/weight_avg.py     |   48 +
 31 files changed, 5227 insertions(+), 4985 deletions(-)
 create mode 100644 deepmd_utils/common.py
 create mode 100644 deepmd_utils/env.py
 create mode 100644 deepmd_utils/loggers/__init__.py
 create mode 100644 deepmd_utils/loggers/loggers.py
 create mode 100644 deepmd_utils/utils/__init__.py
 create mode 100644 deepmd_utils/utils/argcheck.py
 create mode 100644 deepmd_utils/utils/argcheck_nvnmd.py
 create mode 100644 deepmd_utils/utils/compat.py
 create mode 100644 deepmd_utils/utils/data.py
 create mode 100644 deepmd_utils/utils/data_system.py
 create mode 100644 deepmd_utils/utils/model_stat.py
 create mode 100644 deepmd_utils/utils/pair_tab.py
 create mode 100644 deepmd_utils/utils/path.py
 create mode 100644 deepmd_utils/utils/plugin.py
 create mode 100644 deepmd_utils/utils/random.py
 create mode 100644 deepmd_utils/utils/weight_avg.py

diff --git a/deepmd/common.py b/deepmd/common.py
index 472508bb08..54e3d0a6f8 100644
--- a/deepmd/common.py
+++ b/deepmd/common.py
@@ -1,53 +1,65 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Collection of functions and classes used throughout the whole package."""
 
-import json
 import warnings
 from functools import (
     wraps,
 )
-from pathlib import (
-    Path,
-)
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    Dict,
-    List,
-    Optional,
-    TypeVar,
     Union,
 )
 
-import numpy as np
 import tensorflow
-import yaml
 from tensorflow.python.framework import (
     tensor_util,
 )
 
 from deepmd.env import (
-    GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
-from deepmd.utils.path import (
-    DPPath,
+from deepmd_utils.common import (
+    add_data_requirement,
+    data_requirement,
+    expand_sys_str,
+    get_np_precision,
+    j_loader,
+    j_must_have,
+    make_default_mesh,
+    select_idx_map,
 )
 
 if TYPE_CHECKING:
-    _DICT_VAL = TypeVar("_DICT_VAL")
-    _OBJ = TypeVar("_OBJ")
-    try:
-        from typing import Literal  # python >3.6
-    except ImportError:
-        from typing_extensions import Literal  # type: ignore
-    _ACTIVATION = Literal[
-        "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf"
-    ]
-    _PRECISION = Literal["default", "float16", "float32", "float64"]
+    from deepmd_utils.common import (
+        _ACTIVATION,
+        _PRECISION,
+    )
+
+__all__ = [
+    # from deepmd_utils.common
+    "data_requirement",
+    "add_data_requirement",
+    "select_idx_map",
+    "make_default_mesh",
+    "j_must_have",
+    "j_loader",
+    "expand_sys_str",
+    "get_np_precision",
+    # from self
+    "PRECISION_DICT",
+    "gelu",
+    "gelu_tf",
+    "ACTIVATION_FN_DICT",
+    "get_activation_func",
+    "get_precision",
+    "safe_cast_tensor",
+    "cast_precision",
+    "clear_session",
+]
 
 # define constants
 PRECISION_DICT = {
@@ -115,10 +127,6 @@ def gelu_wrapper(x):
     return (lambda x: gelu_wrapper(x))(x)
 
 
-# TODO this is not a good way to do things. This is some global variable to which
-# TODO anyone can write and there is no good way to keep track of the changes
-data_requirement = {}
-
 ACTIVATION_FN_DICT = {
     "relu": tf.nn.relu,
     "relu6": tf.nn.relu6,
@@ -132,164 +140,6 @@ def gelu_wrapper(x):
 }
 
 
-def add_data_requirement(
-    key: str,
-    ndof: int,
-    atomic: bool = False,
-    must: bool = False,
-    high_prec: bool = False,
-    type_sel: Optional[bool] = None,
-    repeat: int = 1,
-    default: float = 0.0,
-    dtype: Optional[np.dtype] = None,
-):
-    """Specify data requirements for training.
-
-    Parameters
-    ----------
-    key : str
-        type of data stored in corresponding `*.npy` file e.g. `forces` or `energy`
-    ndof : int
-        number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces
-        have `atomic=True` and `ndof=3`
-    atomic : bool, optional
-        specifies whwther the `ndof` keyworrd applies to per atom quantity or not,
-        by default False
-    must : bool, optional
-        specifi if the `*.npy` data file must exist, by default False
-    high_prec : bool, optional
-        if true load data to `np.float64` else `np.float32`, by default False
-    type_sel : bool, optional
-        select only certain type of atoms, by default None
-    repeat : int, optional
-        if specify repaeat data `repeat` times, by default 1
-    default : float, optional, default=0.
-        default value of data
-    dtype : np.dtype, optional
-        the dtype of data, overwrites `high_prec` if provided
-    """
-    data_requirement[key] = {
-        "ndof": ndof,
-        "atomic": atomic,
-        "must": must,
-        "high_prec": high_prec,
-        "type_sel": type_sel,
-        "repeat": repeat,
-        "default": default,
-        "dtype": dtype,
-    }
-
-
-def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray:
-    """Build map of indices for element supplied element types from all atoms list.
-
-    Parameters
-    ----------
-    atom_types : np.ndarray
-        array specifing type for each atoms as integer
-    select_types : np.ndarray
-        types of atoms you want to find indices for
-
-    Returns
-    -------
-    np.ndarray
-        indices of types of atoms defined by `select_types` in `atom_types` array
-
-    Warnings
-    --------
-    `select_types` array will be sorted before finding indices in `atom_types`
-    """
-    sort_select_types = np.sort(select_types)
-    idx_map = []
-    for ii in sort_select_types:
-        idx_map.append(np.where(atom_types == ii)[0])
-    return np.concatenate(idx_map)
-
-
-def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray:
-    """Make mesh.
-
-    Only the size of mesh matters, not the values:
-    * 6 for PBC, no mixed types
-    * 0 for no PBC, no mixed types
-    * 7 for PBC, mixed types
-    * 1 for no PBC, mixed types
-
-    Parameters
-    ----------
-    pbc : bool
-        if True, the mesh will be made for periodic boundary conditions
-    mixed_type : bool
-        if True, the mesh will be made for mixed types
-
-    Returns
-    -------
-    np.ndarray
-        mesh
-    """
-    mesh_size = int(pbc) * 6 + int(mixed_type)
-    default_mesh = np.zeros(mesh_size, dtype=np.int32)
-    return default_mesh
-
-
-# TODO maybe rename this to j_deprecated and only warn about deprecated keys,
-# TODO if the deprecated_key argument is left empty function puppose is only custom
-# TODO error since dict[key] already raises KeyError when the key is missing
-def j_must_have(
-    jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = []
-) -> "_DICT_VAL":
-    """Assert that supplied dictionary conaines specified key.
-
-    Returns
-    -------
-    _DICT_VAL
-        value that was store unde supplied key
-
-    Raises
-    ------
-    RuntimeError
-        if the key is not present
-    """
-    if key not in jdata.keys():
-        for ii in deprecated_key:
-            if ii in jdata.keys():
-                warnings.warn(f"the key {ii} is deprecated, please use {key} instead")
-                return jdata[ii]
-        else:
-            raise RuntimeError(f"json database must provide key {key}")
-    else:
-        return jdata[key]
-
-
-def j_loader(filename: Union[str, Path]) -> Dict[str, Any]:
-    """Load yaml or json settings file.
-
-    Parameters
-    ----------
-    filename : Union[str, Path]
-        path to file
-
-    Returns
-    -------
-    Dict[str, Any]
-        loaded dictionary
-
-    Raises
-    ------
-    TypeError
-        if the supplied file is of unsupported type
-    """
-    filepath = Path(filename)
-    if filepath.suffix.endswith("json"):
-        with filepath.open() as fp:
-            return json.load(fp)
-    elif filepath.suffix.endswith(("yml", "yaml")):
-        with filepath.open() as fp:
-            return yaml.safe_load(fp)
-    else:
-        raise TypeError("config file must be json, or yaml/yml")
-
-
 def get_activation_func(
     activation_fn: Union["_ACTIVATION", None],
 ) -> Union[Callable[[tf.Tensor], tf.Tensor], None]:
@@ -340,57 +190,6 @@ def get_precision(precision: "_PRECISION") -> Any:
     return PRECISION_DICT[precision]
 
 
-# TODO port completely to pathlib when all callers are ported
-def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
-    """Recursively iterate over directories taking those that contain `type.raw` file.
-
-    Parameters
-    ----------
-    root_dir : Union[str, Path]
-        starting directory
-
-    Returns
-    -------
-    List[str]
-        list of string pointing to system directories
-    """
-    root_dir = DPPath(root_dir)
-    matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()]
-    if (root_dir / "type.raw").is_file():
-        matches.append(str(root_dir))
-    return matches
-
-
-def get_np_precision(precision: "_PRECISION") -> np.dtype:
-    """Get numpy precision constant from string.
-
-    Parameters
-    ----------
-    precision : _PRECISION
-        string name of numpy constant or default
-
-    Returns
-    -------
-    np.dtype
-        numpy presicion constant
-
-    Raises
-    ------
-    RuntimeError
-        if string is invalid
-    """
-    if precision == "default":
-        return GLOBAL_NP_FLOAT_PRECISION
-    elif precision == "float16":
-        return np.float16
-    elif precision == "float32":
-        return np.float32
-    elif precision == "float64":
-        return np.float64
-    else:
-        raise RuntimeError(f"{precision} is not a valid precision")
-
-
 def safe_cast_tensor(
     input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType
 ) -> tf.Tensor:
diff --git a/deepmd/env.py b/deepmd/env.py
index 9b7f86f0d5..f290dc0a90 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -28,6 +28,11 @@
 )
 
 import deepmd.lib
+from deepmd_utils.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    global_float_prec,
+)
 
 if TYPE_CHECKING:
     from types import (
@@ -475,24 +480,7 @@ def _get_package_constants(
 op_grads_module = get_module("op_grads")
 
 # FLOAT_PREC
-dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower()
-if dp_float_prec in ("high", ""):
-    # default is high
-    GLOBAL_TF_FLOAT_PRECISION = tf.float64
-    GLOBAL_NP_FLOAT_PRECISION = np.float64
-    GLOBAL_ENER_FLOAT_PRECISION = np.float64
-    global_float_prec = "double"
-elif dp_float_prec == "low":
-    GLOBAL_TF_FLOAT_PRECISION = tf.float32
-    GLOBAL_NP_FLOAT_PRECISION = np.float32
-    GLOBAL_ENER_FLOAT_PRECISION = np.float64
-    global_float_prec = "float"
-else:
-    raise RuntimeError(
-        "Unsupported float precision option: %s. Supported: high,"
-        "low. Please set precision with environmental variable "
-        "DP_INTERFACE_PREC." % dp_float_prec
-    )
+GLOBAL_TF_FLOAT_PRECISION = tf.dtypes.as_dtype(GLOBAL_NP_FLOAT_PRECISION)
 
 
 def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor:
diff --git a/deepmd/loggers/__init__.py b/deepmd/loggers/__init__.py
index 39aa76139d..71057e3056 100644
--- a/deepmd/loggers/__init__.py
+++ b/deepmd/loggers/__init__.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module taking care of logging duties."""
+"""Alias of deepmd_utils.loggers for backward compatibility."""
 
-from .loggers import (
+from deepmd_utils.loggers.loggers import (
     set_log_handles,
 )
 
diff --git a/deepmd/loggers/loggers.py b/deepmd/loggers/loggers.py
index 015581f6bd..74ca7de63e 100644
--- a/deepmd/loggers/loggers.py
+++ b/deepmd/loggers/loggers.py
@@ -1,277 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Logger initialization for package."""
-
-import logging
-import os
-from typing import (
-    TYPE_CHECKING,
-    Optional,
+"""Alias of deepmd_utils.loggers.loggers for backward compatibility."""
+from deepmd_utils.loggers.loggers import (
+    set_log_handles,
 )
 
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
-
-    from mpi4py import (
-        MPI,
-    )
-
-    _MPI_APPEND_MODE = MPI.MODE_CREATE | MPI.MODE_APPEND
-
-logging.getLogger(__name__)
-
 __all__ = ["set_log_handles"]
-
-# logger formater
-FFORMATTER = logging.Formatter(
-    "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s"
-)
-CFORMATTER = logging.Formatter(
-    #    "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s"
-    "%(app_name)s %(levelname)-7s %(message)s"
-)
-FFORMATTER_MPI = logging.Formatter(
-    "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(name)-45s %(message)s"
-)
-CFORMATTER_MPI = logging.Formatter(
-    #    "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s"
-    "%(app_name)s rank:%(rank)-2s %(levelname)-7s %(message)s"
-)
-
-
-class _AppFilter(logging.Filter):
-    """Add field `app_name` to log messages."""
-
-    def filter(self, record):
-        record.app_name = "DEEPMD"
-        return True
-
-
-class _MPIRankFilter(logging.Filter):
-    """Add MPI rank number to log messages, adds field `rank`."""
-
-    def __init__(self, rank: int) -> None:
-        super().__init__(name="MPI_rank_id")
-        self.mpi_rank = str(rank)
-
-    def filter(self, record):
-        record.rank = self.mpi_rank
-        return True
-
-
-class _MPIMasterFilter(logging.Filter):
-    """Filter that lets through only messages emited from rank==0."""
-
-    def __init__(self, rank: int) -> None:
-        super().__init__(name="MPI_master_log")
-        self.mpi_rank = rank
-
-    def filter(self, record):
-        if self.mpi_rank == 0:
-            return True
-        else:
-            return False
-
-
-class _MPIFileStream:
-    """Wrap MPI.File` so it has the same API as python file streams.
-
-    Parameters
-    ----------
-    filename : Path
-        disk location of the file stream
-    MPI : MPI
-        MPI communicator object
-    mode : str, optional
-        file write mode, by default _MPI_APPEND_MODE
-    """
-
-    def __init__(
-        self, filename: "Path", MPI: "MPI", mode: str = "_MPI_APPEND_MODE"
-    ) -> None:
-        self.stream = MPI.File.Open(MPI.COMM_WORLD, filename, mode)
-        self.stream.Set_atomicity(True)
-        self.name = "MPIfilestream"
-
-    def write(self, msg: str):
-        """Write to MPI shared file stream.
-
-        Parameters
-        ----------
-        msg : str
-            message to write
-        """
-        b = bytearray()
-        b.extend(map(ord, msg))
-        self.stream.Write_shared(b)
-
-    def close(self):
-        """Synchronize and close MPI file stream."""
-        self.stream.Sync()
-        self.stream.Close()
-
-
-class _MPIHandler(logging.FileHandler):
-    """Emulate `logging.FileHandler` with MPI shared File that all ranks can write to.
-
-    Parameters
-    ----------
-    filename : Path
-        file path
-    MPI : MPI
-        MPI communicator object
-    mode : str, optional
-        file access mode, by default "_MPI_APPEND_MODE"
-    """
-
-    def __init__(
-        self,
-        filename: "Path",
-        MPI: "MPI",
-        mode: str = "_MPI_APPEND_MODE",
-    ) -> None:
-        self.MPI = MPI
-        super().__init__(filename, mode=mode, encoding=None, delay=False)
-
-    def _open(self):
-        return _MPIFileStream(self.baseFilename, self.MPI, self.mode)
-
-    def setStream(self, stream):
-        """Stream canot be reasigned in MPI mode."""
-        raise NotImplementedError("Unable to do for MPI file handler!")
-
-
-def set_log_handles(
-    level: int, log_path: Optional["Path"] = None, mpi_log: Optional[str] = None
-):
-    """Set desired level for package loggers and add file handlers.
-
-    Parameters
-    ----------
-    level : int
-        logging level
-    log_path : Optional[str]
-        path to log file, if None logs will be send only to console. If the parent
-        directory does not exist it will be automatically created, by default None
-    mpi_log : Optional[str], optional
-        mpi log type. Has three options. `master` will output logs to file and console
-        only from rank==0. `collect` will write messages from all ranks to one file
-        opened under rank==0 and to console. `workers` will open one log file for each
-        worker designated by its rank, console behaviour is the same as for `collect`.
-        If this argument is specified, package 'mpi4py' must be already installed.
-        by default None
-
-    Raises
-    ------
-    RuntimeError
-        If the argument `mpi_log` is specified, package `mpi4py` is not installed.
-
-    References
-    ----------
-    https://groups.google.com/g/mpi4py/c/SaNzc8bdj6U
-    https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error
-    https://stackoverflow.com/questions/56085015/suppress-openmp-debug-messages-when-running-tensorflow-on-cpu
-
-    Notes
-    -----
-    Logging levels:
-
-    +---------+--------------+----------------+----------------+----------------+
-    |         | our notation | python logging | tensorflow cpp | OpenMP         |
-    +=========+==============+================+================+================+
-    | debug   | 10           | 10             | 0              | 1/on/true/yes  |
-    +---------+--------------+----------------+----------------+----------------+
-    | info    | 20           | 20             | 1              | 0/off/false/no |
-    +---------+--------------+----------------+----------------+----------------+
-    | warning | 30           | 30             | 2              | 0/off/false/no |
-    +---------+--------------+----------------+----------------+----------------+
-    | error   | 40           | 40             | 3              | 0/off/false/no |
-    +---------+--------------+----------------+----------------+----------------+
-
-    """
-    # silence logging for OpenMP when running on CPU if level is any other than debug
-    if level <= 10:
-        os.environ["KMP_WARNINGS"] = "FALSE"
-
-    # set TF cpp internal logging level
-    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(int((level / 10) - 1))
-
-    # get root logger
-    root_log = logging.getLogger("deepmd")
-    root_log.propagate = False
-
-    root_log.setLevel(level)
-
-    # check if arguments are present
-    MPI = None
-    if mpi_log:
-        try:
-            from mpi4py import (
-                MPI,
-            )
-        except ImportError as e:
-            raise RuntimeError(
-                "You cannot specify 'mpi_log' when mpi4py not installed"
-            ) from e
-
-    # * add console handler ************************************************************
-    ch = logging.StreamHandler()
-    if MPI:
-        rank = MPI.COMM_WORLD.Get_rank()
-        if mpi_log == "master":
-            ch.setFormatter(CFORMATTER)
-            ch.addFilter(_MPIMasterFilter(rank))
-        else:
-            ch.setFormatter(CFORMATTER_MPI)
-            ch.addFilter(_MPIRankFilter(rank))
-    else:
-        ch.setFormatter(CFORMATTER)
-
-    ch.setLevel(level)
-    ch.addFilter(_AppFilter())
-    # clean old handlers before adding new one
-    root_log.handlers.clear()
-    root_log.addHandler(ch)
-
-    # * add file handler ***************************************************************
-    if log_path:
-        # create directory
-        log_path.parent.mkdir(exist_ok=True, parents=True)
-
-        fh = None
-
-        if mpi_log == "master":
-            rank = MPI.COMM_WORLD.Get_rank()
-            if rank == 0:
-                fh = logging.FileHandler(log_path, mode="w")
-                fh.addFilter(_MPIMasterFilter(rank))
-                fh.setFormatter(FFORMATTER)
-        elif mpi_log == "collect":
-            rank = MPI.COMM_WORLD.Get_rank()
-            fh = _MPIHandler(log_path, MPI, mode=MPI.MODE_WRONLY | MPI.MODE_CREATE)
-            fh.addFilter(_MPIRankFilter(rank))
-            fh.setFormatter(FFORMATTER_MPI)
-        elif mpi_log == "workers":
-            rank = MPI.COMM_WORLD.Get_rank()
-            # if file has suffix than inser rank number before suffix
-            # e.g deepmd.log -> deepmd_<rank>.log
-            # if no suffix is present, insert rank as suffix
-            # e.g. deepmdlog -> deepmdlog.<rank>
-            if log_path.suffix:
-                worker_log = (log_path.parent / f"{log_path.stem}_{rank}").with_suffix(
-                    log_path.suffix
-                )
-            else:
-                worker_log = log_path.with_suffix(f".{rank}")
-
-            fh = logging.FileHandler(worker_log, mode="w")
-            fh.setFormatter(FFORMATTER)
-        else:
-            fh = logging.FileHandler(log_path, mode="w")
-            fh.setFormatter(FFORMATTER)
-
-        if fh:
-            fh.setLevel(level)
-            fh.addFilter(_AppFilter())
-            root_log.addHandler(fh)
diff --git a/deepmd/model/model_stat.py b/deepmd/model/model_stat.py
index d2cc918b64..933a634ce8 100644
--- a/deepmd/model/model_stat.py
+++ b/deepmd/model/model_stat.py
@@ -1,68 +1,13 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from collections import (
-    defaultdict,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.model_stat import (
+    _make_all_stat_ref,
+    make_stat_input,
+    merge_sys_stat,
 )
 
-import numpy as np
-
-
-def _make_all_stat_ref(data, nbatches):
-    all_stat = defaultdict(list)
-    for ii in range(data.get_nsystems()):
-        for jj in range(nbatches):
-            stat_data = data.get_batch(sys_idx=ii)
-            for dd in stat_data:
-                if dd == "natoms_vec":
-                    stat_data[dd] = stat_data[dd].astype(np.int32)
-                all_stat[dd].append(stat_data[dd])
-    return all_stat
-
-
-def make_stat_input(data, nbatches, merge_sys=True):
-    """Pack data for statistics.
-
-    Parameters
-    ----------
-    data
-        The data
-    nbatches : int
-        The number of batches
-    merge_sys : bool (True)
-        Merge system data
-
-    Returns
-    -------
-    all_stat:
-        A dictionary of list of list storing data for stat.
-        if merge_sys == False data can be accessed by
-            all_stat[key][sys_idx][batch_idx][frame_idx]
-        else merge_sys == True can be accessed by
-            all_stat[key][batch_idx][frame_idx]
-    """
-    all_stat = defaultdict(list)
-    for ii in range(data.get_nsystems()):
-        sys_stat = defaultdict(list)
-        for jj in range(nbatches):
-            stat_data = data.get_batch(sys_idx=ii)
-            for dd in stat_data:
-                if dd == "natoms_vec":
-                    stat_data[dd] = stat_data[dd].astype(np.int32)
-                sys_stat[dd].append(stat_data[dd])
-        for dd in sys_stat:
-            if merge_sys:
-                for bb in sys_stat[dd]:
-                    all_stat[dd].append(bb)
-            else:
-                all_stat[dd].append(sys_stat[dd])
-    return all_stat
-
-
-def merge_sys_stat(all_stat):
-    first_key = next(iter(all_stat.keys()))
-    nsys = len(all_stat[first_key])
-    ret = defaultdict(list)
-    for ii in range(nsys):
-        for dd in all_stat:
-            for bb in all_stat[dd][ii]:
-                ret[dd].append(bb)
-    return ret
+__all__ = [
+    "make_stat_input",
+    "merge_sys_stat",
+    "_make_all_stat_ref",  # used by tests
+]
diff --git a/deepmd/nvnmd/utils/argcheck.py b/deepmd/nvnmd/utils/argcheck.py
index 2dc17ebc27..2b9362efb0 100644
--- a/deepmd/nvnmd/utils/argcheck.py
+++ b/deepmd/nvnmd/utils/argcheck.py
@@ -1,70 +1,9 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from dargs import (
-    Argument,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.argcheck_nvnmd import (
+    nvnmd_args,
 )
 
-
-def nvnmd_args():
-    doc_version = (
-        "configuration the nvnmd version (0 | 1), 0 for 4 types, 1 for 32 types"
-    )
-    doc_max_nnei = "configuration the max number of neighbors, 128|256 for version 0, 128 for version 1"
-    doc_net_size_file = (
-        "configuration the number of nodes of fitting_net, just can be set as 128"
-    )
-    doc_map_file = "A file containing the mapping tables to replace the calculation of embedding nets"
-    doc_config_file = "A file containing the parameters about how to implement the model in certain hardware"
-    doc_weight_file = "a *.npy file containing the weights of the model"
-    doc_enable = "enable the nvnmd training"
-    doc_restore_descriptor = (
-        "enable to restore the parameter of embedding_net from weight.npy"
-    )
-    doc_restore_fitting_net = (
-        "enable to restore the parameter of fitting_net from weight.npy"
-    )
-    doc_quantize_descriptor = "enable the quantizatioin of descriptor"
-    doc_quantize_fitting_net = "enable the quantizatioin of fitting_net"
-    args = [
-        Argument("version", int, optional=False, default=0, doc=doc_version),
-        Argument("max_nnei", int, optional=False, default=128, doc=doc_max_nnei),
-        Argument("net_size", int, optional=False, default=128, doc=doc_net_size_file),
-        Argument("map_file", str, optional=False, default="none", doc=doc_map_file),
-        Argument(
-            "config_file", str, optional=False, default="none", doc=doc_config_file
-        ),
-        Argument(
-            "weight_file", str, optional=False, default="none", doc=doc_weight_file
-        ),
-        Argument("enable", bool, optional=False, default=False, doc=doc_enable),
-        Argument(
-            "restore_descriptor",
-            bool,
-            optional=False,
-            default=False,
-            doc=doc_restore_descriptor,
-        ),
-        Argument(
-            "restore_fitting_net",
-            bool,
-            optional=False,
-            default=False,
-            doc=doc_restore_fitting_net,
-        ),
-        Argument(
-            "quantize_descriptor",
-            bool,
-            optional=False,
-            default=False,
-            doc=doc_quantize_descriptor,
-        ),
-        Argument(
-            "quantize_fitting_net",
-            bool,
-            optional=False,
-            default=False,
-            doc=doc_quantize_fitting_net,
-        ),
-    ]
-
-    doc_nvnmd = "The nvnmd options."
-    return Argument("nvnmd", dict, args, [], optional=True, doc=doc_nvnmd)
+__all__ = [
+    "nvnmd_args",
+]
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 2c1d235801..05e7c767b8 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1,2028 +1,19 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import json
-import logging
-from typing import (
-    Callable,
-    List,
-    Optional,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.argcheck import (
+    gen_args,
+    gen_doc,
+    gen_json,
+    list_to_doc,
+    normalize,
+    type_embedding_args,
 )
 
-from dargs import (
-    Argument,
-    ArgumentEncoder,
-    Variant,
-    dargs,
-)
-
-from deepmd.common import (
-    ACTIVATION_FN_DICT,
-    PRECISION_DICT,
-)
-from deepmd.nvnmd.utils.argcheck import (
-    nvnmd_args,
-)
-from deepmd.utils.plugin import (
-    Plugin,
-)
-
-log = logging.getLogger(__name__)
-
-
-def list_to_doc(xx):
-    items = []
-    for ii in xx:
-        if len(items) == 0:
-            items.append(f'"{ii}"')
-        else:
-            items.append(f', "{ii}"')
-    items.append(".")
-    return "".join(items)
-
-
-def make_link(content, ref_key):
-    return (
-        f"`{content} <{ref_key}_>`_"
-        if not dargs.RAW_ANCHOR
-        else f"`{content} <#{ref_key}>`_"
-    )
-
-
-def type_embedding_args():
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_seed = "Random seed for parameter initialization"
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net are trainable"
-
-    return [
-        Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, default=None, doc=doc_seed),
-    ]
-
-
-def spin_args():
-    doc_use_spin = "Whether to use atomic spin model for each atom type"
-    doc_spin_norm = "The magnitude of atomic spin for each atom type with spin"
-    doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin"
-
-    return [
-        Argument("use_spin", List[bool], doc=doc_use_spin),
-        Argument("spin_norm", List[float], doc=doc_spin_norm),
-        Argument("virtual_len", List[float], doc=doc_virtual_len),
-    ]
-
-
-#  --- Descriptor configurations: --- #
-
-
-class ArgsPlugin:
-    def __init__(self) -> None:
-        self.__plugin = Plugin()
-
-    def register(
-        self, name: str, alias: Optional[List[str]] = None
-    ) -> Callable[[], List[Argument]]:
-        """Register a descriptor argument plugin.
-
-        Parameters
-        ----------
-        name : str
-            the name of a descriptor
-        alias : List[str], optional
-            the list of aliases of this descriptor
-
-        Returns
-        -------
-        Callable[[], List[Argument]]
-            the registered descriptor argument method
-
-        Examples
-        --------
-        >>> some_plugin = ArgsPlugin()
-        >>> @some_plugin.register("some_descrpt")
-            def descrpt_some_descrpt_args():
-                return []
-        """
-        # convert alias to hashed item
-        if isinstance(alias, list):
-            alias = tuple(alias)
-        return self.__plugin.register((name, alias))
-
-    def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]:
-        """Get all arguments.
-
-        Parameters
-        ----------
-        exclude_hybrid : bool
-            exclude hybrid descriptor to prevent circular calls
-
-        Returns
-        -------
-        List[Argument]
-            all arguments
-        """
-        arguments = []
-        for (name, alias), metd in self.__plugin.plugins.items():
-            if exclude_hybrid and name == "hybrid":
-                continue
-            arguments.append(
-                Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias)
-            )
-        return arguments
-
-
-descrpt_args_plugin = ArgsPlugin()
-
-
-@descrpt_args_plugin.register("loc_frame")
-def descrpt_local_frame_args():
-    doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor."
-    doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius."
-    doc_rcut = "The cut-off radius. The default value is 6.0"
-    doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\
-- axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
-- axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\
-- axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\
-- axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
-- axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\
-- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance."
-
-    return [
-        Argument("sel_a", List[int], optional=False, doc=doc_sel_a),
-        Argument("sel_r", List[int], optional=False, doc=doc_sel_r),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule),
-    ]
-
-
-@descrpt_args_plugin.register("se_e2_a", alias=["se_a"])
-def descrpt_se_a_args():
-    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net is trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "axis_neuron",
-            int,
-            optional=True,
-            default=4,
-            alias=["n_axis_neuron"],
-            doc=doc_axis_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_e3", alias=["se_at", "se_a_3be", "se_t"])
-def descrpt_se_t_args():
-    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net are trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"])
-def descrpt_se_a_tpe_args():
-    doc_type_nchanl = "number of channels for type embedding"
-    doc_type_nlayer = "number of hidden layers of type embedding net"
-    doc_numb_aparam = "dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded."
-
-    return [
-        *descrpt_se_a_args(),
-        Argument("type_nchanl", int, optional=True, default=4, doc=doc_type_nchanl),
-        Argument("type_nlayer", int, optional=True, default=2, doc=doc_type_nlayer),
-        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
-    ]
-
-
-@descrpt_args_plugin.register("se_e2_r", alias=["se_r"])
-def descrpt_se_r_args():
-    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net are trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("hybrid")
-def descrpt_hybrid_args():
-    doc_list = "A list of descriptor definitions"
-
-    return [
-        Argument(
-            "list",
-            list,
-            optional=False,
-            doc=doc_list,
-            repeat=True,
-            sub_fields=[],
-            sub_variants=[descrpt_variant_type_args(exclude_hybrid=True)],
-            fold_subdoc=True,
-        )
-    ]
-
-
-def descrpt_se_atten_common_args():
-    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
-    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net is trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_attn = "The length of hidden vectors in attention layers"
-    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True"
-    doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates"
-    doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"
-
-    return [
-        Argument(
-            "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel
-        ),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "axis_neuron",
-            int,
-            optional=True,
-            default=4,
-            alias=["n_axis_neuron"],
-            doc=doc_axis_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument("attn", int, optional=True, default=128, doc=doc_attn),
-        Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
-        Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
-        Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask),
-    ]
-
-
-@descrpt_args_plugin.register("se_atten")
-def descrpt_se_atten_args():
-    doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible."
-    doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True."
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
-
-    return [
-        *descrpt_se_atten_common_args(),
-        Argument(
-            "stripped_type_embedding",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_stripped_type_embedding,
-        ),
-        Argument(
-            "smooth_type_embdding",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_smooth_type_embdding,
-        ),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_atten_v2")
-def descrpt_se_atten_v2_args():
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
-
-    return [
-        *descrpt_se_atten_common_args(),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_a_ebd_v2", alias=["se_a_tpe_v2"])
-def descrpt_se_a_ebd_v2_args():
-    return descrpt_se_a_args()
-
-
-@descrpt_args_plugin.register("se_a_mask")
-def descrpt_se_a_mask_args():
-    doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net is trainable"
-    doc_seed = "Random seed for parameter initialization"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "axis_neuron",
-            int,
-            optional=True,
-            default=4,
-            alias=["n_axis_neuron"],
-            doc=doc_axis_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-    ]
-
-
-def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
-    link_lf = make_link("loc_frame", "model/descriptor[loc_frame]")
-    link_se_e2_a = make_link("se_e2_a", "model/descriptor[se_e2_a]")
-    link_se_e2_r = make_link("se_e2_r", "model/descriptor[se_e2_r]")
-    link_se_e3 = make_link("se_e3", "model/descriptor[se_e3]")
-    link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]")
-    link_hybrid = make_link("hybrid", "model/descriptor[hybrid]")
-    link_se_atten = make_link("se_atten", "model/descriptor[se_atten]")
-    link_se_atten_v2 = make_link("se_atten_v2", "model/descriptor[se_atten_v2]")
-    doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\
-- `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
-- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
-- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
-- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
-- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
-- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
-- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\
-- `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\
-- `hybrid`: Concatenate of a list of descriptors as a new descriptor."
-
-    return Variant(
-        "type",
-        descrpt_args_plugin.get_all_argument(exclude_hybrid=exclude_hybrid),
-        doc=doc_descrpt_type,
-    )
-
-
-#  --- Fitting net configurations: --- #
-fitting_args_plugin = ArgsPlugin()
-
-
-@fitting_args_plugin.register("ener")
-def fitting_ener():
-    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
-    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
-- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
-- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
-    doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-    doc_atom_ener = "Specify the atomic energy in vacuum for each type"
-    doc_layer_name = (
-        "The name of the each layer. The length of this list should be equal to n_neuron + 1. "
-        "If two layers, either in the same fitting or different fittings, "
-        "have the same name, they will share the same neural network parameters. "
-        "The shape of these layers should be the same. "
-        "If null is given for a layer, parameters will not be shared."
-    )
-    doc_use_aparam_as_mask = (
-        "Whether to use the aparam as a mask in input."
-        "If True, the aparam will not be used in fitting net for embedding."
-        "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True."
-    )
-
-    return [
-        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
-        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
-        Argument(
-            "neuron",
-            List[int],
-            optional=True,
-            default=[120, 120, 120],
-            alias=["n_neuron"],
-            doc=doc_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument(
-            "trainable",
-            [List[bool], bool],
-            optional=True,
-            default=True,
-            doc=doc_trainable,
-        ),
-        Argument(
-            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "atom_ener",
-            List[Optional[float]],
-            optional=True,
-            default=[],
-            doc=doc_atom_ener,
-        ),
-        Argument("layer_name", List[str], optional=True, doc=doc_layer_name),
-        Argument(
-            "use_aparam_as_mask",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_use_aparam_as_mask,
-        ),
-    ]
-
-
-@fitting_args_plugin.register("dos")
-def fitting_dos():
-    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
-    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
-- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
-- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1."
-    doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-    doc_numb_dos = (
-        "The number of gridpoints on which the DOS is evaluated (NEDOS in VASP)"
-    )
-
-    return [
-        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
-        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
-        Argument(
-            "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("precision", str, optional=True, default="float64", doc=doc_precision),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument(
-            "trainable",
-            [List[bool], bool],
-            optional=True,
-            default=True,
-            doc=doc_trainable,
-        ),
-        Argument(
-            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument("numb_dos", int, optional=True, default=300, doc=doc_numb_dos),
-    ]
-
-
-@fitting_args_plugin.register("polar")
-def fitting_polar():
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_scale = "The output of the fitting net (polarizability matrix) will be scaled by ``scale``"
-    # doc_diag_shift = 'The diagonal part of the polarizability matrix  will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.'
-    doc_fit_diag = "Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix."
-    doc_sel_type = "The atom types for which the atomic polarizability will be provided. If not set, all types will be selected."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-
-    # YWolfeee: user can decide whether to use shift diag
-    doc_shift_diag = "Whether to shift the diagonal of polar, which is beneficial to training. Default is true."
-
-    return [
-        Argument(
-            "neuron",
-            List[int],
-            optional=True,
-            default=[120, 120, 120],
-            alias=["n_neuron"],
-            doc=doc_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag),
-        Argument(
-            "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale
-        ),
-        # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
-        Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag),
-        Argument(
-            "sel_type",
-            [List[int], int, None],
-            optional=True,
-            alias=["pol_type"],
-            doc=doc_sel_type,
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-    ]
-
-
-# def fitting_global_polar():
-#    return fitting_polar()
-
-
-@fitting_args_plugin.register("dipole")
-def fitting_dipole():
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_sel_type = "The atom types for which the atomic dipole will be provided. If not set, all types will be selected."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-    return [
-        Argument(
-            "neuron",
-            List[int],
-            optional=True,
-            default=[120, 120, 120],
-            alias=["n_neuron"],
-            doc=doc_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument(
-            "sel_type",
-            [List[int], int, None],
-            optional=True,
-            alias=["dipole_type"],
-            doc=doc_sel_type,
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-    ]
-
-
-#   YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support.
-def fitting_variant_type_args():
-    doc_descrpt_type = "The type of the fitting. See explanation below. \n\n\
-- `ener`: Fit an energy model (potential energy surface).\n\n\
-- `dos` : Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). See `loss` parameter. \n\n\
-- `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\
-- `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n"
-
-    return Variant(
-        "type",
-        fitting_args_plugin.get_all_argument(),
-        optional=True,
-        default_tag="ener",
-        doc=doc_descrpt_type,
-    )
-
-
-#  --- Modifier configurations: --- #
-def modifier_dipole_charge():
-    doc_model_name = "The name of the frozen dipole model file."
-    doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. "
-    doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}"
-    doc_ewald_h = "The grid spacing of the FFT grid. Unit is A"
-    doc_ewald_beta = f"The splitting parameter of Ewald sum. Unit is A^{-1}"
-
-    return [
-        Argument("model_name", str, optional=False, doc=doc_model_name),
-        Argument(
-            "model_charge_map", List[float], optional=False, doc=doc_model_charge_map
-        ),
-        Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map),
-        Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta),
-        Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h),
-    ]
-
-
-def modifier_variant_type_args():
-    doc_modifier_type = "The type of modifier. See explanation below.\n\n\
--`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction"
-    return Variant(
-        "type",
-        [
-            Argument("dipole_charge", dict, modifier_dipole_charge()),
-        ],
-        optional=False,
-        doc=doc_modifier_type,
-    )
-
-
-#  --- model compression configurations: --- #
-def model_compression():
-    doc_model_file = "The input model file, which will be compressed by the DeePMD-kit."
-    doc_table_config = "The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)."
-    doc_min_nbor_dist = (
-        "The nearest distance between neighbor atoms saved in the frozen model."
-    )
-
-    return [
-        Argument("model_file", str, optional=False, doc=doc_model_file),
-        Argument("table_config", List[float], optional=False, doc=doc_table_config),
-        Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist),
-    ]
-
-
-#  --- model compression configurations: --- #
-def model_compression_type_args():
-    doc_compress_type = "The type of model compression, which should be consistent with the descriptor type."
-
-    return Variant(
-        "type",
-        [Argument("se_e2_a", dict, model_compression(), alias=["se_a"])],
-        optional=True,
-        default_tag="se_e2_a",
-        doc=doc_compress_type,
-    )
-
-
-def model_args(exclude_hybrid=False):
-    doc_type_map = "A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect."
-    doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics."
-    doc_data_stat_protect = "Protect parameter for atomic energy regression."
-    doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias."
-    doc_type_embedding = "The type embedding."
-    doc_modifier = "The modifier of model output."
-    doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly."
-    doc_smin_alpha = "The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided."
-    doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
-    doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
-    doc_srtab_add_bias = "Whether add energy bias from the statistics of the data to short-range tabulated atomic energy. It only takes effect when `use_srtab` is provided."
-    doc_compress_config = "Model compression configurations"
-    doc_spin = "The settings for systems with spin."
-    hybrid_models = []
-    if not exclude_hybrid:
-        hybrid_models.extend(
-            [
-                pairwise_dprc(),
-                linear_ener_model_args(),
-            ]
-        )
-    return Argument(
-        "model",
-        dict,
-        [
-            Argument("type_map", List[str], optional=True, doc=doc_type_map),
-            Argument(
-                "data_stat_nbatch",
-                int,
-                optional=True,
-                default=10,
-                doc=doc_data_stat_nbatch,
-            ),
-            Argument(
-                "data_stat_protect",
-                float,
-                optional=True,
-                default=1e-2,
-                doc=doc_data_stat_protect,
-            ),
-            Argument(
-                "data_bias_nsample",
-                int,
-                optional=True,
-                default=10,
-                doc=doc_data_bias_nsample,
-            ),
-            Argument("use_srtab", str, optional=True, doc=doc_use_srtab),
-            Argument("smin_alpha", float, optional=True, doc=doc_smin_alpha),
-            Argument("sw_rmin", float, optional=True, doc=doc_sw_rmin),
-            Argument("sw_rmax", float, optional=True, doc=doc_sw_rmax),
-            Argument(
-                "srtab_add_bias",
-                bool,
-                optional=True,
-                default=True,
-                doc=doc_srtab_add_bias,
-            ),
-            Argument(
-                "type_embedding",
-                dict,
-                type_embedding_args(),
-                [],
-                optional=True,
-                doc=doc_type_embedding,
-            ),
-            Argument(
-                "modifier",
-                dict,
-                [],
-                [modifier_variant_type_args()],
-                optional=True,
-                doc=doc_modifier,
-            ),
-            Argument(
-                "compress",
-                dict,
-                [],
-                [model_compression_type_args()],
-                optional=True,
-                doc=doc_compress_config,
-                fold_subdoc=True,
-            ),
-            Argument("spin", dict, spin_args(), [], optional=True, doc=doc_spin),
-        ],
-        [
-            Variant(
-                "type",
-                [
-                    standard_model_args(),
-                    multi_model_args(),
-                    frozen_model_args(),
-                    pairtab_model_args(),
-                    *hybrid_models,
-                ],
-                optional=True,
-                default_tag="standard",
-            ),
-        ],
-    )
-
-
-def standard_model_args() -> Argument:
-    doc_descrpt = "The descriptor of atomic environment."
-    doc_fitting = "The fitting of physical properties."
-
-    ca = Argument(
-        "standard",
-        dict,
-        [
-            Argument(
-                "descriptor", dict, [], [descrpt_variant_type_args()], doc=doc_descrpt
-            ),
-            Argument(
-                "fitting_net",
-                dict,
-                [],
-                [fitting_variant_type_args()],
-                doc=doc_fitting,
-            ),
-        ],
-        doc="Stardard model, which contains a descriptor and a fitting.",
-    )
-    return ca
-
-
-def multi_model_args() -> Argument:
-    doc_descrpt = "The descriptor of atomic environment. See model[standard]/descriptor for details."
-    doc_fitting_net_dict = "The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`."
-
-    ca = Argument(
-        "multi",
-        dict,
-        [
-            Argument(
-                "descriptor",
-                dict,
-                [],
-                [descrpt_variant_type_args()],
-                doc=doc_descrpt,
-                fold_subdoc=True,
-            ),
-            Argument("fitting_net_dict", dict, doc=doc_fitting_net_dict),
-        ],
-        doc="Multiple-task model.",
-    )
-    return ca
-
-
-def pairwise_dprc() -> Argument:
-    qm_model_args = model_args(exclude_hybrid=True)
-    qm_model_args.name = "qm_model"
-    qm_model_args.fold_subdoc = True
-    qmmm_model_args = model_args(exclude_hybrid=True)
-    qmmm_model_args.name = "qmmm_model"
-    qmmm_model_args.fold_subdoc = True
-    ca = Argument(
-        "pairwise_dprc",
-        dict,
-        [
-            qm_model_args,
-            qmmm_model_args,
-        ],
-    )
-    return ca
-
-
-def frozen_model_args() -> Argument:
-    doc_model_file = "Path to the frozen model file."
-    ca = Argument(
-        "frozen",
-        dict,
-        [
-            Argument("model_file", str, optional=False, doc=doc_model_file),
-        ],
-    )
-    return ca
-
-
-def pairtab_model_args() -> Argument:
-    doc_tab_file = "Path to the tabulation file."
-    doc_rcut = "The cut-off radius."
-    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
-    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    ca = Argument(
-        "pairtab",
-        dict,
-        [
-            Argument("tab_file", str, optional=False, doc=doc_tab_file),
-            Argument("rcut", float, optional=False, doc=doc_rcut),
-            Argument("sel", [int, List[int], str], optional=False, doc=doc_sel),
-        ],
-        doc="Pairwise tabulation energy model.",
-    )
-    return ca
-
-
-def linear_ener_model_args() -> Argument:
-    doc_weights = (
-        "If the type is list of float, a list of weights for each model. "
-        'If "mean", the weights are set to be 1 / len(models). '
-        'If "sum", the weights are set to be 1.'
-    )
-    models_args = model_args(exclude_hybrid=True)
-    models_args.name = "models"
-    models_args.fold_subdoc = True
-    models_args.set_dtype(list)
-    models_args.set_repeat(True)
-    models_args.doc = "The sub-models."
-    ca = Argument(
-        "linear_ener",
-        dict,
-        [
-            models_args,
-            Argument(
-                "weights",
-                [list, str],
-                optional=False,
-                doc=doc_weights,
-            ),
-        ],
-    )
-    return ca
-
-
-#  --- Learning rate configurations: --- #
-def learning_rate_exp():
-    doc_start_lr = "The learning rate at the start of the training."
-    doc_stop_lr = "The desired learning rate at the end of the training."
-    doc_decay_steps = (
-        "The learning rate is decaying every this number of training steps."
-    )
-
-    args = [
-        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
-        Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
-        Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps),
-    ]
-    return args
-
-
-def learning_rate_variant_type_args():
-    doc_lr = "The type of the learning rate."
-
-    return Variant(
-        "type",
-        [Argument("exp", dict, learning_rate_exp())],
-        optional=True,
-        default_tag="exp",
-        doc=doc_lr,
-    )
-
-
-def learning_rate_args():
-    doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`."
-    doc_lr = "The definitio of learning rate"
-    return Argument(
-        "learning_rate",
-        dict,
-        [
-            Argument(
-                "scale_by_worker",
-                str,
-                optional=True,
-                default="linear",
-                doc=doc_scale_by_worker,
-            )
-        ],
-        [learning_rate_variant_type_args()],
-        optional=True,
-        doc=doc_lr,
-    )
-
-
-def learning_rate_dict_args():
-    doc_learning_rate_dict = (
-        "The dictionary of definitions of learning rates in multi-task mode. "
-        "Each learning_rate_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of learning rate.\n"
-    )
-    ca = Argument(
-        "learning_rate_dict", dict, [], [], optional=True, doc=doc_learning_rate_dict
-    )
-    return ca
-
-
-#  --- Loss configurations: --- #
-def start_pref(item, label=None, abbr=None):
-    if label is None:
-        label = item
-    if abbr is None:
-        abbr = item
-    return f"The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the {label} label should be provided by file {label}.npy in each data system. If both start_pref_{abbr} and limit_pref_{abbr} are set to 0, then the {item} will be ignored."
-
-
-def limit_pref(item):
-    return f"The prefactor of {item} loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity."
-
-
-loss_args_plugin = ArgsPlugin()
-
-
-@loss_args_plugin.register("ener")
-def loss_ener():
-    doc_start_pref_e = start_pref("energy", abbr="e")
-    doc_limit_pref_e = limit_pref("energy")
-    doc_start_pref_f = start_pref("force", abbr="f")
-    doc_limit_pref_f = limit_pref("force")
-    doc_start_pref_v = start_pref("virial", abbr="v")
-    doc_limit_pref_v = limit_pref("virial")
-    doc_start_pref_ae = start_pref("atomic energy", label="atom_ener", abbr="ae")
-    doc_limit_pref_ae = limit_pref("atomic energy")
-    doc_start_pref_pf = start_pref(
-        "atomic prefactor force", label="atom_pref", abbr="pf"
-    )
-    doc_limit_pref_pf = limit_pref("atomic prefactor force")
-    doc_start_pref_gf = start_pref("generalized force", label="drdq", abbr="gf")
-    doc_limit_pref_gf = limit_pref("generalized force")
-    doc_numb_generalized_coord = "The dimension of generalized coordinates. Required when generalized force loss is used."
-    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
-    doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
-    return [
-        Argument(
-            "start_pref_e",
-            [float, int],
-            optional=True,
-            default=0.02,
-            doc=doc_start_pref_e,
-        ),
-        Argument(
-            "limit_pref_e",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_e,
-        ),
-        Argument(
-            "start_pref_f",
-            [float, int],
-            optional=True,
-            default=1000,
-            doc=doc_start_pref_f,
-        ),
-        Argument(
-            "limit_pref_f",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_f,
-        ),
-        Argument(
-            "start_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_v,
-        ),
-        Argument(
-            "limit_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_v,
-        ),
-        Argument(
-            "start_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_ae,
-        ),
-        Argument(
-            "limit_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_ae,
-        ),
-        Argument(
-            "start_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_pf,
-        ),
-        Argument(
-            "limit_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_pf,
-        ),
-        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
-        Argument(
-            "enable_atom_ener_coeff",
-            [bool],
-            optional=True,
-            default=False,
-            doc=doc_enable_atom_ener_coeff,
-        ),
-        Argument(
-            "start_pref_gf",
-            float,
-            optional=True,
-            default=0.0,
-            doc=doc_start_pref_gf,
-        ),
-        Argument(
-            "limit_pref_gf",
-            float,
-            optional=True,
-            default=0.0,
-            doc=doc_limit_pref_gf,
-        ),
-        Argument(
-            "numb_generalized_coord",
-            int,
-            optional=True,
-            default=0,
-            doc=doc_numb_generalized_coord,
-        ),
-    ]
-
-
-@loss_args_plugin.register("ener_spin")
-def loss_ener_spin():
-    doc_start_pref_e = start_pref("energy")
-    doc_limit_pref_e = limit_pref("energy")
-    doc_start_pref_fr = start_pref("force_real_atom")
-    doc_limit_pref_fr = limit_pref("force_real_atom")
-    doc_start_pref_fm = start_pref("force_magnetic")
-    doc_limit_pref_fm = limit_pref("force_magnetic")
-    doc_start_pref_v = start_pref("virial")
-    doc_limit_pref_v = limit_pref("virial")
-    doc_start_pref_ae = start_pref("atom_ener")
-    doc_limit_pref_ae = limit_pref("atom_ener")
-    doc_start_pref_pf = start_pref("atom_pref")
-    doc_limit_pref_pf = limit_pref("atom_pref")
-    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
-    doc_enable_atom_ener_coeff = r"If true, the energy will be computed as \sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
-    return [
-        Argument(
-            "start_pref_e",
-            [float, int],
-            optional=True,
-            default=0.02,
-            doc=doc_start_pref_e,
-        ),
-        Argument(
-            "limit_pref_e",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_e,
-        ),
-        Argument(
-            "start_pref_fr",
-            [float, int],
-            optional=True,
-            default=1000,
-            doc=doc_start_pref_fr,
-        ),
-        Argument(
-            "limit_pref_fr",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_fr,
-        ),
-        Argument(
-            "start_pref_fm",
-            [float, int],
-            optional=True,
-            default=10000,
-            doc=doc_start_pref_fm,
-        ),
-        Argument(
-            "limit_pref_fm",
-            [float, int],
-            optional=True,
-            default=10.0,
-            doc=doc_limit_pref_fm,
-        ),
-        Argument(
-            "start_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_v,
-        ),
-        Argument(
-            "limit_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_v,
-        ),
-        Argument(
-            "start_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_ae,
-        ),
-        Argument(
-            "limit_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_ae,
-        ),
-        Argument(
-            "start_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_pf,
-        ),
-        Argument(
-            "limit_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_pf,
-        ),
-        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
-        Argument(
-            "enable_atom_ener_coeff",
-            [bool],
-            optional=True,
-            default=False,
-            doc=doc_enable_atom_ener_coeff,
-        ),
-    ]
-
-
-@loss_args_plugin.register("dos")
-def loss_dos():
-    doc_start_pref_dos = start_pref("Density of State (DOS)")
-    doc_limit_pref_dos = limit_pref("Density of State (DOS)")
-    doc_start_pref_cdf = start_pref(
-        "Cumulative Distribution Function (cumulative intergral of DOS)"
-    )
-    doc_limit_pref_cdf = limit_pref(
-        "Cumulative Distribution Function (cumulative intergral of DOS)"
-    )
-    doc_start_pref_ados = start_pref("atomic DOS (site-projected DOS)")
-    doc_limit_pref_ados = limit_pref("atomic DOS (site-projected DOS)")
-    doc_start_pref_acdf = start_pref("Cumulative integral of atomic DOS")
-    doc_limit_pref_acdf = limit_pref("Cumulative integral of atomic DOS")
-    return [
-        Argument(
-            "start_pref_dos",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_dos,
-        ),
-        Argument(
-            "limit_pref_dos",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_dos,
-        ),
-        Argument(
-            "start_pref_cdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_cdf,
-        ),
-        Argument(
-            "limit_pref_cdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_cdf,
-        ),
-        Argument(
-            "start_pref_ados",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_start_pref_ados,
-        ),
-        Argument(
-            "limit_pref_ados",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_ados,
-        ),
-        Argument(
-            "start_pref_acdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_acdf,
-        ),
-        Argument(
-            "limit_pref_acdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_acdf,
-        ),
-    ]
-
-
-# YWolfeee: Modified to support tensor type of loss args.
-@loss_args_plugin.register("tensor")
-def loss_tensor():
-    # doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]."
-    # doc_local_weight =  "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well."
-    doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included."
-    doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0."
-    return [
-        Argument(
-            "pref", [float, int], optional=False, default=None, doc=doc_global_weight
-        ),
-        Argument(
-            "pref_atomic",
-            [float, int],
-            optional=False,
-            default=None,
-            doc=doc_local_weight,
-        ),
-    ]
-
-
-def loss_variant_type_args():
-    doc_loss = "The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`."
-
-    return Variant(
-        "type",
-        loss_args_plugin.get_all_argument(),
-        optional=True,
-        default_tag="ener",
-        doc=doc_loss,
-    )
-
-
-def loss_args():
-    doc_loss = "The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset."
-    ca = Argument(
-        "loss", dict, [], [loss_variant_type_args()], optional=True, doc=doc_loss
-    )
-    return ca
-
-
-def loss_dict_args():
-    doc_loss_dict = (
-        "The dictionary of definitions of multiple loss functions in multi-task mode. "
-        "Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n"
-    )
-    ca = Argument("loss_dict", dict, [], [], optional=True, doc=doc_loss_dict)
-    return ca
-
-
-#  --- Training configurations: --- #
-def training_data_args():  # ! added by Ziyao: new specification style for data systems.
-    link_sys = make_link("systems", "training/training_data/systems")
-    doc_systems = (
-        "The data systems for training. "
-        "This key can be provided with a list that specifies the systems, or be provided with a string "
-        "by which the prefix of all systems are given and the list of the systems is automatically generated."
-    )
-    doc_set_prefix = f"The prefix of the sets in the {link_sys}."
-    doc_batch_size = f'This key can be \n\n\
-- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
-- int: all {link_sys} use the same batch size.\n\n\
-- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
-- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\
-- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.\n\n\
-If MPI is used, the value should be considered as the batch size per task.'
-    doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
-- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
-- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
-- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
-    doc_sys_probs = (
-        "A list of float if specified. "
-        "Should be of the same length as `systems`, "
-        "specifying the probability of each system."
-    )
-
-    args = [
-        Argument(
-            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
-        ),
-        Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
-        Argument(
-            "batch_size",
-            [List[int], int, str],
-            optional=True,
-            default="auto",
-            doc=doc_batch_size,
-        ),
-        Argument(
-            "auto_prob",
-            str,
-            optional=True,
-            default="prob_sys_size",
-            doc=doc_auto_prob_style,
-            alias=[
-                "auto_prob_style",
-            ],
-        ),
-        Argument(
-            "sys_probs",
-            List[float],
-            optional=True,
-            default=None,
-            doc=doc_sys_probs,
-            alias=["sys_weights"],
-        ),
-    ]
-
-    doc_training_data = "Configurations of training data."
-    return Argument(
-        "training_data",
-        dict,
-        optional=True,
-        sub_fields=args,
-        sub_variants=[],
-        doc=doc_training_data,
-    )
-
-
-def validation_data_args():  # ! added by Ziyao: new specification style for data systems.
-    link_sys = make_link("systems", "training/validation_data/systems")
-    doc_systems = (
-        "The data systems for validation. "
-        "This key can be provided with a list that specifies the systems, or be provided with a string "
-        "by which the prefix of all systems are given and the list of the systems is automatically generated."
-    )
-    doc_set_prefix = f"The prefix of the sets in the {link_sys}."
-    doc_batch_size = f'This key can be \n\n\
-- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
-- int: all {link_sys} use the same batch size.\n\n\
-- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
-- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.'
-    doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
-- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
-- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
-- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
-    doc_sys_probs = (
-        "A list of float if specified. "
-        "Should be of the same length as `systems`, "
-        "specifying the probability of each system."
-    )
-    doc_numb_btch = "An integer that specifies the number of batches to be sampled for each validation period."
-
-    args = [
-        Argument(
-            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
-        ),
-        Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
-        Argument(
-            "batch_size",
-            [List[int], int, str],
-            optional=True,
-            default="auto",
-            doc=doc_batch_size,
-        ),
-        Argument(
-            "auto_prob",
-            str,
-            optional=True,
-            default="prob_sys_size",
-            doc=doc_auto_prob_style,
-            alias=[
-                "auto_prob_style",
-            ],
-        ),
-        Argument(
-            "sys_probs",
-            List[float],
-            optional=True,
-            default=None,
-            doc=doc_sys_probs,
-            alias=["sys_weights"],
-        ),
-        Argument(
-            "numb_btch",
-            int,
-            optional=True,
-            default=1,
-            doc=doc_numb_btch,
-            alias=[
-                "numb_batch",
-            ],
-        ),
-    ]
-
-    doc_validation_data = (
-        "Configurations of validation data. Similar to that of training data, "
-        "except that a `numb_btch` argument may be configured"
-    )
-    return Argument(
-        "validation_data",
-        dict,
-        optional=True,
-        default=None,
-        sub_fields=args,
-        sub_variants=[],
-        doc=doc_validation_data,
-    )
-
-
-def mixed_precision_args():  # ! added by Denghui.
-    doc_output_prec = 'The precision for mixed precision params. " \
-        "The trainable variables precision during the mixed precision training process, " \
-        "supported options are float32 only currently.'
-    doc_compute_prec = 'The precision for mixed precision compute. " \
-        "The compute precision during the mixed precision training process, "" \
-        "supported options are float16 and bfloat16 currently.'
-
-    args = [
-        Argument(
-            "output_prec", str, optional=True, default="float32", doc=doc_output_prec
-        ),
-        Argument(
-            "compute_prec", str, optional=False, default="float16", doc=doc_compute_prec
-        ),
-    ]
-
-    doc_mixed_precision = "Configurations of mixed precision."
-    return Argument(
-        "mixed_precision",
-        dict,
-        optional=True,
-        sub_fields=args,
-        sub_variants=[],
-        doc=doc_mixed_precision,
-    )
-
-
-def training_args():  # ! modified by Ziyao: data configuration isolated.
-    doc_numb_steps = "Number of training batch. Each training uses one batch of data."
-    doc_seed = "The random seed for getting frames from the training data set."
-    doc_disp_file = "The file for printing learning curve."
-    doc_disp_freq = "The frequency of printing learning curve."
-    doc_save_freq = "The frequency of saving check point."
-    doc_save_ckpt = "The path prefix of saving check point files."
-    doc_disp_training = "Displaying verbose information during training."
-    doc_time_training = "Timing durining training."
-    doc_profiling = "Profiling during training."
-    doc_profiling_file = "Output file for profiling."
-    doc_enable_profiler = "Enable TensorFlow Profiler (available in TensorFlow 2.3) to analyze performance. The log will be saved to `tensorboard_log_dir`."
-    doc_tensorboard = "Enable tensorboard"
-    doc_tensorboard_log_dir = "The log directory of tensorboard outputs"
-    doc_tensorboard_freq = "The frequency of writing tensorboard events."
-    doc_data_dict = (
-        "The dictionary of multi DataSystems in multi-task mode. "
-        "Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
-        "contains training data and optional validation data definitions."
-    )
-    doc_fitting_weight = (
-        "Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
-        "is the training weight of fitting net `fitting_key`. "
-        "Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. "
-        "Weights will be normalized and minus ones will be ignored. "
-        "If not set, each fitting net will be equally selected when training."
-    )
-
-    arg_training_data = training_data_args()
-    arg_validation_data = validation_data_args()
-    mixed_precision_data = mixed_precision_args()
-
-    args = [
-        arg_training_data,
-        arg_validation_data,
-        mixed_precision_data,
-        Argument(
-            "numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"]
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "disp_file", str, optional=True, default="lcurve.out", doc=doc_disp_file
-        ),
-        Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq),
-        Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq),
-        Argument(
-            "save_ckpt", str, optional=True, default="model.ckpt", doc=doc_save_ckpt
-        ),
-        Argument(
-            "disp_training", bool, optional=True, default=True, doc=doc_disp_training
-        ),
-        Argument(
-            "time_training", bool, optional=True, default=True, doc=doc_time_training
-        ),
-        Argument("profiling", bool, optional=True, default=False, doc=doc_profiling),
-        Argument(
-            "profiling_file",
-            str,
-            optional=True,
-            default="timeline.json",
-            doc=doc_profiling_file,
-        ),
-        Argument(
-            "enable_profiler",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_enable_profiler,
-        ),
-        Argument(
-            "tensorboard", bool, optional=True, default=False, doc=doc_tensorboard
-        ),
-        Argument(
-            "tensorboard_log_dir",
-            str,
-            optional=True,
-            default="log",
-            doc=doc_tensorboard_log_dir,
-        ),
-        Argument(
-            "tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq
-        ),
-        Argument("data_dict", dict, optional=True, doc=doc_data_dict),
-        Argument("fitting_weight", dict, optional=True, doc=doc_fitting_weight),
-    ]
-
-    doc_training = "The training options."
-    return Argument("training", dict, args, [], doc=doc_training)
-
-
-def make_index(keys):
-    ret = []
-    for ii in keys:
-        ret.append(make_link(ii, ii))
-    return ", ".join(ret)
-
-
-def gen_doc(*, make_anchor=True, make_link=True, **kwargs):
-    if make_link:
-        make_anchor = True
-    ptr = []
-    for ii in gen_args():
-        ptr.append(ii.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
-
-    key_words = []
-    for ii in "\n\n".join(ptr).split("\n"):
-        if "argument path" in ii:
-            key_words.append(ii.split(":")[1].replace("`", "").strip())
-    # ptr.insert(0, make_index(key_words))
-
-    return "\n\n".join(ptr)
-
-
-def gen_json(**kwargs):
-    return json.dumps(
-        tuple(gen_args()),
-        cls=ArgumentEncoder,
-    )
-
-
-def gen_args(**kwargs) -> List[Argument]:
-    return [
-        model_args(),
-        learning_rate_args(),
-        learning_rate_dict_args(),
-        loss_args(),
-        loss_dict_args(),
-        training_args(),
-        nvnmd_args(),
-    ]
-
-
-def normalize_multi_task(data):
-    # single-task or multi-task mode
-    if data["model"].get("type", "standard") not in ("standard", "multi"):
-        return data
-    single_fitting_net = "fitting_net" in data["model"].keys()
-    single_training_data = "training_data" in data["training"].keys()
-    single_valid_data = "validation_data" in data["training"].keys()
-    single_loss = "loss" in data.keys()
-    single_learning_rate = "learning_rate" in data.keys()
-    multi_fitting_net = "fitting_net_dict" in data["model"].keys()
-    multi_training_data = "data_dict" in data["training"].keys()
-    multi_loss = "loss_dict" in data.keys()
-    multi_fitting_weight = "fitting_weight" in data["training"].keys()
-    multi_learning_rate = "learning_rate_dict" in data.keys()
-    assert (single_fitting_net == single_training_data) and (
-        multi_fitting_net == multi_training_data
-    ), (
-        "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! "
-        "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' "
-        "must be defined at the same time! Please check your input script. "
-    )
-    assert not (single_fitting_net and multi_fitting_net), (
-        "Single-task mode and multi-task mode can not be performed together. "
-        "Please check your input script and choose just one format! "
-    )
-    assert (
-        single_fitting_net or multi_fitting_net
-    ), "Please define your fitting net and training data! "
-    if multi_fitting_net:
-        assert not single_valid_data, (
-            "In multi-task mode, 'training/validation_data' should not appear "
-            "outside 'training/data_dict'! Please check your input script."
-        )
-        assert (
-            not single_loss
-        ), "In multi-task mode, please use 'model/loss_dict' in stead of 'model/loss'! "
-        assert (
-            "type_map" in data["model"]
-        ), "In multi-task mode, 'model/type_map' must be defined! "
-        data["model"]["type"] = "multi"
-        data["model"]["fitting_net_dict"] = normalize_fitting_net_dict(
-            data["model"]["fitting_net_dict"]
-        )
-        data["training"]["data_dict"] = normalize_data_dict(
-            data["training"]["data_dict"]
-        )
-        data["loss_dict"] = (
-            normalize_loss_dict(
-                data["model"]["fitting_net_dict"].keys(), data["loss_dict"]
-            )
-            if multi_loss
-            else {}
-        )
-        if multi_learning_rate:
-            data["learning_rate_dict"] = normalize_learning_rate_dict(
-                data["model"]["fitting_net_dict"].keys(), data["learning_rate_dict"]
-            )
-        elif single_learning_rate:
-            data[
-                "learning_rate_dict"
-            ] = normalize_learning_rate_dict_with_single_learning_rate(
-                data["model"]["fitting_net_dict"].keys(), data["learning_rate"]
-            )
-        fitting_weight = (
-            data["training"]["fitting_weight"] if multi_fitting_weight else None
-        )
-        data["training"]["fitting_weight"] = normalize_fitting_weight(
-            data["model"]["fitting_net_dict"].keys(),
-            data["training"]["data_dict"].keys(),
-            fitting_weight=fitting_weight,
-        )
-    else:
-        assert not multi_loss, "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! "
-        assert not multi_learning_rate, "In single-task mode, please use 'model/learning_rate' in stead of 'model/learning_rate_dict'! "
-    return data
-
-
-def normalize_fitting_net_dict(fitting_net_dict):
-    new_dict = {}
-    base = Argument("base", dict, [], [fitting_variant_type_args()], doc="")
-    for fitting_key_item in fitting_net_dict:
-        data = base.normalize_value(
-            fitting_net_dict[fitting_key_item], trim_pattern="_*"
-        )
-        base.check_value(data, strict=True)
-        new_dict[fitting_key_item] = data
-    return new_dict
-
-
-def normalize_data_dict(data_dict):
-    new_dict = {}
-    base = Argument(
-        "base", dict, [training_data_args(), validation_data_args()], [], doc=""
-    )
-    for data_system_key_item in data_dict:
-        data = base.normalize_value(data_dict[data_system_key_item], trim_pattern="_*")
-        base.check_value(data, strict=True)
-        new_dict[data_system_key_item] = data
-    return new_dict
-
-
-def normalize_loss_dict(fitting_keys, loss_dict):
-    # check the loss dict
-    failed_loss_keys = [item for item in loss_dict if item not in fitting_keys]
-    assert (
-        not failed_loss_keys
-    ), "Loss dict key(s) {} not have corresponding fitting keys in {}! ".format(
-        str(failed_loss_keys), str(list(fitting_keys))
-    )
-    new_dict = {}
-    base = Argument("base", dict, [], [loss_variant_type_args()], doc="")
-    for item in loss_dict:
-        data = base.normalize_value(loss_dict[item], trim_pattern="_*")
-        base.check_value(data, strict=True)
-        new_dict[item] = data
-    return new_dict
-
-
-def normalize_learning_rate_dict(fitting_keys, learning_rate_dict):
-    # check the learning_rate dict
-    failed_learning_rate_keys = [
-        item for item in learning_rate_dict if item not in fitting_keys
-    ]
-    assert not failed_learning_rate_keys, "Learning rate dict key(s) {} not have corresponding fitting keys in {}! ".format(
-        str(failed_learning_rate_keys), str(list(fitting_keys))
-    )
-    new_dict = {}
-    base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
-    for item in learning_rate_dict:
-        data = base.normalize_value(learning_rate_dict[item], trim_pattern="_*")
-        base.check_value(data, strict=True)
-        new_dict[item] = data
-    return new_dict
-
-
-def normalize_learning_rate_dict_with_single_learning_rate(fitting_keys, learning_rate):
-    new_dict = {}
-    base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
-    data = base.normalize_value(learning_rate, trim_pattern="_*")
-    base.check_value(data, strict=True)
-    for fitting_key in fitting_keys:
-        new_dict[fitting_key] = data
-    return new_dict
-
-
-def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None):
-    # check the mapping
-    failed_data_keys = [item for item in data_keys if item not in fitting_keys]
-    assert (
-        not failed_data_keys
-    ), "Data dict key(s) {} not have corresponding fitting keys in {}! ".format(
-        str(failed_data_keys), str(list(fitting_keys))
-    )
-    empty_fitting_keys = []
-    valid_fitting_keys = []
-    for item in fitting_keys:
-        if item not in data_keys:
-            empty_fitting_keys.append(item)
-        else:
-            valid_fitting_keys.append(item)
-    if empty_fitting_keys:
-        log.warning(
-            "Fitting net(s) {} have no data and will not be used in training.".format(
-                str(empty_fitting_keys)
-            )
-        )
-    num_pair = len(valid_fitting_keys)
-    assert num_pair > 0, "No valid training data systems for fitting nets!"
-
-    # check and normalize the fitting weight
-    new_weight = {}
-    if fitting_weight is None:
-        equal_weight = 1.0 / num_pair
-        for item in fitting_keys:
-            new_weight[item] = equal_weight if item in valid_fitting_keys else 0.0
-    else:
-        failed_weight_keys = [
-            item for item in fitting_weight if item not in fitting_keys
-        ]
-        assert not failed_weight_keys, "Fitting weight key(s) {} not have corresponding fitting keys in {}! ".format(
-            str(failed_weight_keys), str(list(fitting_keys))
-        )
-        sum_prob = 0.0
-        for item in fitting_keys:
-            if item in valid_fitting_keys:
-                if (
-                    item in fitting_weight
-                    and isinstance(fitting_weight[item], (int, float))
-                    and fitting_weight[item] > 0.0
-                ):
-                    sum_prob += fitting_weight[item]
-                    new_weight[item] = fitting_weight[item]
-                else:
-                    valid_fitting_keys.remove(item)
-                    log.warning(
-                        f"Fitting net '{item}' has zero or invalid weight "
-                        "and will not be used in training."
-                    )
-                    new_weight[item] = 0.0
-            else:
-                new_weight[item] = 0.0
-        assert sum_prob > 0.0, "No valid training weight for fitting nets!"
-        # normalize
-        for item in new_weight:
-            new_weight[item] /= sum_prob
-    return new_weight
-
-
-def normalize(data):
-    data = normalize_multi_task(data)
-
-    base = Argument("base", dict, gen_args())
-    data = base.normalize_value(data, trim_pattern="_*")
-    base.check_value(data, strict=True)
-
-    return data
-
-
-if __name__ == "__main__":
-    gen_doc()
+__all__ = [
+    "list_to_doc",
+    "normalize",
+    "gen_doc",
+    "gen_json",
+    "gen_args",
+    "type_embedding_args",
+]
diff --git a/deepmd/utils/compat.py b/deepmd/utils/compat.py
index 5f9c14e6d8..91bf4021ee 100644
--- a/deepmd/utils/compat.py
+++ b/deepmd/utils/compat.py
@@ -1,392 +1,15 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module providing compatibility between `0.x.x` and `1.x.x` input versions."""
-
-import json
-import warnings
-from pathlib import (
-    Path,
-)
-from typing import (
-    Any,
-    Dict,
-    Optional,
-    Sequence,
-    Union,
-)
-
-import numpy as np
-
-from deepmd.common import (
-    j_must_have,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.compat import (
+    convert_input_v0_v1,
+    convert_input_v1_v2,
+    deprecate_numb_test,
+    update_deepmd_input,
 )
 
-
-def convert_input_v0_v1(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    """Convert input from v0 format to v1.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        loaded json/yaml file
-    warning : bool, optional
-        whether to show deprecation warning, by default True
-    dump : Optional[Union[str, Path]], optional
-        whether to dump converted file, by default None
-
-    Returns
-    -------
-    Dict[str, Any]
-        converted output
-    """
-    output = {}
-    output["model"] = _model(jdata, jdata["use_smooth"])
-    output["learning_rate"] = _learning_rate(jdata)
-    output["loss"] = _loss(jdata)
-    output["training"] = _training(jdata)
-    if warning:
-        _warning_input_v0_v1(dump)
-    if dump is not None:
-        with open(dump, "w") as fp:
-            json.dump(output, fp, indent=4)
-    return output
-
-
-def _warning_input_v0_v1(fname: Optional[Union[str, Path]]):
-    msg = (
-        "It seems that you are using a deepmd-kit input of version 0.x.x, "
-        "which is deprecated. we have converted the input to >2.0.0 compatible"
-    )
-    if fname is not None:
-        msg += f", and output it to file {fname}"
-    warnings.warn(msg)
-
-
-def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]:
-    """Convert data to v1 input for non-smooth model.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-    smooth : bool
-        whether to use smooth or non-smooth descriptor version
-
-    Returns
-    -------
-    Dict[str, Dict[str, Any]]
-        dictionary with model input parameters and sub-dictionaries for descriptor and
-        fitting net
-    """
-    model = {}
-    model["descriptor"] = (
-        _smth_descriptor(jdata) if smooth else _nonsmth_descriptor(jdata)
-    )
-    model["fitting_net"] = _fitting_net(jdata)
-    return model
-
-
-def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for non-smooth descriptor.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with descriptor parameters
-    """
-    descriptor = {}
-    descriptor["type"] = "loc_frame"
-    _jcopy(jdata, descriptor, ("sel_a", "sel_r", "rcut", "axis_rule"))
-    return descriptor
-
-
-def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for smooth descriptor.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with descriptor parameters
-    """
-    descriptor = {}
-    seed = jdata.get("seed", None)
-    if seed is not None:
-        descriptor["seed"] = seed
-    descriptor["type"] = "se_a"
-    descriptor["sel"] = jdata["sel_a"]
-    _jcopy(jdata, descriptor, ("rcut",))
-    descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"])
-    descriptor["neuron"] = j_must_have(jdata, "filter_neuron")
-    descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"])
-    descriptor["resnet_dt"] = False
-    if "resnet_dt" in jdata:
-        descriptor["resnet_dt"] = jdata["filter_resnet_dt"]
-
-    return descriptor
-
-
-def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for fitting net.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with fitting net parameters
-    """
-    fitting_net = {}
-
-    seed = jdata.get("seed", None)
-    if seed is not None:
-        fitting_net["seed"] = seed
-    fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"])
-    fitting_net["resnet_dt"] = True
-    if "resnet_dt" in jdata:
-        fitting_net["resnet_dt"] = jdata["resnet_dt"]
-    if "fitting_resnet_dt" in jdata:
-        fitting_net["resnet_dt"] = jdata["fitting_resnet_dt"]
-    return fitting_net
-
-
-def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for learning rate section.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with learning rate parameters
-    """
-    learning_rate = {}
-    learning_rate["type"] = "exp"
-    _jcopy(jdata, learning_rate, ("decay_steps", "decay_rate", "start_lr"))
-    return learning_rate
-
-
-def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for loss function.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with loss function parameters
-    """
-    loss: Dict[str, Any] = {}
-    _jcopy(
-        jdata,
-        loss,
-        (
-            "start_pref_e",
-            "limit_pref_e",
-            "start_pref_f",
-            "limit_pref_f",
-            "start_pref_v",
-            "limit_pref_v",
-        ),
-    )
-    if "start_pref_ae" in jdata:
-        loss["start_pref_ae"] = jdata["start_pref_ae"]
-    if "limit_pref_ae" in jdata:
-        loss["limit_pref_ae"] = jdata["limit_pref_ae"]
-    return loss
-
-
-def _training(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for training.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with training parameters
-    """
-    training = {}
-    seed = jdata.get("seed", None)
-    if seed is not None:
-        training["seed"] = seed
-
-    _jcopy(jdata, training, ("systems", "set_prefix", "stop_batch", "batch_size"))
-    training["disp_file"] = "lcurve.out"
-    if "disp_file" in jdata:
-        training["disp_file"] = jdata["disp_file"]
-    training["disp_freq"] = j_must_have(jdata, "disp_freq")
-    training["numb_test"] = j_must_have(jdata, "numb_test")
-    training["save_freq"] = j_must_have(jdata, "save_freq")
-    training["save_ckpt"] = j_must_have(jdata, "save_ckpt")
-    training["disp_training"] = j_must_have(jdata, "disp_training")
-    training["time_training"] = j_must_have(jdata, "time_training")
-    if "profiling" in jdata:
-        training["profiling"] = jdata["profiling"]
-        if training["profiling"]:
-            training["profiling_file"] = j_must_have(jdata, "profiling_file")
-    return training
-
-
-def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]):
-    """Copy specified keys from one dict to another.
-
-    Parameters
-    ----------
-    src : Dict[str, Any]
-        source dictionary
-    dst : Dict[str, Any]
-        destination dictionary, will be modified in place
-    keys : Sequence[str]
-        list of keys to copy
-    """
-    for k in keys:
-        dst[k] = src[k]
-
-
-def remove_decay_rate(jdata: Dict[str, Any]):
-    """Convert decay_rate to stop_lr.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        input data
-    """
-    lr = jdata["learning_rate"]
-    if "decay_rate" in lr:
-        decay_rate = lr["decay_rate"]
-        start_lr = lr["start_lr"]
-        stop_step = jdata["training"]["stop_batch"]
-        decay_steps = lr["decay_steps"]
-        stop_lr = np.exp(np.log(decay_rate) * (stop_step / decay_steps)) * start_lr
-        lr["stop_lr"] = stop_lr
-        lr.pop("decay_rate")
-
-
-def convert_input_v1_v2(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    tr_cfg = jdata["training"]
-    tr_data_keys = {
-        "systems",
-        "set_prefix",
-        "batch_size",
-        "sys_prob",
-        "auto_prob",
-        # alias included
-        "sys_weights",
-        "auto_prob_style",
-    }
-
-    tr_data_cfg = {k: v for k, v in tr_cfg.items() if k in tr_data_keys}
-    new_tr_cfg = {k: v for k, v in tr_cfg.items() if k not in tr_data_keys}
-    new_tr_cfg["training_data"] = tr_data_cfg
-    if "training_data" in tr_cfg:
-        raise RuntimeError(
-            "Both v1 (training/systems) and v2 (training/training_data) parameters are given."
-        )
-
-    jdata["training"] = new_tr_cfg
-
-    # remove deprecated arguments
-    remove_decay_rate(jdata)
-
-    if warning:
-        _warning_input_v1_v2(dump)
-    if dump is not None:
-        with open(dump, "w") as fp:
-            json.dump(jdata, fp, indent=4)
-
-    return jdata
-
-
-def _warning_input_v1_v2(fname: Optional[Union[str, Path]]):
-    msg = (
-        "It seems that you are using a deepmd-kit input of version 1.x.x, "
-        "which is deprecated. we have converted the input to >2.0.0 compatible"
-    )
-    if fname is not None:
-        msg += f", and output it to file {fname}"
-    warnings.warn(msg)
-
-
-def deprecate_numb_test(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0.
-
-    See `#1243 <https://github.com/deepmodeling/deepmd-kit/discussions/1243>`_.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        loaded json/yaml file
-    warning : bool, optional
-        whether to show deprecation warning, by default True
-    dump : Optional[Union[str, Path]], optional
-        whether to dump converted file, by default None
-
-    Returns
-    -------
-    Dict[str, Any]
-        converted output
-    """
-    try:
-        jdata.get("training", {}).pop("numb_test")
-    except KeyError:
-        pass
-    else:
-        if warning:
-            warnings.warn(
-                "The argument training->numb_test has been deprecated since v2.0.0. "
-                "Use training->validation_data->batch_size instead."
-            )
-
-    if dump is not None:
-        with open(dump, "w") as fp:
-            json.dump(jdata, fp, indent=4)
-    return jdata
-
-
-def update_deepmd_input(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    def is_deepmd_v0_input(jdata):
-        return "model" not in jdata.keys()
-
-    def is_deepmd_v1_input(jdata):
-        return "systems" in j_must_have(jdata, "training").keys()
-
-    if is_deepmd_v0_input(jdata):
-        jdata = convert_input_v0_v1(jdata, warning, None)
-        jdata = convert_input_v1_v2(jdata, False, None)
-        jdata = deprecate_numb_test(jdata, False, dump)
-    elif is_deepmd_v1_input(jdata):
-        jdata = convert_input_v1_v2(jdata, warning, None)
-        jdata = deprecate_numb_test(jdata, False, dump)
-    else:
-        jdata = deprecate_numb_test(jdata, warning, dump)
-
-    return jdata
+__all__ = [
+    "convert_input_v0_v1",
+    "convert_input_v1_v2",
+    "deprecate_numb_test",
+    "update_deepmd_input",
+]
diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py
index 423745cddf..a6f888beac 100644
--- a/deepmd/utils/data.py
+++ b/deepmd/utils/data.py
@@ -1,614 +1,9 @@
-#!/usr/bin/env python3
-
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import logging
-from typing import (
-    List,
-    Optional,
-)
-
-import numpy as np
-
-from deepmd.env import (
-    GLOBAL_ENER_FLOAT_PRECISION,
-    GLOBAL_NP_FLOAT_PRECISION,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.data import (
+    DeepmdData,
 )
-from deepmd.utils import random as dp_random
-from deepmd.utils.path import (
-    DPPath,
-)
-
-log = logging.getLogger(__name__)
-
-
-class DeepmdData:
-    """Class for a data system.
-
-    It loads data from hard disk, and mantains the data as a `data_dict`
-
-    Parameters
-    ----------
-    sys_path
-            Path to the data system
-    set_prefix
-            Prefix for the directories of different sets
-    shuffle_test
-            If the test data are shuffled
-    type_map
-            Gives the name of different atom types
-    optional_type_map
-            If the type_map.raw in each system is optional
-    modifier
-            Data modifier that has the method `modify_data`
-    trn_all_set
-            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
-    sort_atoms : bool
-            Sort atoms by atom types. Required to enable when the data is directly feeded to
-            descriptors except mixed types.
-    """
-
-    def __init__(
-        self,
-        sys_path: str,
-        set_prefix: str = "set",
-        shuffle_test: bool = True,
-        type_map: Optional[List[str]] = None,
-        optional_type_map: bool = True,
-        modifier=None,
-        trn_all_set: bool = False,
-        sort_atoms: bool = True,
-    ):
-        """Constructor."""
-        root = DPPath(sys_path)
-        self.dirs = root.glob(set_prefix + ".*")
-        if not len(self.dirs):
-            raise FileNotFoundError(f"No {set_prefix}.* is found in {sys_path}")
-        self.dirs.sort()
-        # check mix_type format
-        error_format_msg = (
-            "if one of the set is of mixed_type format, "
-            "then all of the sets in this system should be of mixed_type format!"
-        )
-        self.mixed_type = self._check_mode(self.dirs[0])
-        for set_item in self.dirs[1:]:
-            assert self._check_mode(set_item) == self.mixed_type, error_format_msg
-        # load atom type
-        self.atom_type = self._load_type(root)
-        self.natoms = len(self.atom_type)
-        # load atom type map
-        self.type_map = self._load_type_map(root)
-        assert (
-            optional_type_map or self.type_map is not None
-        ), f"System {sys_path} must have type_map.raw in this mode! "
-        if self.type_map is not None:
-            assert len(self.type_map) >= max(self.atom_type) + 1
-        # check pbc
-        self.pbc = self._check_pbc(root)
-        # enforce type_map if necessary
-        self.enforce_type_map = False
-        if type_map is not None and self.type_map is not None and len(type_map):
-            if not self.mixed_type:
-                atom_type_ = [
-                    type_map.index(self.type_map[ii]) for ii in self.atom_type
-                ]
-                self.atom_type = np.array(atom_type_, dtype=np.int32)
-            else:
-                self.enforce_type_map = True
-                sorter = np.argsort(type_map)
-                self.type_idx_map = np.array(
-                    sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)]
-                )
-                # padding for virtual atom
-                self.type_idx_map = np.append(
-                    self.type_idx_map, np.array([-1], dtype=np.int32)
-                )
-            self.type_map = type_map
-        if type_map is None and self.type_map is None and self.mixed_type:
-            raise RuntimeError("mixed_type format must have type_map!")
-        # make idx map
-        self.sort_atoms = sort_atoms
-        self.idx_map = self._make_idx_map(self.atom_type)
-        # train dirs
-        self.test_dir = self.dirs[-1]
-        if trn_all_set:
-            self.train_dirs = self.dirs
-        else:
-            if len(self.dirs) == 1:
-                self.train_dirs = self.dirs
-            else:
-                self.train_dirs = self.dirs[:-1]
-        self.data_dict = {}
-        # add box and coord
-        self.add("box", 9, must=self.pbc)
-        self.add("coord", 3, atomic=True, must=True)
-        # the training times of each frame
-        self.add("numb_copy", 1, must=False, default=1, dtype=int)
-        # set counters
-        self.set_count = 0
-        self.iterator = 0
-        self.shuffle_test = shuffle_test
-        # set modifier
-        self.modifier = modifier
-
-    def add(
-        self,
-        key: str,
-        ndof: int,
-        atomic: bool = False,
-        must: bool = False,
-        high_prec: bool = False,
-        type_sel: Optional[List[int]] = None,
-        repeat: int = 1,
-        default: float = 0.0,
-        dtype: Optional[np.dtype] = None,
-    ):
-        """Add a data item that to be loaded.
-
-        Parameters
-        ----------
-        key
-            The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
-        ndof
-            The number of dof
-        atomic
-            The item is an atomic property.
-            If False, the size of the data should be nframes x ndof
-            If True, the size of data should be nframes x natoms x ndof
-        must
-            The data file `sys_path/set.*/key.npy` must exist.
-            If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
-        high_prec
-            Load the data and store in float64, otherwise in float32
-        type_sel
-            Select certain type of atoms
-        repeat
-            The data will be repeated `repeat` times.
-        default : float, default=0.
-            default value of data
-        dtype : np.dtype, optional
-            the dtype of data, overwrites `high_prec` if provided
-        """
-        self.data_dict[key] = {
-            "ndof": ndof,
-            "atomic": atomic,
-            "must": must,
-            "high_prec": high_prec,
-            "type_sel": type_sel,
-            "repeat": repeat,
-            "reduce": None,
-            "default": default,
-            "dtype": dtype,
-        }
-        return self
-
-    def reduce(self, key_out: str, key_in: str):
-        """Generate a new item from the reduction of another atom.
-
-        Parameters
-        ----------
-        key_out
-            The name of the reduced item
-        key_in
-            The name of the data item to be reduced
-        """
-        assert key_in in self.data_dict, "cannot find input key"
-        assert self.data_dict[key_in]["atomic"], "reduced property should be atomic"
-        assert key_out not in self.data_dict, "output key should not have been added"
-        assert (
-            self.data_dict[key_in]["repeat"] == 1
-        ), "reduced proerties should not have been repeated"
-
-        self.data_dict[key_out] = {
-            "ndof": self.data_dict[key_in]["ndof"],
-            "atomic": False,
-            "must": True,
-            "high_prec": True,
-            "type_sel": None,
-            "repeat": 1,
-            "reduce": key_in,
-        }
-        return self
-
-    def get_data_dict(self) -> dict:
-        """Get the `data_dict`."""
-        return self.data_dict
-
-    def check_batch_size(self, batch_size):
-        """Check if the system can get a batch of data with `batch_size` frames."""
-        for ii in self.train_dirs:
-            if self.data_dict["coord"]["high_prec"]:
-                tmpe = (
-                    (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
-                )
-            else:
-                tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
-            if tmpe.ndim == 1:
-                tmpe = tmpe.reshape([1, -1])
-            if tmpe.shape[0] < batch_size:
-                return ii, tmpe.shape[0]
-        return None
-
-    def check_test_size(self, test_size):
-        """Check if the system can get a test dataset with `test_size` frames."""
-        if self.data_dict["coord"]["high_prec"]:
-            tmpe = (
-                (self.test_dir / "coord.npy")
-                .load_numpy()
-                .astype(GLOBAL_ENER_FLOAT_PRECISION)
-            )
-        else:
-            tmpe = (
-                (self.test_dir / "coord.npy")
-                .load_numpy()
-                .astype(GLOBAL_NP_FLOAT_PRECISION)
-            )
-        if tmpe.ndim == 1:
-            tmpe = tmpe.reshape([1, -1])
-        if tmpe.shape[0] < test_size:
-            return self.test_dir, tmpe.shape[0]
-        else:
-            return None
-
-    def get_batch(self, batch_size: int) -> dict:
-        """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system.
-
-        Parameters
-        ----------
-        batch_size
-            size of the batch
-        """
-        if hasattr(self, "batch_set"):
-            set_size = self.batch_set["coord"].shape[0]
-        else:
-            set_size = 0
-        if self.iterator + batch_size > set_size:
-            self._load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()])
-            self.set_count += 1
-            set_size = self.batch_set["coord"].shape[0]
-        iterator_1 = self.iterator + batch_size
-        if iterator_1 >= set_size:
-            iterator_1 = set_size
-        idx = np.arange(self.iterator, iterator_1)
-        self.iterator += batch_size
-        ret = self._get_subdata(self.batch_set, idx)
-        return ret
-
-    def get_test(self, ntests: int = -1) -> dict:
-        """Get the test data with `ntests` frames.
-
-        Parameters
-        ----------
-        ntests
-            Size of the test data set. If `ntests` is -1, all test data will be get.
-        """
-        if not hasattr(self, "test_set"):
-            self._load_test_set(self.test_dir, self.shuffle_test)
-        if ntests == -1:
-            idx = None
-        else:
-            ntests_ = (
-                ntests
-                if ntests < self.test_set["type"].shape[0]
-                else self.test_set["type"].shape[0]
-            )
-            # print('ntest', self.test_set['type'].shape[0], ntests, ntests_)
-            idx = np.arange(ntests_)
-        ret = self._get_subdata(self.test_set, idx=idx)
-        if self.modifier is not None:
-            self.modifier.modify_data(ret, self)
-        return ret
-
-    def get_ntypes(self) -> int:
-        """Number of atom types in the system."""
-        if self.type_map is not None:
-            return len(self.type_map)
-        else:
-            return max(self.get_atom_type()) + 1
-
-    def get_type_map(self) -> List[str]:
-        """Get the type map."""
-        return self.type_map
-
-    def get_atom_type(self) -> List[int]:
-        """Get atom types."""
-        return self.atom_type
-
-    def get_numb_set(self) -> int:
-        """Get number of training sets."""
-        return len(self.train_dirs)
-
-    def get_numb_batch(self, batch_size: int, set_idx: int) -> int:
-        """Get the number of batches in a set."""
-        data = self._load_set(self.train_dirs[set_idx])
-        ret = data["coord"].shape[0] // batch_size
-        if ret == 0:
-            ret = 1
-        return ret
-
-    def get_sys_numb_batch(self, batch_size: int) -> int:
-        """Get the number of batches in the data system."""
-        ret = 0
-        for ii in range(len(self.train_dirs)):
-            ret += self.get_numb_batch(batch_size, ii)
-        return ret
-
-    def get_natoms(self):
-        """Get number of atoms."""
-        return len(self.atom_type)
-
-    def get_natoms_vec(self, ntypes: int):
-        """Get number of atoms and number of atoms in different types.
-
-        Parameters
-        ----------
-        ntypes
-            Number of types (may be larger than the actual number of types in the system).
-
-        Returns
-        -------
-        natoms
-            natoms[0]: number of local atoms
-            natoms[1]: total number of atoms held by this processor
-            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
-        """
-        natoms, natoms_vec = self._get_natoms_2(ntypes)
-        tmp = [natoms, natoms]
-        tmp = np.append(tmp, natoms_vec)
-        return tmp.astype(np.int32)
-
-    def avg(self, key):
-        """Return the average value of an item."""
-        if key not in self.data_dict.keys():
-            raise RuntimeError("key %s has not been added" % key)
-        info = self.data_dict[key]
-        ndof = info["ndof"]
-        eners = []
-        for ii in self.train_dirs:
-            data = self._load_set(ii)
-            ei = data[key].reshape([-1, ndof])
-            eners.append(ei)
-        eners = np.concatenate(eners, axis=0)
-        if eners.size == 0:
-            return 0
-        else:
-            return np.average(eners, axis=0)
-
-    def _idx_map_sel(self, atom_type, type_sel):
-        new_types = []
-        for ii in atom_type:
-            if ii in type_sel:
-                new_types.append(ii)
-        new_types = np.array(new_types, dtype=int)
-        natoms = new_types.shape[0]
-        idx = np.arange(natoms)
-        idx_map = np.lexsort((idx, new_types))
-        return idx_map
-
-    def _get_natoms_2(self, ntypes):
-        sample_type = self.atom_type
-        natoms = len(sample_type)
-        natoms_vec = np.zeros(ntypes).astype(int)
-        for ii in range(ntypes):
-            natoms_vec[ii] = np.count_nonzero(sample_type == ii)
-        return natoms, natoms_vec
-
-    def _get_subdata(self, data, idx=None):
-        new_data = {}
-        for ii in data:
-            dd = data[ii]
-            if "find_" in ii:
-                new_data[ii] = dd
-            else:
-                if idx is not None:
-                    new_data[ii] = dd[idx]
-                else:
-                    new_data[ii] = dd
-        return new_data
-
-    def _load_batch_set(self, set_name: DPPath):
-        if not hasattr(self, "batch_set") or self.get_numb_set() > 1:
-            self.batch_set = self._load_set(set_name)
-            if self.modifier is not None:
-                self.modifier.modify_data(self.batch_set, self)
-        self.batch_set, _ = self._shuffle_data(self.batch_set)
-        self.reset_get_batch()
-
-    def reset_get_batch(self):
-        self.iterator = 0
-
-    def _load_test_set(self, set_name: DPPath, shuffle_test):
-        self.test_set = self._load_set(set_name)
-        if shuffle_test:
-            self.test_set, _ = self._shuffle_data(self.test_set)
-
-    def _shuffle_data(self, data):
-        ret = {}
-        nframes = data["coord"].shape[0]
-        idx = np.arange(nframes)
-        # the training times of each frame
-        idx = np.repeat(idx, np.reshape(data["numb_copy"], (nframes,)))
-        dp_random.shuffle(idx)
-        for kk in data:
-            if (
-                type(data[kk]) == np.ndarray
-                and len(data[kk].shape) == 2
-                and data[kk].shape[0] == nframes
-                and "find_" not in kk
-            ):
-                ret[kk] = data[kk][idx]
-            else:
-                ret[kk] = data[kk]
-        return ret, idx
-
-    def _load_set(self, set_name: DPPath):
-        # get nframes
-        if not isinstance(set_name, DPPath):
-            set_name = DPPath(set_name)
-        path = set_name / "coord.npy"
-        if self.data_dict["coord"]["high_prec"]:
-            coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
-        else:
-            coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
-        if coord.ndim == 1:
-            coord = coord.reshape([1, -1])
-        nframes = coord.shape[0]
-        assert coord.shape[1] == self.data_dict["coord"]["ndof"] * self.natoms
-        # load keys
-        data = {}
-        for kk in self.data_dict.keys():
-            if self.data_dict[kk]["reduce"] is None:
-                data["find_" + kk], data[kk] = self._load_data(
-                    set_name,
-                    kk,
-                    nframes,
-                    self.data_dict[kk]["ndof"],
-                    atomic=self.data_dict[kk]["atomic"],
-                    high_prec=self.data_dict[kk]["high_prec"],
-                    must=self.data_dict[kk]["must"],
-                    type_sel=self.data_dict[kk]["type_sel"],
-                    repeat=self.data_dict[kk]["repeat"],
-                    default=self.data_dict[kk]["default"],
-                    dtype=self.data_dict[kk]["dtype"],
-                )
-        for kk in self.data_dict.keys():
-            if self.data_dict[kk]["reduce"] is not None:
-                k_in = self.data_dict[kk]["reduce"]
-                ndof = self.data_dict[kk]["ndof"]
-                data["find_" + kk] = data["find_" + k_in]
-                tmp_in = data[k_in].astype(GLOBAL_ENER_FLOAT_PRECISION)
-                data[kk] = np.sum(
-                    np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis=1
-                )
-
-        if self.mixed_type:
-            # nframes x natoms
-            atom_type_mix = self._load_type_mix(set_name)
-            if self.enforce_type_map:
-                try:
-                    atom_type_mix_ = self.type_idx_map[atom_type_mix].astype(np.int32)
-                except IndexError as e:
-                    raise IndexError(
-                        "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format(
-                            set_name, self.get_ntypes()
-                        )
-                    ) from e
-                atom_type_mix = atom_type_mix_
-            real_type = atom_type_mix.reshape([nframes, self.natoms])
-            data["type"] = real_type
-            natoms = data["type"].shape[1]
-            # nframes x ntypes
-            atom_type_nums = np.array(
-                [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())],
-                dtype=np.int32,
-            ).T
-            ghost_nums = np.array(
-                [(real_type == -1).sum(axis=-1)],
-                dtype=np.int32,
-            ).T
-            assert (
-                atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms
-            ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format(
-                set_name, self.get_ntypes()
-            )
-            data["real_natoms_vec"] = np.concatenate(
-                (
-                    np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)),
-                    atom_type_nums,
-                ),
-                axis=-1,
-            )
-        else:
-            data["type"] = np.tile(self.atom_type[self.idx_map], (nframes, 1))
-
-        return data
-
-    def _load_data(
-        self,
-        set_name,
-        key,
-        nframes,
-        ndof_,
-        atomic=False,
-        must=True,
-        repeat=1,
-        high_prec=False,
-        type_sel=None,
-        default: float = 0.0,
-        dtype: Optional[np.dtype] = None,
-    ):
-        if atomic:
-            natoms = self.natoms
-            idx_map = self.idx_map
-            # if type_sel, then revise natoms and idx_map
-            if type_sel is not None:
-                natoms = 0
-                for jj in type_sel:
-                    natoms += np.sum(self.atom_type == jj)
-                idx_map = self._idx_map_sel(self.atom_type, type_sel)
-            ndof = ndof_ * natoms
-        else:
-            ndof = ndof_
-        if dtype is not None:
-            pass
-        elif high_prec:
-            dtype = GLOBAL_ENER_FLOAT_PRECISION
-        else:
-            dtype = GLOBAL_NP_FLOAT_PRECISION
-        path = set_name / (key + ".npy")
-        if path.is_file():
-            data = path.load_numpy().astype(dtype)
-            try:  # YWolfeee: deal with data shape error
-                if atomic:
-                    data = data.reshape([nframes, natoms, -1])
-                    data = data[:, idx_map, :]
-                    data = data.reshape([nframes, -1])
-                data = np.reshape(data, [nframes, ndof])
-            except ValueError as err_message:
-                explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
-                log.error(str(err_message))
-                log.error(explanation)
-                raise ValueError(str(err_message) + ". " + explanation)
-            if repeat != 1:
-                data = np.repeat(data, repeat).reshape([nframes, -1])
-            return np.float32(1.0), data
-        elif must:
-            raise RuntimeError("%s not found!" % path)
-        else:
-            data = np.full([nframes, ndof], default, dtype=dtype)
-            if repeat != 1:
-                data = np.repeat(data, repeat).reshape([nframes, -1])
-            return np.float32(0.0), data
-
-    def _load_type(self, sys_path: DPPath):
-        atom_type = (sys_path / "type.raw").load_txt(ndmin=1).astype(np.int32)
-        return atom_type
-
-    def _load_type_mix(self, set_name: DPPath):
-        type_path = set_name / "real_atom_types.npy"
-        real_type = type_path.load_numpy().astype(np.int32).reshape([-1, self.natoms])
-        return real_type
-
-    def _make_idx_map(self, atom_type):
-        natoms = atom_type.shape[0]
-        idx = np.arange(natoms)
-        if self.sort_atoms:
-            idx_map = np.lexsort((idx, atom_type))
-        else:
-            idx_map = idx
-        return idx_map
-
-    def _load_type_map(self, sys_path: DPPath):
-        fname = sys_path / "type_map.raw"
-        if fname.is_file():
-            return fname.load_txt(dtype=str, ndmin=1).tolist()
-        else:
-            return None
-
-    def _check_pbc(self, sys_path: DPPath):
-        pbc = True
-        if (sys_path / "nopbc").is_file():
-            pbc = False
-        return pbc
 
-    def _check_mode(self, set_path: DPPath):
-        return (set_path / "real_atom_types.npy").is_file()
+__all__ = [
+    "DeepmdData",
+]
diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
index 65cfdc053f..65e87d8ebc 100644
--- a/deepmd/utils/data_system.py
+++ b/deepmd/utils/data_system.py
@@ -1,654 +1,13 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import collections
-import logging
-import warnings
-from functools import (
-    lru_cache,
-)
-from typing import (
-    List,
-    Optional,
-)
-
-import numpy as np
-
-from deepmd.common import (
-    make_default_mesh,
-)
-from deepmd.env import (
-    GLOBAL_NP_FLOAT_PRECISION,
-)
-from deepmd.utils import random as dp_random
-from deepmd.utils.data import (
-    DeepmdData,
-)
-
-log = logging.getLogger(__name__)
-
-
-class DeepmdDataSystem:
-    """Class for manipulating many data systems.
-
-    It is implemented with the help of DeepmdData
-    """
-
-    def __init__(
-        self,
-        systems: List[str],
-        batch_size: int,
-        test_size: int,
-        rcut: Optional[float] = None,
-        set_prefix: str = "set",
-        shuffle_test: bool = True,
-        type_map: Optional[List[str]] = None,
-        optional_type_map: bool = True,
-        modifier=None,
-        trn_all_set=False,
-        sys_probs=None,
-        auto_prob_style="prob_sys_size",
-        sort_atoms: bool = True,
-    ):
-        """Constructor.
-
-        Parameters
-        ----------
-        systems
-            Specifying the paths to systems
-        batch_size
-            The batch size
-        test_size
-            The size of test data
-        rcut
-            The cut-off radius. Not used.
-        set_prefix
-            Prefix for the directories of different sets
-        shuffle_test
-            If the test data are shuffled
-        type_map
-            Gives the name of different atom types
-        optional_type_map
-            If the type_map.raw in each system is optional
-        modifier
-            Data modifier that has the method `modify_data`
-        trn_all_set
-            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
-        sys_probs : list of float
-            The probabilitis of systems to get the batch.
-            Summation of positive elements of this list should be no greater than 1.
-            Element of this list can be negative, the probability of the corresponding system is determined
-                automatically by the number of batches in the system.
-        auto_prob_style : str
-            Determine the probability of systems automatically. The method is assigned by this key and can be
-            - "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()
-            - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system
-            - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." :
-                                the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`,
-                                where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system,
-                                the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional
-                to the number of batches in the system.
-        sort_atoms : bool
-            Sort atoms by atom types. Required to enable when the data is directly feeded to
-            descriptors except mixed types.
-        """
-        # init data
-        del rcut
-        self.system_dirs = systems
-        self.nsystems = len(self.system_dirs)
-        self.data_systems = []
-        for ii in self.system_dirs:
-            self.data_systems.append(
-                DeepmdData(
-                    ii,
-                    set_prefix=set_prefix,
-                    shuffle_test=shuffle_test,
-                    type_map=type_map,
-                    optional_type_map=optional_type_map,
-                    modifier=modifier,
-                    trn_all_set=trn_all_set,
-                    sort_atoms=sort_atoms,
-                )
-            )
-        # check mix_type format
-        error_format_msg = (
-            "if one of the system is of mixed_type format, "
-            "then all of the systems should be of mixed_type format!"
-        )
-        if self.data_systems[0].mixed_type:
-            for data_sys in self.data_systems[1:]:
-                assert data_sys.mixed_type, error_format_msg
-            self.mixed_type = True
-        else:
-            for data_sys in self.data_systems[1:]:
-                assert not data_sys.mixed_type, error_format_msg
-            self.mixed_type = False
-        # batch size
-        self.batch_size = batch_size
-        is_auto_bs = False
-        self.mixed_systems = False
-        if isinstance(self.batch_size, int):
-            self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int)
-        elif isinstance(self.batch_size, str):
-            words = self.batch_size.split(":")
-            if "auto" == words[0]:
-                is_auto_bs = True
-                rule = 32
-                if len(words) == 2:
-                    rule = int(words[1])
-                self.batch_size = self._make_auto_bs(rule)
-            elif "mixed" == words[0]:
-                self.mixed_type = True
-                self.mixed_systems = True
-                if len(words) == 2:
-                    rule = int(words[1])
-                else:
-                    raise RuntimeError("batch size must be specified for mixed systems")
-                self.batch_size = rule * np.ones(self.nsystems, dtype=int)
-            else:
-                raise RuntimeError("unknown batch_size rule " + words[0])
-        elif isinstance(self.batch_size, list):
-            pass
-        else:
-            raise RuntimeError("invalid batch_size")
-        assert isinstance(self.batch_size, (list, np.ndarray))
-        assert len(self.batch_size) == self.nsystems
-
-        # natoms, nbatches
-        ntypes = []
-        for ii in self.data_systems:
-            ntypes.append(ii.get_ntypes())
-        self.sys_ntypes = max(ntypes)
-        self.natoms = []
-        self.natoms_vec = []
-        self.nbatches = []
-        type_map_list = []
-        for ii in range(self.nsystems):
-            self.natoms.append(self.data_systems[ii].get_natoms())
-            self.natoms_vec.append(
-                self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)
-            )
-            self.nbatches.append(
-                self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])
-            )
-            type_map_list.append(self.data_systems[ii].get_type_map())
-        self.type_map = self._check_type_map_consistency(type_map_list)
-
-        # ! altered by Marián Rynik
-        # test size
-        # now test size can be set as a percentage of systems data or test size
-        # can be set for each system individualy in the same manner as batch
-        # size. This enables one to use systems with diverse number of
-        # structures and different number of atoms.
-        self.test_size = test_size
-        if isinstance(self.test_size, int):
-            self.test_size = self.test_size * np.ones(self.nsystems, dtype=int)
-        elif isinstance(self.test_size, str):
-            words = self.test_size.split("%")
-            try:
-                percent = int(words[0])
-            except ValueError:
-                raise RuntimeError("unknown test_size rule " + words[0])
-            self.test_size = self._make_auto_ts(percent)
-        elif isinstance(self.test_size, list):
-            pass
-        else:
-            raise RuntimeError("invalid test_size")
-        assert isinstance(self.test_size, (list, np.ndarray))
-        assert len(self.test_size) == self.nsystems
-
-        # init pick idx
-        self.pick_idx = 0
-
-        # derive system probabilities
-        self.sys_probs = None
-        self.set_sys_probs(sys_probs, auto_prob_style)
-
-        # check batch and test size
-        for ii in range(self.nsystems):
-            chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii])
-            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
-                warnings.warn(
-                    "system %s required batch size is larger than the size of the dataset %s (%d > %d)"
-                    % (
-                        self.system_dirs[ii],
-                        chk_ret[0],
-                        self.batch_size[ii],
-                        chk_ret[1],
-                    )
-                )
-            chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii])
-            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
-                warnings.warn(
-                    "system %s required test size is larger than the size of the dataset %s (%d > %d)"
-                    % (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1])
-                )
-
-    def _load_test(self, ntests=-1):
-        self.test_data = collections.defaultdict(list)
-        for ii in range(self.nsystems):
-            test_system_data = self.data_systems[ii].get_test(ntests=ntests)
-            for nn in test_system_data:
-                self.test_data[nn].append(test_system_data[nn])
-
-    @property
-    @lru_cache(maxsize=None)
-    def default_mesh(self) -> List[np.ndarray]:
-        """Mesh for each system."""
-        return [
-            make_default_mesh(
-                self.data_systems[ii].pbc, self.data_systems[ii].mixed_type
-            )
-            for ii in range(self.nsystems)
-        ]
-
-    def compute_energy_shift(self, rcond=None, key="energy"):
-        sys_ener = []
-        for ss in self.data_systems:
-            sys_ener.append(ss.avg(key))
-        sys_ener = np.concatenate(sys_ener)
-        sys_tynatom = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION)
-        sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1])
-        sys_tynatom = sys_tynatom[:, 2:]
-        energy_shift, resd, rank, s_value = np.linalg.lstsq(
-            sys_tynatom, sys_ener, rcond=rcond
-        )
-        return energy_shift
-
-    def add_dict(self, adict: dict) -> None:
-        """Add items to the data system by a `dict`.
-        `adict` should have items like
-        .. code-block:: python.
-
-           adict[key] = {
-               "ndof": ndof,
-               "atomic": atomic,
-               "must": must,
-               "high_prec": high_prec,
-               "type_sel": type_sel,
-               "repeat": repeat,
-           }
-
-        For the explaination of the keys see `add`
-        """
-        for kk in adict:
-            self.add(
-                kk,
-                adict[kk]["ndof"],
-                atomic=adict[kk]["atomic"],
-                must=adict[kk]["must"],
-                high_prec=adict[kk]["high_prec"],
-                type_sel=adict[kk]["type_sel"],
-                repeat=adict[kk]["repeat"],
-                default=adict[kk]["default"],
-            )
-
-    def add(
-        self,
-        key: str,
-        ndof: int,
-        atomic: bool = False,
-        must: bool = False,
-        high_prec: bool = False,
-        type_sel: Optional[List[int]] = None,
-        repeat: int = 1,
-        default: float = 0.0,
-    ):
-        """Add a data item that to be loaded.
-
-        Parameters
-        ----------
-        key
-            The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
-        ndof
-            The number of dof
-        atomic
-            The item is an atomic property.
-            If False, the size of the data should be nframes x ndof
-            If True, the size of data should be nframes x natoms x ndof
-        must
-            The data file `sys_path/set.*/key.npy` must exist.
-            If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
-        high_prec
-            Load the data and store in float64, otherwise in float32
-        type_sel
-            Select certain type of atoms
-        repeat
-            The data will be repeated `repeat` times.
-        default, default=0.
-            Default value of data
-        """
-        for ii in self.data_systems:
-            ii.add(
-                key,
-                ndof,
-                atomic=atomic,
-                must=must,
-                high_prec=high_prec,
-                repeat=repeat,
-                type_sel=type_sel,
-                default=default,
-            )
-
-    def reduce(self, key_out, key_in):
-        """Generate a new item from the reduction of another atom.
-
-        Parameters
-        ----------
-        key_out
-            The name of the reduced item
-        key_in
-            The name of the data item to be reduced
-        """
-        for ii in self.data_systems:
-            ii.reduce(key_out, key_in)
-
-    def get_data_dict(self, ii: int = 0) -> dict:
-        return self.data_systems[ii].get_data_dict()
-
-    def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"):
-        if sys_probs is None:
-            if auto_prob_style == "prob_uniform":
-                prob_v = 1.0 / float(self.nsystems)
-                probs = [prob_v for ii in range(self.nsystems)]
-            elif auto_prob_style[:13] == "prob_sys_size":
-                if auto_prob_style == "prob_sys_size":
-                    prob_style = f"prob_sys_size;0:{self.get_nsystems()}:1.0"
-                else:
-                    prob_style = auto_prob_style
-                probs = prob_sys_size_ext(
-                    prob_style, self.get_nsystems(), self.nbatches
-                )
-            else:
-                raise RuntimeError("Unknown auto prob style: " + auto_prob_style)
-        else:
-            probs = process_sys_probs(sys_probs, self.nbatches)
-        self.sys_probs = probs
-
-    def get_batch(self, sys_idx: Optional[int] = None) -> dict:
-        # batch generation style altered by Ziyao Li:
-        # one should specify the "sys_prob" and "auto_prob_style" params
-        # via set_sys_prob() function. The sys_probs this function uses is
-        # defined as a private variable, self.sys_probs, initialized in __init__().
-        # This is to optimize the (vain) efforts in evaluating sys_probs every batch.
-        """Get a batch of data from the data systems.
-
-        Parameters
-        ----------
-        sys_idx : int
-            The index of system from which the batch is get.
-            If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored
-            If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following.
-            This option does not work for mixed systems.
-
-        Returns
-        -------
-        dict
-            The batch data
-        """
-        if not self.mixed_systems:
-            b_data = self.get_batch_standard(sys_idx)
-        else:
-            b_data = self.get_batch_mixed()
-        return b_data
-
-    def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict:
-        """Get a batch of data from the data systems in the standard way.
-
-        Parameters
-        ----------
-        sys_idx : int
-            The index of system from which the batch is get.
-            If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored
-            If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following.
-
-        Returns
-        -------
-        dict
-            The batch data
-        """
-        if sys_idx is not None:
-            self.pick_idx = sys_idx
-        else:
-            # prob = self._get_sys_probs(sys_probs, auto_prob_style)
-            self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
-        b_data = self.data_systems[self.pick_idx].get_batch(
-            self.batch_size[self.pick_idx]
-        )
-        b_data["natoms_vec"] = self.natoms_vec[self.pick_idx]
-        b_data["default_mesh"] = self.default_mesh[self.pick_idx]
-        return b_data
-
-    def get_batch_mixed(self) -> dict:
-        """Get a batch of data from the data systems in the mixed way.
-
-        Returns
-        -------
-        dict
-            The batch data
-        """
-        # mixed systems have a global batch size
-        batch_size = self.batch_size[0]
-        batch_data = []
-        for _ in range(batch_size):
-            self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
-            bb_data = self.data_systems[self.pick_idx].get_batch(1)
-            bb_data["natoms_vec"] = self.natoms_vec[self.pick_idx]
-            bb_data["default_mesh"] = self.default_mesh[self.pick_idx]
-            batch_data.append(bb_data)
-        b_data = self._merge_batch_data(batch_data)
-        return b_data
-
-    def _merge_batch_data(self, batch_data: List[dict]) -> dict:
-        """Merge batch data from different systems.
-
-        Parameters
-        ----------
-        batch_data : list of dict
-            A list of batch data from different systems.
-
-        Returns
-        -------
-        dict
-            The merged batch data.
-        """
-        b_data = {}
-        max_natoms = max(bb["natoms_vec"][0] for bb in batch_data)
-        # natoms_vec
-        natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int)
-        natoms_vec[0:3] = max_natoms
-        b_data["natoms_vec"] = natoms_vec
-        # real_natoms_vec
-        real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data])
-        b_data["real_natoms_vec"] = real_natoms_vec
-        # type
-        type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int)
-        for ii, bb in enumerate(batch_data):
-            type_vec[ii, : bb["type"].shape[1]] = bb["type"][0]
-        b_data["type"] = type_vec
-        # default_mesh
-        default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0)
-        b_data["default_mesh"] = default_mesh
-        # other data
-        data_dict = self.get_data_dict(0)
-        for kk, vv in data_dict.items():
-            if kk not in batch_data[0]:
-                continue
-            b_data["find_" + kk] = batch_data[0]["find_" + kk]
-            if not vv["atomic"]:
-                b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0)
-            else:
-                b_data[kk] = np.zeros(
-                    (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]),
-                    dtype=batch_data[0][kk].dtype,
-                )
-                for ii, bb in enumerate(batch_data):
-                    b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0]
-        return b_data
-
-    # ! altered by Marián Rynik
-    def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1):  # depreciated
-        """Get test data from the the data systems.
-
-        Parameters
-        ----------
-        sys_idx
-            The test dat of system with index `sys_idx` will be returned.
-            If is None, the currently selected system will be returned.
-        n_test
-            Number of test data. If set to -1 all test data will be get.
-        """
-        if not hasattr(self, "test_data"):
-            self._load_test(ntests=n_test)
-        if sys_idx is not None:
-            idx = sys_idx
-        else:
-            idx = self.pick_idx
-
-        test_system_data = {}
-        for nn in self.test_data:
-            test_system_data[nn] = self.test_data[nn][idx]
-        test_system_data["natoms_vec"] = self.natoms_vec[idx]
-        test_system_data["default_mesh"] = self.default_mesh[idx]
-        return test_system_data
-
-    def get_sys_ntest(self, sys_idx=None):
-        """Get number of tests for the currently selected system,
-        or one defined by sys_idx.
-        """
-        if sys_idx is not None:
-            return self.test_size[sys_idx]
-        else:
-            return self.test_size[self.pick_idx]
-
-    def get_type_map(self) -> List[str]:
-        """Get the type map."""
-        return self.type_map
-
-    def get_nbatches(self) -> int:
-        """Get the total number of batches."""
-        return self.nbatches
-
-    def get_ntypes(self) -> int:
-        """Get the number of types."""
-        return self.sys_ntypes
-
-    def get_nsystems(self) -> int:
-        """Get the number of data systems."""
-        return self.nsystems
-
-    def get_sys(self, idx: int) -> DeepmdData:
-        """Get a certain data system."""
-        return self.data_systems[idx]
-
-    def get_batch_size(self) -> int:
-        """Get the batch size."""
-        return self.batch_size
-
-    def _format_name_length(self, name, width):
-        if len(name) <= width:
-            return "{: >{}}".format(name, width)
-        else:
-            name = name[-(width - 3) :]
-            name = "-- " + name
-            return name
-
-    def print_summary(self, name):
-        # width 65
-        sys_width = 42
-        log.info(
-            f"---Summary of DataSystem: {name:13s}-----------------------------------------------"
-        )
-        log.info("found %d system(s):" % self.nsystems)
-        log.info(
-            ("%s  " % self._format_name_length("system", sys_width))
-            + ("%6s  %6s  %6s  %9s  %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc"))
-        )
-        for ii in range(self.nsystems):
-            log.info(
-                "%s  %6d  %6d  %6d  %9.3e  %3s"
-                % (
-                    self._format_name_length(self.system_dirs[ii], sys_width),
-                    self.natoms[ii],
-                    # TODO batch size * nbatches = number of structures
-                    self.batch_size[ii],
-                    self.nbatches[ii],
-                    self.sys_probs[ii],
-                    "T" if self.data_systems[ii].pbc else "F",
-                )
-            )
-        log.info(
-            "--------------------------------------------------------------------------------------"
-        )
-
-    def _make_auto_bs(self, rule):
-        bs = []
-        for ii in self.data_systems:
-            ni = ii.get_natoms()
-            bsi = rule // ni
-            if bsi * ni < rule:
-                bsi += 1
-            bs.append(bsi)
-        return bs
-
-    # ! added by Marián Rynik
-    def _make_auto_ts(self, percent):
-        ts = []
-        for ii in range(self.nsystems):
-            ni = self.batch_size[ii] * self.nbatches[ii]
-            tsi = int(ni * percent / 100)
-            ts.append(tsi)
-
-        return ts
-
-    def _check_type_map_consistency(self, type_map_list):
-        ret = []
-        for ii in type_map_list:
-            if ii is not None:
-                min_len = min([len(ii), len(ret)])
-                for idx in range(min_len):
-                    if ii[idx] != ret[idx]:
-                        raise RuntimeError(f"inconsistent type map: {ret!s} {ii!s}")
-                if len(ii) > len(ret):
-                    ret = ii
-        return ret
-
-
-def process_sys_probs(sys_probs, nbatch):
-    sys_probs = np.array(sys_probs)
-    type_filter = sys_probs >= 0
-    assigned_sum_prob = np.sum(type_filter * sys_probs)
-    # 1e-8 is to handle floating point error; See #1917
-    assert (
-        assigned_sum_prob <= 1.0 + 1e-8
-    ), "the sum of assigned probability should be less than 1"
-    rest_sum_prob = 1.0 - assigned_sum_prob
-    if not np.isclose(rest_sum_prob, 0):
-        rest_nbatch = (1 - type_filter) * nbatch
-        rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch)
-        ret_prob = rest_prob + type_filter * sys_probs
-    else:
-        ret_prob = sys_probs
-    assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1"
-    return ret_prob
-
-
-def prob_sys_size_ext(keywords, nsystems, nbatch):
-    block_str = keywords.split(";")[1:]
-    block_stt = []
-    block_end = []
-    block_weights = []
-    for ii in block_str:
-        stt = int(ii.split(":")[0])
-        end = int(ii.split(":")[1])
-        weight = float(ii.split(":")[2])
-        assert weight >= 0, "the weight of a block should be no less than 0"
-        block_stt.append(stt)
-        block_end.append(end)
-        block_weights.append(weight)
-    nblocks = len(block_str)
-    block_probs = np.array(block_weights) / np.sum(block_weights)
-    sys_probs = np.zeros([nsystems])
-    for ii in range(nblocks):
-        nbatch_block = nbatch[block_stt[ii] : block_end[ii]]
-        tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block)
-        sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii]
-    return sys_probs
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.data_system import (
+    DeepmdDataSystem,
+    prob_sys_size_ext,
+    process_sys_probs,
+)
+
+__all__ = [
+    "DeepmdDataSystem",
+    "process_sys_probs",
+    "prob_sys_size_ext",
+]
diff --git a/deepmd/utils/pair_tab.py b/deepmd/utils/pair_tab.py
index 4451f53379..1a526ac5fc 100644
--- a/deepmd/utils/pair_tab.py
+++ b/deepmd/utils/pair_tab.py
@@ -1,91 +1,9 @@
-#!/usr/bin/env python3
-
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    Tuple,
-)
-
-import numpy as np
-from scipy.interpolate import (
-    CubicSpline,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.pair_tab import (
+    PairTab,
 )
 
-
-class PairTab:
-    """Pairwise tabulated potential.
-
-    Parameters
-    ----------
-    filename
-            File name for the short-range tabulated potential.
-            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes.
-            The first colume is the distance between atoms.
-            The second to the last columes are energies for pairs of certain types.
-            For example we have two atom types, 0 and 1.
-            The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
-    """
-
-    def __init__(self, filename: str) -> None:
-        """Constructor."""
-        self.reinit(filename)
-
-    def reinit(self, filename: str) -> None:
-        """Initialize the tabulated interaction.
-
-        Parameters
-        ----------
-        filename
-            File name for the short-range tabulated potential.
-            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes.
-            The first colume is the distance between atoms.
-            The second to the last columes are energies for pairs of certain types.
-            For example we have two atom types, 0 and 1.
-            The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
-        """
-        self.vdata = np.loadtxt(filename)
-        self.rmin = self.vdata[0][0]
-        self.hh = self.vdata[1][0] - self.vdata[0][0]
-        self.nspline = self.vdata.shape[0] - 1
-        ncol = self.vdata.shape[1] - 1
-        n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5
-        self.ntypes = int(n0 + 0.1)
-        assert self.ntypes * (self.ntypes + 1) // 2 == ncol, (
-            "number of volumes provided in %s does not match guessed number of types %d"
-            % (filename, self.ntypes)
-        )
-        self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes])
-        self.tab_data = self._make_data()
-
-    def get(self) -> Tuple[np.array, np.array]:
-        """Get the serialized table."""
-        return self.tab_info, self.tab_data
-
-    def _make_data(self):
-        data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline])
-        stride = 4 * self.nspline
-        idx_iter = 0
-        xx = self.vdata[:, 0]
-        for t0 in range(self.ntypes):
-            for t1 in range(t0, self.ntypes):
-                vv = self.vdata[:, 1 + idx_iter]
-                cs = CubicSpline(xx, vv)
-                dd = cs(xx, 1)
-                dd *= self.hh
-                dtmp = np.zeros(stride)
-                for ii in range(self.nspline):
-                    dtmp[ii * 4 + 0] = 2 * vv[ii] - 2 * vv[ii + 1] + dd[ii] + dd[ii + 1]
-                    dtmp[ii * 4 + 1] = (
-                        -3 * vv[ii] + 3 * vv[ii + 1] - 2 * dd[ii] - dd[ii + 1]
-                    )
-                    dtmp[ii * 4 + 2] = dd[ii]
-                    dtmp[ii * 4 + 3] = vv[ii]
-                data[
-                    (t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride
-                    + stride
-                ] = dtmp
-                data[
-                    (t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride
-                    + stride
-                ] = dtmp
-                idx_iter += 1
-        return data
+__all__ = [
+    "PairTab",
+]
diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py
index a8e4bc329f..780bc8cabf 100644
--- a/deepmd/utils/path.py
+++ b/deepmd/utils/path.py
@@ -1,358 +1,13 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import os
-from abc import (
-    ABC,
-    abstractmethod,
-)
-from functools import (
-    lru_cache,
-)
-from pathlib import (
-    Path,
-)
-from typing import (
-    List,
-    Optional,
-)
-
-import h5py
-import numpy as np
-from wcmatch.glob import (
-    globfilter,
-)
-
-
-class DPPath(ABC):
-    """The path class to data system (DeepmdData).
-
-    Parameters
-    ----------
-    path : str
-        path
-    """
-
-    def __new__(cls, path: str):
-        if cls is DPPath:
-            if os.path.isdir(path):
-                return super().__new__(DPOSPath)
-            elif os.path.isfile(path.split("#")[0]):
-                # assume h5 if it is not dir
-                # TODO: check if it is a real h5? or just check suffix?
-                return super().__new__(DPH5Path)
-            raise FileNotFoundError("%s not found" % path)
-        return super().__new__(cls)
-
-    @abstractmethod
-    def load_numpy(self) -> np.ndarray:
-        """Load NumPy array.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-
-    @abstractmethod
-    def load_txt(self, **kwargs) -> np.ndarray:
-        """Load NumPy array from text.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-
-    @abstractmethod
-    def glob(self, pattern: str) -> List["DPPath"]:
-        """Search path using the glob pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-
-    @abstractmethod
-    def rglob(self, pattern: str) -> List["DPPath"]:
-        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
-        of the given relative pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-
-    @abstractmethod
-    def is_file(self) -> bool:
-        """Check if self is file."""
-
-    @abstractmethod
-    def is_dir(self) -> bool:
-        """Check if self is directory."""
-
-    @abstractmethod
-    def __truediv__(self, key: str) -> "DPPath":
-        """Used for / operator."""
-
-    @abstractmethod
-    def __lt__(self, other: "DPPath") -> bool:
-        """Whether this DPPath is less than other for sorting."""
-
-    @abstractmethod
-    def __str__(self) -> str:
-        """Represent string."""
-
-    def __repr__(self) -> str:
-        return f"{type(self)} ({self!s})"
-
-    def __eq__(self, other) -> bool:
-        return str(self) == str(other)
-
-    def __hash__(self):
-        return hash(str(self))
-
-
-class DPOSPath(DPPath):
-    """The OS path class to data system (DeepmdData) for real directories.
-
-    Parameters
-    ----------
-    path : str
-        path
-    """
-
-    def __init__(self, path: str) -> None:
-        super().__init__()
-        if isinstance(path, Path):
-            self.path = path
-        else:
-            self.path = Path(path)
-
-    def load_numpy(self) -> np.ndarray:
-        """Load NumPy array.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        return np.load(str(self.path))
-
-    def load_txt(self, **kwargs) -> np.ndarray:
-        """Load NumPy array from text.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        return np.loadtxt(str(self.path), **kwargs)
-
-    def glob(self, pattern: str) -> List["DPPath"]:
-        """Search path using the glob pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        # currently DPOSPath will only derivative DPOSPath
-        # TODO: discuss if we want to mix DPOSPath and DPH5Path?
-        return [type(self)(p) for p in self.path.glob(pattern)]
-
-    def rglob(self, pattern: str) -> List["DPPath"]:
-        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
-        of the given relative pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        return [type(self)(p) for p in self.path.rglob(pattern)]
-
-    def is_file(self) -> bool:
-        """Check if self is file."""
-        return self.path.is_file()
-
-    def is_dir(self) -> bool:
-        """Check if self is directory."""
-        return self.path.is_dir()
-
-    def __truediv__(self, key: str) -> "DPPath":
-        """Used for / operator."""
-        return type(self)(self.path / key)
-
-    def __lt__(self, other: "DPOSPath") -> bool:
-        """Whether this DPPath is less than other for sorting."""
-        return self.path < other.path
-
-    def __str__(self) -> str:
-        """Represent string."""
-        return str(self.path)
-
-
-class DPH5Path(DPPath):
-    """The path class to data system (DeepmdData) for HDF5 files.
-
-    Notes
-    -----
-    OS - HDF5 relationship:
-        directory - Group
-        file - Dataset
-
-    Parameters
-    ----------
-    path : str
-        path
-    """
-
-    def __init__(self, path: str) -> None:
-        super().__init__()
-        # we use "#" to split path
-        # so we do not support file names containing #...
-        s = path.split("#")
-        self.root_path = s[0]
-        self.root = self._load_h5py(s[0])
-        # h5 path: default is the root path
-        self.name = s[1] if len(s) > 1 else "/"
-
-    @classmethod
-    @lru_cache(None)
-    def _load_h5py(cls, path: str) -> h5py.File:
-        """Load hdf5 file.
-
-        Parameters
-        ----------
-        path : str
-            path to hdf5 file
-        """
-        # this method has cache to avoid duplicated
-        # loading from different DPH5Path
-        # However the file will be never closed?
-        return h5py.File(path, "r")
-
-    def load_numpy(self) -> np.ndarray:
-        """Load NumPy array.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        return self.root[self.name][:]
-
-    def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray:
-        """Load NumPy array from text.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        arr = self.load_numpy()
-        if dtype:
-            arr = arr.astype(dtype)
-        return arr
-
-    def glob(self, pattern: str) -> List["DPPath"]:
-        """Search path using the glob pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        # got paths starts with current path first, which is faster
-        subpaths = [ii for ii in self._keys if ii.startswith(self.name)]
-        return [
-            type(self)(f"{self.root_path}#{pp}")
-            for pp in globfilter(subpaths, self._connect_path(pattern))
-        ]
-
-    def rglob(self, pattern: str) -> List["DPPath"]:
-        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
-        of the given relative pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        return self.glob("**" + pattern)
-
-    @property
-    def _keys(self) -> List[str]:
-        """Walk all groups and dataset."""
-        return self._file_keys(self.root)
-
-    @classmethod
-    @lru_cache(None)
-    def _file_keys(cls, file: h5py.File) -> List[str]:
-        """Walk all groups and dataset."""
-        l = []
-        file.visit(lambda x: l.append("/" + x))
-        return l
-
-    def is_file(self) -> bool:
-        """Check if self is file."""
-        if self.name not in self._keys:
-            return False
-        return isinstance(self.root[self.name], h5py.Dataset)
-
-    def is_dir(self) -> bool:
-        """Check if self is directory."""
-        if self.name not in self._keys:
-            return False
-        return isinstance(self.root[self.name], h5py.Group)
-
-    def __truediv__(self, key: str) -> "DPPath":
-        """Used for / operator."""
-        return type(self)(f"{self.root_path}#{self._connect_path(key)}")
-
-    def _connect_path(self, path: str) -> str:
-        """Connect self with path."""
-        if self.name.endswith("/"):
-            return f"{self.name}{path}"
-        return f"{self.name}/{path}"
-
-    def __lt__(self, other: "DPH5Path") -> bool:
-        """Whether this DPPath is less than other for sorting."""
-        if self.root_path == other.root_path:
-            return self.name < other.name
-        return self.root_path < other.root_path
-
-    def __str__(self) -> str:
-        """Returns path of self."""
-        return f"{self.root_path}#{self.name}"
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.path import (
+    DPH5Path,
+    DPOSPath,
+    DPPath,
+)
+
+__all__ = [
+    "DPPath",
+    "DPOSPath",
+    "DPH5Path",
+]
diff --git a/deepmd/utils/plugin.py b/deepmd/utils/plugin.py
index 2a77b744c5..3b5b297304 100644
--- a/deepmd/utils/plugin.py
+++ b/deepmd/utils/plugin.py
@@ -1,95 +1,15 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Base of plugin systems."""
-# copied from https://github.com/deepmodeling/dpdata/blob/a3e76d75de53f6076254de82d18605a010dc3b00/dpdata/plugin.py
-
-from abc import (
-    ABCMeta,
-)
-from typing import (
-    Callable,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.plugin import (
+    Plugin,
+    PluginVariant,
+    VariantABCMeta,
+    VariantMeta,
 )
 
-
-class Plugin:
-    """A class to register and restore plugins.
-
-    Attributes
-    ----------
-    plugins : Dict[str, object]
-        plugins
-
-    Examples
-    --------
-    >>> plugin = Plugin()
-    >>> @plugin.register("xx")
-        def xxx():
-            pass
-    >>> print(plugin.plugins['xx'])
-    """
-
-    def __init__(self):
-        self.plugins = {}
-
-    def __add__(self, other) -> "Plugin":
-        self.plugins.update(other.plugins)
-        return self
-
-    def register(self, key: str) -> Callable[[object], object]:
-        """Register a plugin.
-
-        Parameters
-        ----------
-        key : str
-            key of the plugin
-
-        Returns
-        -------
-        Callable[[object], object]
-            decorator
-        """
-
-        def decorator(object: object) -> object:
-            self.plugins[key] = object
-            return object
-
-        return decorator
-
-    def get_plugin(self, key) -> object:
-        """Visit a plugin by key.
-
-        Parameters
-        ----------
-        key : str
-            key of the plugin
-
-        Returns
-        -------
-        object
-            the plugin
-        """
-        return self.plugins[key]
-
-
-class VariantMeta:
-    def __call__(cls, *args, **kwargs):
-        """Remove `type` and keys that starts with underline."""
-        obj = cls.__new__(cls, *args, **kwargs)
-        kwargs.pop("type", None)
-        to_pop = []
-        for kk in kwargs:
-            if kk[0] == "_":
-                to_pop.append(kk)
-        for kk in to_pop:
-            kwargs.pop(kk, None)
-        obj.__init__(*args, **kwargs)
-        return obj
-
-
-class VariantABCMeta(VariantMeta, ABCMeta):
-    pass
-
-
-class PluginVariant(metaclass=VariantABCMeta):
-    """A class to remove `type` from input arguments."""
-
-    pass
+__all__ = [
+    "Plugin",
+    "VariantMeta",
+    "VariantABCMeta",
+    "PluginVariant",
+]
diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py
index 8944419412..09547eeac9 100644
--- a/deepmd/utils/random.py
+++ b/deepmd/utils/random.py
@@ -1,67 +1,15 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    Optional,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.random import (
+    choice,
+    random,
+    seed,
+    shuffle,
 )
 
-import numpy as np
-
-_RANDOM_GENERATOR = np.random.RandomState()
-
-
-def choice(a: np.ndarray, p: Optional[np.ndarray] = None):
-    """Generates a random sample from a given 1-D array.
-
-    Parameters
-    ----------
-    a : np.ndarray
-        A random sample is generated from its elements.
-    p : np.ndarray
-        The probabilities associated with each entry in a.
-
-    Returns
-    -------
-    np.ndarray
-        arrays with results and their shapes
-    """
-    return _RANDOM_GENERATOR.choice(a, p=p)
-
-
-def random(size=None):
-    """Return random floats in the half-open interval [0.0, 1.0).
-
-    Parameters
-    ----------
-    size
-        Output shape.
-
-    Returns
-    -------
-    np.ndarray
-        Arrays with results and their shapes.
-    """
-    return _RANDOM_GENERATOR.random_sample(size)
-
-
-def seed(val: Optional[int] = None):
-    """Seed the generator.
-
-    Parameters
-    ----------
-    val : int
-        Seed.
-    """
-    _RANDOM_GENERATOR.seed(val)
-
-
-def shuffle(x: np.ndarray):
-    """Modify a sequence in-place by shuffling its contents.
-
-    Parameters
-    ----------
-    x : np.ndarray
-        The array or list to be shuffled.
-    """
-    _RANDOM_GENERATOR.shuffle(x)
-
-
-__all__ = ["choice", "random", "seed", "shuffle"]
+__all__ = [
+    "choice",
+    "random",
+    "seed",
+    "shuffle",
+]
diff --git a/deepmd/utils/weight_avg.py b/deepmd/utils/weight_avg.py
index b344d3bb75..267f89ed28 100644
--- a/deepmd/utils/weight_avg.py
+++ b/deepmd/utils/weight_avg.py
@@ -1,48 +1,9 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from collections import (
-    defaultdict,
+"""Alias for backward compatibility."""
+from deepmd_utils.utils.weight_avg import (
+    weighted_average,
 )
-from typing import (
-    Dict,
-    List,
-    Tuple,
-)
-
-import numpy as np
-
-
-def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict:
-    """Compute wighted average of prediction errors (MAE or RMSE) for model.
-
-    Parameters
-    ----------
-    errors : List[Dict[str, Tuple[float, float]]]
-        List: the error of systems
-        Dict: the error of quantities, name given by the key
-        str: the name of the quantity, must starts with 'mae' or 'rmse'
-        Tuple: (error, weight)
 
-    Returns
-    -------
-    Dict
-        weighted averages
-    """
-    sum_err = defaultdict(float)
-    sum_siz = defaultdict(int)
-    for err in errors:
-        for kk, (ee, ss) in err.items():
-            if kk.startswith("mae"):
-                sum_err[kk] += ee * ss
-            elif kk.startswith("rmse"):
-                sum_err[kk] += ee * ee * ss
-            else:
-                raise RuntimeError("unknown error type")
-            sum_siz[kk] += ss
-    for kk in sum_err.keys():
-        if kk.startswith("mae"):
-            sum_err[kk] = sum_err[kk] / sum_siz[kk]
-        elif kk.startswith("rmse"):
-            sum_err[kk] = np.sqrt(sum_err[kk] / sum_siz[kk])
-        else:
-            raise RuntimeError("unknown error type")
-    return sum_err
+__all__ = [
+    "weighted_average",
+]
diff --git a/deepmd_utils/common.py b/deepmd_utils/common.py
new file mode 100644
index 0000000000..b594c54030
--- /dev/null
+++ b/deepmd_utils/common.py
@@ -0,0 +1,270 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import warnings
+from pathlib import (
+    Path,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Optional,
+    TypeVar,
+    Union,
+)
+
+try:
+    from typing import Literal  # python >=3.8
+except ImportError:
+    from typing_extensions import Literal  # type: ignore
+
+import numpy as np
+import yaml
+
+from deepmd_utils.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd_utils.utils.path import (
+    DPPath,
+)
+
+__all__ = [
+    "data_requirement",
+    "add_data_requirement",
+    "select_idx_map",
+    "make_default_mesh",
+    "j_must_have",
+    "j_loader",
+    "expand_sys_str",
+    "get_np_precision",
+]
+
+
+if TYPE_CHECKING:
+    _DICT_VAL = TypeVar("_DICT_VAL")
+    _PRECISION = Literal["default", "float16", "float32", "float64"]
+    _ACTIVATION = Literal[
+        "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf"
+    ]
+    __all__.extend(
+        [
+            "_DICT_VAL",
+            "_PRECISION",
+            "_ACTIVATION",
+        ]
+    )
+
+
+# TODO this is not a good way to do things. This is some global variable to which
+# TODO anyone can write and there is no good way to keep track of the changes
+data_requirement = {}
+
+
+def add_data_requirement(
+    key: str,
+    ndof: int,
+    atomic: bool = False,
+    must: bool = False,
+    high_prec: bool = False,
+    type_sel: Optional[bool] = None,
+    repeat: int = 1,
+    default: float = 0.0,
+    dtype: Optional[np.dtype] = None,
+):
+    """Specify data requirements for training.
+
+    Parameters
+    ----------
+    key : str
+        type of data stored in corresponding `*.npy` file e.g. `forces` or `energy`
+    ndof : int
+        number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces
+        have `atomic=True` and `ndof=3`
+    atomic : bool, optional
+        specifies whether the `ndof` keyword applies to a per-atom quantity or not,
+        by default False
+    must : bool, optional
+        specify if the `*.npy` data file must exist, by default False
+    high_prec : bool, optional
+        if true load data to `np.float64` else `np.float32`, by default False
+    type_sel : bool, optional
+        select only certain type of atoms, by default None
+    repeat : int, optional
+        if specified, repeat data `repeat` times, by default 1
+    default : float, optional, default=0.
+        default value of data
+    dtype : np.dtype, optional
+        the dtype of data, overwrites `high_prec` if provided
+    """
+    data_requirement[key] = {
+        "ndof": ndof,
+        "atomic": atomic,
+        "must": must,
+        "high_prec": high_prec,
+        "type_sel": type_sel,
+        "repeat": repeat,
+        "default": default,
+        "dtype": dtype,
+    }
+
+
+def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray:
+    """Build map of indices for the supplied element types from the list of all atoms.
+
+    Parameters
+    ----------
+    atom_types : np.ndarray
+        array specifying the type of each atom as an integer
+    select_types : np.ndarray
+        types of atoms you want to find indices for
+
+    Returns
+    -------
+    np.ndarray
+        indices of types of atoms defined by `select_types` in `atom_types` array
+
+    Warnings
+    --------
+    `select_types` array will be sorted before finding indices in `atom_types`
+    """
+    sort_select_types = np.sort(select_types)
+    idx_map = []
+    for ii in sort_select_types:
+        idx_map.append(np.where(atom_types == ii)[0])
+    return np.concatenate(idx_map)
+
+
+def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray:
+    """Make mesh.
+
+    Only the size of mesh matters, not the values:
+    * 6 for PBC, no mixed types
+    * 0 for no PBC, no mixed types
+    * 7 for PBC, mixed types
+    * 1 for no PBC, mixed types
+
+    Parameters
+    ----------
+    pbc : bool
+        if True, the mesh will be made for periodic boundary conditions
+    mixed_type : bool
+        if True, the mesh will be made for mixed types
+
+    Returns
+    -------
+    np.ndarray
+        mesh
+    """
+    mesh_size = int(pbc) * 6 + int(mixed_type)
+    default_mesh = np.zeros(mesh_size, dtype=np.int32)
+    return default_mesh
+
+
+# TODO maybe rename this to j_deprecated and only warn about deprecated keys,
+# TODO if the deprecated_key argument is left empty, the function's purpose is only a custom
+# TODO error since dict[key] already raises KeyError when the key is missing
+def j_must_have(
+    jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = []
+) -> "_DICT_VAL":
+    """Assert that the supplied dictionary contains the specified key.
+
+    Returns
+    -------
+    _DICT_VAL
+        value that was stored under the supplied key
+
+    Raises
+    ------
+    RuntimeError
+        if the key is not present
+    """
+    if key not in jdata.keys():
+        for ii in deprecated_key:
+            if ii in jdata.keys():
+                warnings.warn(f"the key {ii} is deprecated, please use {key} instead")
+                return jdata[ii]
+        else:
+            raise RuntimeError(f"json database must provide key {key}")
+    else:
+        return jdata[key]
+
+
+def j_loader(filename: Union[str, Path]) -> Dict[str, Any]:
+    """Load yaml or json settings file.
+
+    Parameters
+    ----------
+    filename : Union[str, Path]
+        path to file
+
+    Returns
+    -------
+    Dict[str, Any]
+        loaded dictionary
+
+    Raises
+    ------
+    TypeError
+        if the supplied file is of unsupported type
+    """
+    filepath = Path(filename)
+    if filepath.suffix.endswith("json"):
+        with filepath.open() as fp:
+            return json.load(fp)
+    elif filepath.suffix.endswith(("yml", "yaml")):
+        with filepath.open() as fp:
+            return yaml.safe_load(fp)
+    else:
+        raise TypeError("config file must be json, or yaml/yml")
+
+
+# TODO port completely to pathlib when all callers are ported
+def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
+    """Recursively iterate over directories taking those that contain `type.raw` file.
+
+    Parameters
+    ----------
+    root_dir : Union[str, Path]
+        starting directory
+
+    Returns
+    -------
+    List[str]
+        list of string pointing to system directories
+    """
+    root_dir = DPPath(root_dir)
+    matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()]
+    if (root_dir / "type.raw").is_file():
+        matches.append(str(root_dir))
+    return matches
+
+
+def get_np_precision(precision: "_PRECISION") -> np.dtype:
+    """Get numpy precision constant from string.
+
+    Parameters
+    ----------
+    precision : _PRECISION
+        string name of numpy constant or default
+
+    Returns
+    -------
+    np.dtype
+        numpy precision constant
+
+    Raises
+    ------
+    RuntimeError
+        if string is invalid
+    """
+    if precision == "default":
+        return GLOBAL_NP_FLOAT_PRECISION
+    elif precision == "float16":
+        return np.float16
+    elif precision == "float32":
+        return np.float32
+    elif precision == "float64":
+        return np.float64
+    else:
+        raise RuntimeError(f"{precision} is not a valid precision")
diff --git a/deepmd_utils/env.py b/deepmd_utils/env.py
new file mode 100644
index 0000000000..b1d4958ed8
--- /dev/null
+++ b/deepmd_utils/env.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+
+import numpy as np
+
+__all__ = [
+    "GLOBAL_NP_FLOAT_PRECISION",
+    "GLOBAL_ENER_FLOAT_PRECISION",
+    "global_float_prec",
+]
+
+# FLOAT_PREC
+dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower()
+if dp_float_prec in ("high", ""):
+    # default is high
+    GLOBAL_NP_FLOAT_PRECISION = np.float64
+    GLOBAL_ENER_FLOAT_PRECISION = np.float64
+    global_float_prec = "double"
+elif dp_float_prec == "low":
+    GLOBAL_NP_FLOAT_PRECISION = np.float32
+    GLOBAL_ENER_FLOAT_PRECISION = np.float64
+    global_float_prec = "float"
+else:
+    raise RuntimeError(
+        "Unsupported float precision option: %s. Supported: high,"
+        "low. Please set precision with environmental variable "
+        "DP_INTERFACE_PREC." % dp_float_prec
+    )
diff --git a/deepmd_utils/loggers/__init__.py b/deepmd_utils/loggers/__init__.py
new file mode 100644
index 0000000000..39aa76139d
--- /dev/null
+++ b/deepmd_utils/loggers/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Module taking care of logging duties."""
+
+from .loggers import (
+    set_log_handles,
+)
+
+__all__ = ["set_log_handles"]
diff --git a/deepmd_utils/loggers/loggers.py b/deepmd_utils/loggers/loggers.py
new file mode 100644
index 0000000000..015581f6bd
--- /dev/null
+++ b/deepmd_utils/loggers/loggers.py
@@ -0,0 +1,277 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Logger initialization for package."""
+
+import logging
+import os
+from typing import (
+    TYPE_CHECKING,
+    Optional,
+)
+
+if TYPE_CHECKING:
+    from pathlib import (
+        Path,
+    )
+
+    from mpi4py import (
+        MPI,
+    )
+
+    _MPI_APPEND_MODE = MPI.MODE_CREATE | MPI.MODE_APPEND
+
+logging.getLogger(__name__)
+
+__all__ = ["set_log_handles"]
+
+# logger formatters
+FFORMATTER = logging.Formatter(
+    "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s"
+)
+CFORMATTER = logging.Formatter(
+    #    "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s"
+    "%(app_name)s %(levelname)-7s %(message)s"
+)
+FFORMATTER_MPI = logging.Formatter(
+    "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(name)-45s %(message)s"
+)
+CFORMATTER_MPI = logging.Formatter(
+    #    "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s"
+    "%(app_name)s rank:%(rank)-2s %(levelname)-7s %(message)s"
+)
+
+
+class _AppFilter(logging.Filter):
+    """Add field `app_name` to log messages."""
+
+    def filter(self, record):
+        record.app_name = "DEEPMD"
+        return True
+
+
+class _MPIRankFilter(logging.Filter):
+    """Add MPI rank number to log messages, adds field `rank`."""
+
+    def __init__(self, rank: int) -> None:
+        super().__init__(name="MPI_rank_id")
+        self.mpi_rank = str(rank)
+
+    def filter(self, record):
+        record.rank = self.mpi_rank
+        return True
+
+
+class _MPIMasterFilter(logging.Filter):
+    """Filter that lets through only messages emitted from rank==0."""
+
+    def __init__(self, rank: int) -> None:
+        super().__init__(name="MPI_master_log")
+        self.mpi_rank = rank
+
+    def filter(self, record):
+        if self.mpi_rank == 0:
+            return True
+        else:
+            return False
+
+
+class _MPIFileStream:
+    """Wrap `MPI.File` so it has the same API as python file streams.
+
+    Parameters
+    ----------
+    filename : Path
+        disk location of the file stream
+    MPI : MPI
+        the `mpi4py.MPI` module (provides `File` and `COMM_WORLD`)
+    mode : str, optional
+        file write mode, by default _MPI_APPEND_MODE
+    """
+
+    def __init__(
+        self, filename: "Path", MPI: "MPI", mode: str = "_MPI_APPEND_MODE"
+    ) -> None:
+        self.stream = MPI.File.Open(MPI.COMM_WORLD, filename, mode)
+        self.stream.Set_atomicity(True)
+        self.name = "MPIfilestream"
+
+    def write(self, msg: str):
+        """Write to MPI shared file stream.
+
+        Parameters
+        ----------
+        msg : str
+            message to write
+        """
+        b = bytearray()
+        b.extend(map(ord, msg))
+        self.stream.Write_shared(b)
+
+    def close(self):
+        """Synchronize and close MPI file stream."""
+        self.stream.Sync()
+        self.stream.Close()
+
+
+class _MPIHandler(logging.FileHandler):
+    """Emulate `logging.FileHandler` with MPI shared File that all ranks can write to.
+
+    Parameters
+    ----------
+    filename : Path
+        file path
+    MPI : MPI
+        the `mpi4py.MPI` module, forwarded to the underlying MPI file stream
+    mode : str, optional
+        file access mode, by default "_MPI_APPEND_MODE"
+    """
+
+    def __init__(
+        self,
+        filename: "Path",
+        MPI: "MPI",
+        mode: str = "_MPI_APPEND_MODE",
+    ) -> None:
+        self.MPI = MPI
+        super().__init__(filename, mode=mode, encoding=None, delay=False)
+
+    def _open(self):
+        return _MPIFileStream(self.baseFilename, self.MPI, self.mode)
+
+    def setStream(self, stream):
+        """Stream cannot be reassigned in MPI mode."""
+        raise NotImplementedError("Unable to do for MPI file handler!")
+
+
+def set_log_handles(
+    level: int, log_path: Optional["Path"] = None, mpi_log: Optional[str] = None
+):
+    """Set desired level for package loggers and add file handlers.
+
+    Parameters
+    ----------
+    level : int
+        logging level
+    log_path : Optional[Path]
+        path to log file; if None, logs will be sent only to the console. If the parent
+        directory does not exist it will be automatically created, by default None
+    mpi_log : Optional[str], optional
+        mpi log type. Has three options. `master` will output logs to file and console
+        only from rank==0. `collect` will write messages from all ranks to one file
+        opened under rank==0 and to console. `workers` will open one log file for each
+        worker designated by its rank, console behaviour is the same as for `collect`.
+        If this argument is specified, package 'mpi4py' must be already installed.
+        by default None
+
+    Raises
+    ------
+    RuntimeError
+        If the argument `mpi_log` is specified but package `mpi4py` is not installed.
+
+    References
+    ----------
+    https://groups.google.com/g/mpi4py/c/SaNzc8bdj6U
+    https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error
+    https://stackoverflow.com/questions/56085015/suppress-openmp-debug-messages-when-running-tensorflow-on-cpu
+
+    Notes
+    -----
+    Logging levels:
+
+    +---------+--------------+----------------+----------------+----------------+
+    |         | our notation | python logging | tensorflow cpp | OpenMP         |
+    +=========+==============+================+================+================+
+    | debug   | 10           | 10             | 0              | 1/on/true/yes  |
+    +---------+--------------+----------------+----------------+----------------+
+    | info    | 20           | 20             | 1              | 0/off/false/no |
+    +---------+--------------+----------------+----------------+----------------+
+    | warning | 30           | 30             | 2              | 0/off/false/no |
+    +---------+--------------+----------------+----------------+----------------+
+    | error   | 40           | 40             | 3              | 0/off/false/no |
+    +---------+--------------+----------------+----------------+----------------+
+
+    """
+    # NOTE(review): meant to silence OpenMP logging for any level other than debug, but the check below is true for debug (level <= 10) - confirm
+    if level <= 10:
+        os.environ["KMP_WARNINGS"] = "FALSE"
+
+    # set TF cpp internal logging level
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(int((level / 10) - 1))
+
+    # get root logger
+    root_log = logging.getLogger("deepmd")
+    root_log.propagate = False
+
+    root_log.setLevel(level)
+
+    # check if arguments are present
+    MPI = None
+    if mpi_log:
+        try:
+            from mpi4py import (
+                MPI,
+            )
+        except ImportError as e:
+            raise RuntimeError(
+                "You cannot specify 'mpi_log' when mpi4py not installed"
+            ) from e
+
+    # * add console handler ************************************************************
+    ch = logging.StreamHandler()
+    if MPI:
+        rank = MPI.COMM_WORLD.Get_rank()
+        if mpi_log == "master":
+            ch.setFormatter(CFORMATTER)
+            ch.addFilter(_MPIMasterFilter(rank))
+        else:
+            ch.setFormatter(CFORMATTER_MPI)
+            ch.addFilter(_MPIRankFilter(rank))
+    else:
+        ch.setFormatter(CFORMATTER)
+
+    ch.setLevel(level)
+    ch.addFilter(_AppFilter())
+    # clean old handlers before adding new one
+    root_log.handlers.clear()
+    root_log.addHandler(ch)
+
+    # * add file handler ***************************************************************
+    if log_path:
+        # create directory
+        log_path.parent.mkdir(exist_ok=True, parents=True)
+
+        fh = None
+
+        if mpi_log == "master":
+            rank = MPI.COMM_WORLD.Get_rank()
+            if rank == 0:
+                fh = logging.FileHandler(log_path, mode="w")
+                fh.addFilter(_MPIMasterFilter(rank))
+                fh.setFormatter(FFORMATTER)
+        elif mpi_log == "collect":
+            rank = MPI.COMM_WORLD.Get_rank()
+            fh = _MPIHandler(log_path, MPI, mode=MPI.MODE_WRONLY | MPI.MODE_CREATE)
+            fh.addFilter(_MPIRankFilter(rank))
+            fh.setFormatter(FFORMATTER_MPI)
+        elif mpi_log == "workers":
+            rank = MPI.COMM_WORLD.Get_rank()
+            # if the file has a suffix, then insert the rank number before the suffix
+            # e.g deepmd.log -> deepmd_<rank>.log
+            # if no suffix is present, insert rank as suffix
+            # e.g. deepmdlog -> deepmdlog.<rank>
+            if log_path.suffix:
+                worker_log = (log_path.parent / f"{log_path.stem}_{rank}").with_suffix(
+                    log_path.suffix
+                )
+            else:
+                worker_log = log_path.with_suffix(f".{rank}")
+
+            fh = logging.FileHandler(worker_log, mode="w")
+            fh.setFormatter(FFORMATTER)
+        else:
+            fh = logging.FileHandler(log_path, mode="w")
+            fh.setFormatter(FFORMATTER)
+
+        if fh:
+            fh.setLevel(level)
+            fh.addFilter(_AppFilter())
+            root_log.addHandler(fh)
diff --git a/deepmd_utils/utils/__init__.py b/deepmd_utils/utils/__init__.py
new file mode 100644
index 0000000000..bac6924ac1
--- /dev/null
+++ b/deepmd_utils/utils/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# For performance, do not add things to this file
+# import submodules instead
diff --git a/deepmd_utils/utils/argcheck.py b/deepmd_utils/utils/argcheck.py
new file mode 100644
index 0000000000..6c51a7b859
--- /dev/null
+++ b/deepmd_utils/utils/argcheck.py
@@ -0,0 +1,2028 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
+)
+
+from dargs import (
+    Argument,
+    ArgumentEncoder,
+    Variant,
+    dargs,
+)
+
+from deepmd.common import (
+    ACTIVATION_FN_DICT,
+    PRECISION_DICT,
+)
+from deepmd_utils.utils.argcheck_nvnmd import (
+    nvnmd_args,
+)
+from deepmd_utils.utils.plugin import (
+    Plugin,
+)
+
+log = logging.getLogger(__name__)
+
+
+def list_to_doc(xx):
+    """Format the items of ``xx`` as a quoted, comma-separated doc fragment.
+
+    Every item is wrapped in double quotes, items are joined with ", ", and a
+    closing period is always appended (an empty ``xx`` therefore gives ".").
+    """
+    quoted = [f'"{entry}"' for entry in xx]
+    body = ", ".join(quoted)
+    return body + "."
+
+
+def make_link(content, ref_key):
+    """Return an RST link to ``ref_key`` labelled ``content`` (raw ``#`` anchor if dargs.RAW_ANCHOR)."""
+    if dargs.RAW_ANCHOR:
+        return f"`{content} <#{ref_key}>`_"
+    # RAW_ANCHOR is falsy: point at the named reference target instead
+    return f"`{content} <{ref_key}_>`_"
+
+
+def type_embedding_args():
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_seed = "Random seed for parameter initialization"
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net are trainable"
+
+    return [
+        Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, default=None, doc=doc_seed),
+    ]
+
+
+def spin_args():
+    doc_use_spin = "Whether to use atomic spin model for each atom type"
+    doc_spin_norm = "The magnitude of atomic spin for each atom type with spin"
+    doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin"
+
+    return [
+        Argument("use_spin", List[bool], doc=doc_use_spin),
+        Argument("spin_norm", List[float], doc=doc_spin_norm),
+        Argument("virtual_len", List[float], doc=doc_virtual_len),
+    ]
+
+
+#  --- Descriptor configurations: --- #
+
+
+class ArgsPlugin:  # registry mapping (name, alias) keys to argument-building callables
+    def __init__(self) -> None:
+        self.__plugin = Plugin()  # backing store; entries are read back via ``.plugins``
+
+    def register(
+        self, name: str, alias: Optional[List[str]] = None
+    ) -> Callable[[], List[Argument]]:
+        """Register an argument plugin under ``name`` (e.g. a descriptor type).
+
+        Parameters
+        ----------
+        name : str
+            the name the plugin is registered under
+        alias : List[str], optional
+            the list of aliases of this name
+
+        Returns
+        -------
+        Callable[[], List[Argument]]
+            whatever ``Plugin.register`` returns; it is applied as a decorator
+
+        Examples
+        --------
+        >>> some_plugin = ArgsPlugin()
+        >>> @some_plugin.register("some_descrpt")
+        ... def descrpt_some_descrpt_args():
+        ...     return []
+        """
+        # a list is unhashable, so turn alias into a tuple before using it in the key
+        if isinstance(alias, list):
+            alias = tuple(alias)
+        return self.__plugin.register((name, alias))
+
+    def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]:
+        """Build one ``Argument`` per registered plugin.
+
+        Parameters
+        ----------
+        exclude_hybrid : bool
+            skip the plugin named "hybrid" to prevent circular calls
+
+        Returns
+        -------
+        List[Argument]
+            dict-typed arguments whose sub-fields come from calling each plugin
+        """
+        arguments = []
+        for (name, alias), metd in self.__plugin.plugins.items():
+            if exclude_hybrid and name == "hybrid":
+                continue
+            arguments.append(
+                Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias)
+            )
+        return arguments
+
+
+descrpt_args_plugin = ArgsPlugin()
+
+
+@descrpt_args_plugin.register("loc_frame")
+def descrpt_local_frame_args():
+    doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor."
+    doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius."
+    doc_rcut = "The cut-off radius. The default value is 6.0"
+    doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\
+- axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
+- axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\
+- axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\
+- axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
+- axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\
+- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance."
+
+    return [
+        Argument("sel_a", List[int], optional=False, doc=doc_sel_a),
+        Argument("sel_r", List[int], optional=False, doc=doc_sel_r),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule),
+    ]
+
+
+@descrpt_args_plugin.register("se_e2_a", alias=["se_a"])
+def descrpt_se_a_args():
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net is trainable"
+    doc_seed = "Random seed for parameter initialization"
+    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "axis_neuron",
+            int,
+            optional=True,
+            default=4,
+            alias=["n_axis_neuron"],
+            doc=doc_axis_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("se_e3", alias=["se_at", "se_a_3be", "se_t"])
+def descrpt_se_t_args():
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net are trainable"
+    doc_seed = "Random seed for parameter initialization"
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"])
+def descrpt_se_a_tpe_args():
+    doc_type_nchanl = "number of channels for type embedding"
+    doc_type_nlayer = "number of hidden layers of type embedding net"
+    doc_numb_aparam = "dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded."
+
+    return [
+        *descrpt_se_a_args(),
+        Argument("type_nchanl", int, optional=True, default=4, doc=doc_type_nchanl),
+        Argument("type_nlayer", int, optional=True, default=2, doc=doc_type_nlayer),
+        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+    ]
+
+
+@descrpt_args_plugin.register("se_e2_r", alias=["se_r"])
+def descrpt_se_r_args():
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net are trainable"
+    doc_seed = "Random seed for parameter initialization"
+    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("hybrid")
+def descrpt_hybrid_args():
+    doc_list = "A list of descriptor definitions"
+
+    return [
+        Argument(
+            "list",
+            list,
+            optional=False,
+            doc=doc_list,
+            repeat=True,
+            sub_fields=[],
+            sub_variants=[descrpt_variant_type_args(exclude_hybrid=True)],
+            fold_subdoc=True,
+        )
+    ]
+
+
+def descrpt_se_atten_common_args():  # arguments shared by the se_atten and se_atten_v2 descriptors
+    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
+    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net is trainable"
+    doc_seed = "Random seed for parameter initialization"
+    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    doc_attn = "The length of hidden vectors in attention layers"
+    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True"
+    doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates"
+    doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"
+
+    return [
+        Argument(
+            "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel
+        ),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "axis_neuron",
+            int,
+            optional=True,
+            default=4,
+            alias=["n_axis_neuron"],
+            doc=doc_axis_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument("attn", int, optional=True, default=128, doc=doc_attn),
+        Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
+        Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
+        Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask),
+    ]
+
+
+@descrpt_args_plugin.register("se_atten")
+def descrpt_se_atten_args():
+    doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible."
+    doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True."
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
+
+    return [
+        *descrpt_se_atten_common_args(),
+        Argument(
+            "stripped_type_embedding",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_stripped_type_embedding,
+        ),
+        Argument(
+            "smooth_type_embdding",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_smooth_type_embdding,
+        ),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("se_atten_v2")
+def descrpt_se_atten_v2_args():
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
+
+    return [
+        *descrpt_se_atten_common_args(),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("se_a_ebd_v2", alias=["se_a_tpe_v2"])
+def descrpt_se_a_ebd_v2_args():
+    return descrpt_se_a_args()
+
+
+@descrpt_args_plugin.register("se_a_mask")
+def descrpt_se_a_mask_args():
+    doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net is trainable"
+    doc_seed = "Random seed for parameter initialization"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "axis_neuron",
+            int,
+            optional=True,
+            default=4,
+            alias=["n_axis_neuron"],
+            doc=doc_axis_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+    ]
+
+
+def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
+    """Build the ``type`` variant listing every registered descriptor.
+
+    Parameters
+    ----------
+    exclude_hybrid : bool
+        passed through to ``descrpt_args_plugin.get_all_argument``, which then
+        skips the ``hybrid`` descriptor
+    """
+    doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\
+- `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
+- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
+- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
+- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
+- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
+- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
+- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\
+- `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\
+- `hybrid`: Concatenate of a list of descriptors as a new descriptor."
+
+    return Variant(
+        "type",
+        descrpt_args_plugin.get_all_argument(exclude_hybrid=exclude_hybrid),
+        doc=doc_descrpt_type,
+    )
+
+
+#  --- Fitting net configurations: --- #
+fitting_args_plugin = ArgsPlugin()
+
+
+@fitting_args_plugin.register("ener")
+def fitting_ener():
+    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provide the input fparams."
+    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provide the input aparams."
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
+- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
+- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
+    doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+    doc_atom_ener = "Specify the atomic energy in vacuum for each type"
+    doc_layer_name = (
+        "The name of each layer. The length of this list should be equal to n_neuron + 1. "
+        "If two layers, either in the same fitting or different fittings, "
+        "have the same name, they will share the same neural network parameters. "
+        "The shape of these layers should be the same. "
+        "If null is given for a layer, parameters will not be shared."
+    )
+    doc_use_aparam_as_mask = (
+        "Whether to use the aparam as a mask in input. "
+        "If True, the aparam will not be used in fitting net for embedding. "
+        "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True."
+    )
+
+    return [
+        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
+        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument(
+            "neuron",
+            List[int],
+            optional=True,
+            default=[120, 120, 120],
+            alias=["n_neuron"],
+            doc=doc_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument(
+            "trainable",
+            [List[bool], bool],
+            optional=True,
+            default=True,
+            doc=doc_trainable,
+        ),
+        Argument(
+            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "atom_ener",
+            List[Optional[float]],
+            optional=True,
+            default=[],
+            doc=doc_atom_ener,
+        ),
+        Argument("layer_name", List[str], optional=True, doc=doc_layer_name),
+        Argument(
+            "use_aparam_as_mask",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_use_aparam_as_mask,
+        ),
+    ]
+
+
+@fitting_args_plugin.register("dos")
+def fitting_dos():
+    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provide the input fparams."
+    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provide the input aparams."
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
+- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
+- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
+    doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+    doc_numb_dos = (
+        "The number of gridpoints on which the DOS is evaluated (NEDOS in VASP)"
+    )
+
+    return [
+        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
+        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument(
+            "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("precision", str, optional=True, default="float64", doc=doc_precision),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument(
+            "trainable",
+            [List[bool], bool],
+            optional=True,
+            default=True,
+            doc=doc_trainable,
+        ),
+        Argument(
+            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument("numb_dos", int, optional=True, default=300, doc=doc_numb_dos),
+    ]
+
+
+@fitting_args_plugin.register("polar")
+def fitting_polar():
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_scale = "The output of the fitting net (polarizability matrix) will be scaled by ``scale``"
+    # doc_diag_shift = 'The diagonal part of the polarizability matrix  will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.'
+    doc_fit_diag = "Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix."
+    doc_sel_type = "The atom types for which the atomic polarizability will be provided. If not set, all types will be selected."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+
+    # YWolfeee: user can decide whether to use shift diag
+    doc_shift_diag = "Whether to shift the diagonal of polar, which is beneficial to training. Default is true."
+
+    return [
+        Argument(
+            "neuron",
+            List[int],
+            optional=True,
+            default=[120, 120, 120],
+            alias=["n_neuron"],
+            doc=doc_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag),
+        Argument(
+            "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale
+        ),
+        # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
+        Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag),
+        Argument(
+            "sel_type",
+            [List[int], int, None],
+            optional=True,
+            alias=["pol_type"],
+            doc=doc_sel_type,
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+    ]
+
+
+# def fitting_global_polar():
+#    return fitting_polar()
+
+
+@fitting_args_plugin.register("dipole")
+def fitting_dipole():
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_sel_type = "The atom types for which the atomic dipole will be provided. If not set, all types will be selected."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+    return [
+        Argument(
+            "neuron",
+            List[int],
+            optional=True,
+            default=[120, 120, 120],
+            alias=["n_neuron"],
+            doc=doc_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument(
+            "sel_type",
+            [List[int], int, None],
+            optional=True,
+            alias=["dipole_type"],
+            doc=doc_sel_type,
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+    ]
+
+
+#   YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support.
+def fitting_variant_type_args():
+    doc_descrpt_type = "The type of the fitting. See explanation below. \n\n\
+- `ener`: Fit an energy model (potential energy surface).\n\n\
+- `dos` : Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). See `loss` parameter. \n\n\
+- `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\
+- `polar`: Fit an atomic polarizability model. Global polarizability labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file either has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n"
+
+    return Variant(
+        "type",
+        fitting_args_plugin.get_all_argument(),
+        optional=True,
+        default_tag="ener",
+        doc=doc_descrpt_type,
+    )
+
+
+#  --- Modifier configurations: --- #
+def modifier_dipole_charge():
+    doc_model_name = "The name of the frozen dipole model file."
+    doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. "
+    doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}"
+    doc_ewald_h = "The grid spacing of the FFT grid. Unit is A"
+    doc_ewald_beta = f"The splitting parameter of Ewald sum. Unit is A^{-1}"
+
+    return [
+        Argument("model_name", str, optional=False, doc=doc_model_name),
+        Argument(
+            "model_charge_map", List[float], optional=False, doc=doc_model_charge_map
+        ),
+        Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map),
+        Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta),
+        Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h),
+    ]
+
+
+def modifier_variant_type_args():
+    doc_modifier_type = "The type of modifier. See explanation below.\n\n\
+- `dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction"
+    return Variant(
+        "type",
+        [
+            Argument("dipole_charge", dict, modifier_dipole_charge()),
+        ],
+        optional=False,
+        doc=doc_modifier_type,
+    )
+
+
+#  --- model compression configurations: --- #
+def model_compression():
+    doc_model_file = "The input model file, which will be compressed by the DeePMD-kit."
+    doc_table_config = "The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)."
+    doc_min_nbor_dist = (
+        "The nearest distance between neighbor atoms saved in the frozen model."
+    )
+
+    return [
+        Argument("model_file", str, optional=False, doc=doc_model_file),
+        Argument("table_config", List[float], optional=False, doc=doc_table_config),
+        Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist),
+    ]
+
+
+#  --- model compression configurations: --- #
+def model_compression_type_args():
+    doc_compress_type = "The type of model compression, which should be consistent with the descriptor type."
+
+    return Variant(
+        "type",
+        [Argument("se_e2_a", dict, model_compression(), alias=["se_a"])],
+        optional=True,
+        default_tag="se_e2_a",
+        doc=doc_compress_type,
+    )
+
+
+def model_args(exclude_hybrid=False):
+    doc_type_map = "A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect."
+    doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics."
+    doc_data_stat_protect = "Protect parameter for atomic energy regression."
+    doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias."
+    doc_type_embedding = "The type embedding."
+    doc_modifier = "The modifier of model output."
+    doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columns. The first column is the distance between atoms. The second to the last columns are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columns from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly."
+    doc_smin_alpha = "The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided."
+    doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
+    doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
+    doc_srtab_add_bias = "Whether add energy bias from the statistics of the data to short-range tabulated atomic energy. It only takes effect when `use_srtab` is provided."
+    doc_compress_config = "Model compression configurations"
+    doc_spin = "The settings for systems with spin."
+    hybrid_models = []
+    if not exclude_hybrid:
+        hybrid_models.extend(
+            [
+                pairwise_dprc(),
+                linear_ener_model_args(),
+            ]
+        )
+    return Argument(
+        "model",
+        dict,
+        [
+            Argument("type_map", List[str], optional=True, doc=doc_type_map),
+            Argument(
+                "data_stat_nbatch",
+                int,
+                optional=True,
+                default=10,
+                doc=doc_data_stat_nbatch,
+            ),
+            Argument(
+                "data_stat_protect",
+                float,
+                optional=True,
+                default=1e-2,
+                doc=doc_data_stat_protect,
+            ),
+            Argument(
+                "data_bias_nsample",
+                int,
+                optional=True,
+                default=10,
+                doc=doc_data_bias_nsample,
+            ),
+            Argument("use_srtab", str, optional=True, doc=doc_use_srtab),
+            Argument("smin_alpha", float, optional=True, doc=doc_smin_alpha),
+            Argument("sw_rmin", float, optional=True, doc=doc_sw_rmin),
+            Argument("sw_rmax", float, optional=True, doc=doc_sw_rmax),
+            Argument(
+                "srtab_add_bias",
+                bool,
+                optional=True,
+                default=True,
+                doc=doc_srtab_add_bias,
+            ),
+            Argument(
+                "type_embedding",
+                dict,
+                type_embedding_args(),
+                [],
+                optional=True,
+                doc=doc_type_embedding,
+            ),
+            Argument(
+                "modifier",
+                dict,
+                [],
+                [modifier_variant_type_args()],
+                optional=True,
+                doc=doc_modifier,
+            ),
+            Argument(
+                "compress",
+                dict,
+                [],
+                [model_compression_type_args()],
+                optional=True,
+                doc=doc_compress_config,
+                fold_subdoc=True,
+            ),
+            Argument("spin", dict, spin_args(), [], optional=True, doc=doc_spin),
+        ],
+        [
+            Variant(
+                "type",
+                [
+                    standard_model_args(),
+                    multi_model_args(),
+                    frozen_model_args(),
+                    pairtab_model_args(),
+                    *hybrid_models,
+                ],
+                optional=True,
+                default_tag="standard",
+            ),
+        ],
+    )
+
+
+def standard_model_args() -> Argument:
+    doc_descrpt = "The descriptor of atomic environment."
+    doc_fitting = "The fitting of physical properties."
+
+    ca = Argument(
+        "standard",
+        dict,
+        [
+            Argument(
+                "descriptor", dict, [], [descrpt_variant_type_args()], doc=doc_descrpt
+            ),
+            Argument(
+                "fitting_net",
+                dict,
+                [],
+                [fitting_variant_type_args()],
+                doc=doc_fitting,
+            ),
+        ],
+        doc="Standard model, which contains a descriptor and a fitting.",
+    )
+    return ca
+
+
+def multi_model_args() -> Argument:
+    doc_descrpt = "The descriptor of atomic environment. See model[standard]/descriptor for details."
+    doc_fitting_net_dict = "The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`."
+
+    ca = Argument(
+        "multi",
+        dict,
+        [
+            Argument(
+                "descriptor",
+                dict,
+                [],
+                [descrpt_variant_type_args()],
+                doc=doc_descrpt,
+                fold_subdoc=True,
+            ),
+            Argument("fitting_net_dict", dict, doc=doc_fitting_net_dict),
+        ],
+        doc="Multiple-task model.",
+    )
+    return ca
+
+
+def pairwise_dprc() -> Argument:
+    qm_model_args = model_args(exclude_hybrid=True)
+    qm_model_args.name = "qm_model"
+    qm_model_args.fold_subdoc = True
+    qmmm_model_args = model_args(exclude_hybrid=True)
+    qmmm_model_args.name = "qmmm_model"
+    qmmm_model_args.fold_subdoc = True
+    ca = Argument(
+        "pairwise_dprc",
+        dict,
+        [
+            qm_model_args,
+            qmmm_model_args,
+        ],
+    )
+    return ca
+
+
+def frozen_model_args() -> Argument:
+    doc_model_file = "Path to the frozen model file."
+    ca = Argument(
+        "frozen",
+        dict,
+        [
+            Argument("model_file", str, optional=False, doc=doc_model_file),
+        ],
+    )
+    return ca
+
+
+def pairtab_model_args() -> Argument:
+    doc_tab_file = "Path to the tabulation file."
+    doc_rcut = "The cut-off radius."
+    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
+    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    ca = Argument(
+        "pairtab",
+        dict,
+        [
+            Argument("tab_file", str, optional=False, doc=doc_tab_file),
+            Argument("rcut", float, optional=False, doc=doc_rcut),
+            Argument("sel", [int, List[int], str], optional=False, doc=doc_sel),
+        ],
+        doc="Pairwise tabulation energy model.",
+    )
+    return ca
+
+
+def linear_ener_model_args() -> Argument:
+    doc_weights = (
+        "If the type is list of float, a list of weights for each model. "
+        'If "mean", the weights are set to be 1 / len(models). '
+        'If "sum", the weights are set to be 1.'
+    )
+    models_args = model_args(exclude_hybrid=True)
+    models_args.name = "models"
+    models_args.fold_subdoc = True
+    models_args.set_dtype(list)
+    models_args.set_repeat(True)
+    models_args.doc = "The sub-models."
+    ca = Argument(
+        "linear_ener",
+        dict,
+        [
+            models_args,
+            Argument(
+                "weights",
+                [list, str],
+                optional=False,
+                doc=doc_weights,
+            ),
+        ],
+    )
+    return ca
+
+
+#  --- Learning rate configurations: --- #
+def learning_rate_exp():
+    doc_start_lr = "The learning rate at the start of the training."
+    doc_stop_lr = "The desired learning rate at the end of the training."
+    doc_decay_steps = (
+        "The learning rate is decaying every this number of training steps."
+    )
+
+    args = [
+        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
+        Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
+        Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps),
+    ]
+    return args
+
+
+def learning_rate_variant_type_args():
+    doc_lr = "The type of the learning rate."
+
+    return Variant(
+        "type",
+        [Argument("exp", dict, learning_rate_exp())],
+        optional=True,
+        default_tag="exp",
+        doc=doc_lr,
+    )
+
+
+def learning_rate_args():
+    doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`."
+    doc_lr = "The definition of learning rate"
+    return Argument(
+        "learning_rate",
+        dict,
+        [
+            Argument(
+                "scale_by_worker",
+                str,
+                optional=True,
+                default="linear",
+                doc=doc_scale_by_worker,
+            )
+        ],
+        [learning_rate_variant_type_args()],
+        optional=True,
+        doc=doc_lr,
+    )
+
+
+def learning_rate_dict_args():
+    doc_learning_rate_dict = (
+        "The dictionary of definitions of learning rates in multi-task mode. "
+        "Each learning_rate_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of learning rate.\n"
+    )
+    ca = Argument(
+        "learning_rate_dict", dict, [], [], optional=True, doc=doc_learning_rate_dict
+    )
+    return ca
+
+
+#  --- Loss configurations: --- #
+def start_pref(item, label=None, abbr=None):
+    if label is None:
+        label = item
+    if abbr is None:
+        abbr = item
+    return f"The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the {label} label should be provided by file {label}.npy in each data system. If both start_pref_{abbr} and limit_pref_{abbr} are set to 0, then the {item} will be ignored."
+
+
+def limit_pref(item):
+    return f"The prefactor of {item} loss at the limit of the training, i.e. the training step goes to infinity. Should be larger than or equal to 0."
+
+
+loss_args_plugin = ArgsPlugin()
+
+
+@loss_args_plugin.register("ener")
+def loss_ener():
+    doc_start_pref_e = start_pref("energy", abbr="e")
+    doc_limit_pref_e = limit_pref("energy")
+    doc_start_pref_f = start_pref("force", abbr="f")
+    doc_limit_pref_f = limit_pref("force")
+    doc_start_pref_v = start_pref("virial", abbr="v")
+    doc_limit_pref_v = limit_pref("virial")
+    doc_start_pref_ae = start_pref("atomic energy", label="atom_ener", abbr="ae")
+    doc_limit_pref_ae = limit_pref("atomic energy")
+    doc_start_pref_pf = start_pref(
+        "atomic prefactor force", label="atom_pref", abbr="pf"
+    )
+    doc_limit_pref_pf = limit_pref("atomic prefactor force")
+    doc_start_pref_gf = start_pref("generalized force", label="drdq", abbr="gf")
+    doc_limit_pref_gf = limit_pref("generalized force")
+    doc_numb_generalized_coord = "The dimension of generalized coordinates. Required when generalized force loss is used."
+    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
+    doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
+    return [
+        Argument(
+            "start_pref_e",
+            [float, int],
+            optional=True,
+            default=0.02,
+            doc=doc_start_pref_e,
+        ),
+        Argument(
+            "limit_pref_e",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_limit_pref_e,
+        ),
+        Argument(
+            "start_pref_f",
+            [float, int],
+            optional=True,
+            default=1000,
+            doc=doc_start_pref_f,
+        ),
+        Argument(
+            "limit_pref_f",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_limit_pref_f,
+        ),
+        Argument(
+            "start_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_v,
+        ),
+        Argument(
+            "limit_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_v,
+        ),
+        Argument(
+            "start_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_ae,
+        ),
+        Argument(
+            "limit_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_ae,
+        ),
+        Argument(
+            "start_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_pf,
+        ),
+        Argument(
+            "limit_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_pf,
+        ),
+        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
+        Argument(
+            "enable_atom_ener_coeff",
+            [bool],
+            optional=True,
+            default=False,
+            doc=doc_enable_atom_ener_coeff,
+        ),
+        Argument(
+            "start_pref_gf",
+            float,
+            optional=True,
+            default=0.0,
+            doc=doc_start_pref_gf,
+        ),
+        Argument(
+            "limit_pref_gf",
+            float,
+            optional=True,
+            default=0.0,
+            doc=doc_limit_pref_gf,
+        ),
+        Argument(
+            "numb_generalized_coord",
+            int,
+            optional=True,
+            default=0,
+            doc=doc_numb_generalized_coord,
+        ),
+    ]
+
+
+@loss_args_plugin.register("ener_spin")
+def loss_ener_spin():
+    """Return the arguments accepted by the `ener_spin` loss."""
+    pref_docs = {
+        key: (start_pref(label), limit_pref(label))
+        for key, label in (
+            ("e", "energy"),
+            ("fr", "force_real_atom"),
+            ("fm", "force_magnetic"),
+            ("v", "virial"),
+            ("ae", "atom_ener"),
+            ("pf", "atom_pref"),
+        )
+    }
+    relative_f_doc = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
+    atom_ener_coeff_doc = r"If true, the energy will be computed as \sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
+    return [
+        Argument(
+            "start_pref_e",
+            [float, int],
+            optional=True,
+            default=0.02,
+            doc=pref_docs["e"][0],
+        ),
+        Argument(
+            "limit_pref_e",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=pref_docs["e"][1],
+        ),
+        Argument(
+            "start_pref_fr",
+            [float, int],
+            optional=True,
+            default=1000,
+            doc=pref_docs["fr"][0],
+        ),
+        Argument(
+            "limit_pref_fr",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=pref_docs["fr"][1],
+        ),
+        Argument(
+            "start_pref_fm",
+            [float, int],
+            optional=True,
+            default=10000,
+            doc=pref_docs["fm"][0],
+        ),
+        Argument(
+            "limit_pref_fm",
+            [float, int],
+            optional=True,
+            default=10.0,
+            doc=pref_docs["fm"][1],
+        ),
+        Argument(
+            "start_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["v"][0],
+        ),
+        Argument(
+            "limit_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["v"][1],
+        ),
+        Argument(
+            "start_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["ae"][0],
+        ),
+        Argument(
+            "limit_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["ae"][1],
+        ),
+        Argument(
+            "start_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["pf"][0],
+        ),
+        Argument(
+            "limit_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["pf"][1],
+        ),
+        Argument("relative_f", [float, None], optional=True, doc=relative_f_doc),
+        Argument(
+            "enable_atom_ener_coeff",
+            [bool],
+            optional=True,
+            default=False,
+            doc=atom_ener_coeff_doc,
+        ),
+    ]
+
+
+@loss_args_plugin.register("dos")
+def loss_dos():
+    """Return the arguments accepted by the `dos` loss."""
+    # Human-readable name of each loss term, keyed by the option suffix.
+    term_names = {
+        "dos": "Density of State (DOS)",
+        "cdf": "Cumulative Distribution Function (cumulative intergral of DOS)",
+        "ados": "atomic DOS (site-projected DOS)",
+        "acdf": "Cumulative integral of atomic DOS",
+    }
+    pref_docs = {
+        key: (start_pref(name), limit_pref(name))
+        for key, name in term_names.items()
+    }
+    return [
+        Argument(
+            "start_pref_dos",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["dos"][0],
+        ),
+        Argument(
+            "limit_pref_dos",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["dos"][1],
+        ),
+        Argument(
+            "start_pref_cdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["cdf"][0],
+        ),
+        Argument(
+            "limit_pref_cdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["cdf"][1],
+        ),
+        Argument(
+            "start_pref_ados",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=pref_docs["ados"][0],
+        ),
+        Argument(
+            "limit_pref_ados",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=pref_docs["ados"][1],
+        ),
+        Argument(
+            "start_pref_acdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["acdf"][0],
+        ),
+        Argument(
+            "limit_pref_acdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=pref_docs["acdf"][1],
+        ),
+    ]
+
+
+# YWolfeee: Modified to support tensor type of loss args.
+@loss_args_plugin.register("tensor")
+def loss_tensor():
+    """Return the arguments accepted by the `tensor` loss.
+
+    Two prefactors are declared and both are mandatory: `pref` for the global
+    label and `pref_atomic` for the atomic label.
+    """
+    global_doc = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included."
+    atomic_doc = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0."
+    # Weight of the loss on the global label.
+    global_pref = Argument(
+        "pref", [float, int], optional=False, default=None, doc=global_doc
+    )
+    # Weight of the loss on the atomic label.
+    atomic_pref = Argument(
+        "pref_atomic", [float, int], optional=False, default=None, doc=atomic_doc
+    )
+    return [global_pref, atomic_pref]
+
+
+def loss_variant_type_args():
+    """Return the `type` variant that selects the loss definition.
+
+    Choices come from `loss_args_plugin`; the default tag is `ener`.
+    """
+    type_doc = "The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`."
+    registered = loss_args_plugin.get_all_argument()
+    return Variant(
+        "type", registered, optional=True, default_tag="ener", doc=type_doc
+    )
+
+
+def loss_args():
+    """Return the optional `loss` dict argument, varied by the loss type."""
+    loss_doc = "The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset."
+    return Argument(
+        "loss", dict, [], [loss_variant_type_args()], optional=True, doc=loss_doc
+    )
+
+
+def loss_dict_args():
+    """Return the optional `loss_dict` argument used in multi-task mode."""
+    text = (
+        "The dictionary of definitions of multiple loss functions in multi-task mode. "
+        "Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n"
+    )
+    return Argument("loss_dict", dict, [], [], optional=True, doc=text)
+
+
+#  --- Training configurations: --- #
+def training_data_args():  # ! added by Ziyao: new specification style for data systems.
+    """Return the optional `training_data` argument.
+
+    Sub-fields: `systems`, `set_prefix`, `batch_size`, `auto_prob`, `sys_probs`.
+    """
+    link_sys = make_link("systems", "training/training_data/systems")
+    systems_doc = (
+        "The data systems for training. "
+        "This key can be provided with a list that specifies the systems, or be provided with a string "
+        "by which the prefix of all systems are given and the list of the systems is automatically generated."
+    )
+    set_prefix_doc = f"The prefix of the sets in the {link_sys}."
+    batch_size_doc = f'This key can be \n\n\
+- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
+- int: all {link_sys} use the same batch size.\n\n\
+- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
+- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\
+- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.\n\n\
+If MPI is used, the value should be considered as the batch size per task.'
+    auto_prob_doc = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
+- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
+- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
+- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
+    sys_probs_doc = (
+        "A list of float if specified. "
+        "Should be of the same length as `systems`, "
+        "specifying the probability of each system."
+    )
+
+    fields = [
+        Argument(
+            "systems", [List[str], str], optional=False, default=".", doc=systems_doc
+        ),
+        Argument("set_prefix", str, optional=True, default="set", doc=set_prefix_doc),
+        Argument(
+            "batch_size",
+            [List[int], int, str],
+            optional=True,
+            default="auto",
+            doc=batch_size_doc,
+        ),
+        Argument(
+            "auto_prob",
+            str,
+            optional=True,
+            default="prob_sys_size",
+            doc=auto_prob_doc,
+            alias=["auto_prob_style"],
+        ),
+        Argument(
+            "sys_probs",
+            List[float],
+            optional=True,
+            default=None,
+            doc=sys_probs_doc,
+            alias=["sys_weights"],
+        ),
+    ]
+    return Argument(
+        "training_data",
+        dict,
+        optional=True,
+        sub_fields=fields,
+        sub_variants=[],
+        doc="Configurations of training data.",
+    )
+
+
+def validation_data_args():  # ! added by Ziyao: new specification style for data systems.
+    """Return the optional `validation_data` argument (default None).
+
+    Sub-fields: `systems`, `set_prefix`, `batch_size`, `auto_prob`,
+    `sys_probs` and `numb_btch`.
+    """
+    link_sys = make_link("systems", "training/validation_data/systems")
+    systems_doc = (
+        "The data systems for validation. "
+        "This key can be provided with a list that specifies the systems, or be provided with a string "
+        "by which the prefix of all systems are given and the list of the systems is automatically generated."
+    )
+    set_prefix_doc = f"The prefix of the sets in the {link_sys}."
+    batch_size_doc = f'This key can be \n\n\
+- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
+- int: all {link_sys} use the same batch size.\n\n\
+- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
+- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.'
+    auto_prob_doc = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
+- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
+- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
+- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
+    sys_probs_doc = (
+        "A list of float if specified. "
+        "Should be of the same length as `systems`, "
+        "specifying the probability of each system."
+    )
+    numb_btch_doc = "An integer that specifies the number of batches to be sampled for each validation period."
+
+    fields = [
+        Argument(
+            "systems", [List[str], str], optional=False, default=".", doc=systems_doc
+        ),
+        Argument("set_prefix", str, optional=True, default="set", doc=set_prefix_doc),
+        Argument(
+            "batch_size",
+            [List[int], int, str],
+            optional=True,
+            default="auto",
+            doc=batch_size_doc,
+        ),
+        Argument(
+            "auto_prob",
+            str,
+            optional=True,
+            default="prob_sys_size",
+            doc=auto_prob_doc,
+            alias=["auto_prob_style"],
+        ),
+        Argument(
+            "sys_probs",
+            List[float],
+            optional=True,
+            default=None,
+            doc=sys_probs_doc,
+            alias=["sys_weights"],
+        ),
+        Argument(
+            "numb_btch",
+            int,
+            optional=True,
+            default=1,
+            doc=numb_btch_doc,
+            alias=["numb_batch"],
+        ),
+    ]
+    validation_doc = (
+        "Configurations of validation data. Similar to that of training data, "
+        "except that a `numb_btch` argument may be configured"
+    )
+    return Argument(
+        "validation_data",
+        dict,
+        optional=True,
+        default=None,
+        sub_fields=fields,
+        sub_variants=[],
+        doc=validation_doc,
+    )
+
+
+def mixed_precision_args():  # ! added by Denghui.
+    """Return the optional `mixed_precision` argument and its two sub-fields."""
+    doc_output_prec = (
+        "The precision for mixed precision params. The trainable variables precision during the mixed precision training process, supported options are float32 only currently."
+    )
+    doc_compute_prec = (
+        "The precision for mixed precision compute. The compute precision during the mixed precision training process, supported options are float16 and bfloat16 currently."
+    )
+
+    args = [
+        Argument(
+            "output_prec", str, optional=True, default="float32", doc=doc_output_prec
+        ),
+        Argument(
+            "compute_prec", str, optional=False, default="float16", doc=doc_compute_prec
+        ),
+    ]
+
+    return Argument(
+        "mixed_precision",
+        dict,
+        optional=True,
+        sub_fields=args,
+        sub_variants=[],
+        doc="Configurations of mixed precision.",
+    )
+
+
+def training_args():  # ! modified by Ziyao: data configuration isolated.
+    """Return the `training` argument and all of its sub-fields.
+
+    The sub-fields start with the training data, validation data and mixed
+    precision arguments, followed by the scalar training options and the
+    multi-task `data_dict` / `fitting_weight` dictionaries.
+    """
+    numb_steps_doc = "Number of training batch. Each training uses one batch of data."
+    seed_doc = "The random seed for getting frames from the training data set."
+    disp_file_doc = "The file for printing learning curve."
+    disp_freq_doc = "The frequency of printing learning curve."
+    save_freq_doc = "The frequency of saving check point."
+    save_ckpt_doc = "The path prefix of saving check point files."
+    disp_training_doc = "Displaying verbose information during training."
+    time_training_doc = "Timing durining training."
+    profiling_doc = "Profiling during training."
+    profiling_file_doc = "Output file for profiling."
+    enable_profiler_doc = "Enable TensorFlow Profiler (available in TensorFlow 2.3) to analyze performance. The log will be saved to `tensorboard_log_dir`."
+    tensorboard_doc = "Enable tensorboard"
+    tensorboard_log_dir_doc = "The log directory of tensorboard outputs"
+    tensorboard_freq_doc = "The frequency of writing tensorboard events."
+    data_dict_doc = (
+        "The dictionary of multi DataSystems in multi-task mode. "
+        "Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
+        "contains training data and optional validation data definitions."
+    )
+    fitting_weight_doc = (
+        "Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
+        "is the training weight of fitting net `fitting_key`. "
+        "Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. "
+        "Weights will be normalized and minus ones will be ignored. "
+        "If not set, each fitting net will be equally selected when training."
+    )
+
+    fields = [
+        training_data_args(),
+        validation_data_args(),
+        mixed_precision_args(),
+        Argument(
+            "numb_steps", int, optional=False, doc=numb_steps_doc, alias=["stop_batch"]
+        ),
+        Argument("seed", [int, None], optional=True, doc=seed_doc),
+        Argument(
+            "disp_file", str, optional=True, default="lcurve.out", doc=disp_file_doc
+        ),
+        Argument("disp_freq", int, optional=True, default=1000, doc=disp_freq_doc),
+        Argument("save_freq", int, optional=True, default=1000, doc=save_freq_doc),
+        Argument(
+            "save_ckpt", str, optional=True, default="model.ckpt", doc=save_ckpt_doc
+        ),
+        Argument(
+            "disp_training", bool, optional=True, default=True, doc=disp_training_doc
+        ),
+        Argument(
+            "time_training", bool, optional=True, default=True, doc=time_training_doc
+        ),
+        Argument("profiling", bool, optional=True, default=False, doc=profiling_doc),
+        Argument(
+            "profiling_file",
+            str,
+            optional=True,
+            default="timeline.json",
+            doc=profiling_file_doc,
+        ),
+        Argument(
+            "enable_profiler",
+            bool,
+            optional=True,
+            default=False,
+            doc=enable_profiler_doc,
+        ),
+        Argument(
+            "tensorboard", bool, optional=True, default=False, doc=tensorboard_doc
+        ),
+        Argument(
+            "tensorboard_log_dir",
+            str,
+            optional=True,
+            default="log",
+            doc=tensorboard_log_dir_doc,
+        ),
+        Argument(
+            "tensorboard_freq", int, optional=True, default=1, doc=tensorboard_freq_doc
+        ),
+        Argument("data_dict", dict, optional=True, doc=data_dict_doc),
+        Argument("fitting_weight", dict, optional=True, doc=fitting_weight_doc),
+    ]
+    return Argument("training", dict, fields, [], doc="The training options.")
+
+
+def make_index(keys):
+    """Return one link per key (key linked to itself), joined by commas."""
+    # `make_link(key, key)`: the key serves as both arguments of the link.
+    links = [make_link(key, key) for key in keys]
+    return ", ".join(links)
+
+
+def gen_doc(*, make_anchor=True, make_link=True, **kwargs):
+    """Render the documentation of every top-level argument.
+
+    Each argument from `gen_args()` is rendered by its own `gen_doc` method and
+    the sections are joined by blank lines. A truthy `make_link` forces
+    `make_anchor=True`; extra keyword arguments are forwarded unchanged.
+    """
+    if make_link:
+        make_anchor = True
+    sections = [
+        arg.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs)
+        for arg in gen_args()
+    ]
+    return "\n\n".join(sections)
+
+
+def gen_json(**kwargs):
+    """Serialize every top-level argument to a JSON string."""
+    # `kwargs` is accepted but not used.
+    top_level = tuple(gen_args())
+    return json.dumps(top_level, cls=ArgumentEncoder)
+
+
+def gen_args(**kwargs) -> List[Argument]:
+    """Return the top-level arguments of the input script.
+
+    Order: model, learning rate, learning-rate dict, loss, loss dict, training,
+    nvnmd. `kwargs` is accepted but not used.
+    """
+    # Built in two steps only to keep the lines short; call order is unchanged.
+    top_level = [model_args(), learning_rate_args(), learning_rate_dict_args()]
+    top_level += [loss_args(), loss_dict_args(), training_args(), nvnmd_args()]
+    return top_level
+
+
+def normalize_multi_task(data):
+    """Validate the single-/multi-task layout of `data` and normalize it.
+
+    Multi-task sections are normalized in place; layout violations raise AssertionError.
+    """
+    model = data["model"]
+    if model.get("type", "standard") not in ("standard", "multi"):
+        return data
+    training = data["training"]
+    single_fitting_net = "fitting_net" in model
+    single_training_data = "training_data" in training
+    single_valid_data = "validation_data" in training
+    single_loss = "loss" in data
+    single_learning_rate = "learning_rate" in data
+    multi_fitting_net = "fitting_net_dict" in model
+    multi_training_data = "data_dict" in training
+    multi_loss = "loss_dict" in data
+    multi_learning_rate = "learning_rate_dict" in data
+    assert (single_fitting_net == single_training_data) and (
+        multi_fitting_net == multi_training_data
+    ), (
+        "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! "
+        "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' "
+        "must be defined at the same time! Please check your input script. "
+    )
+    assert not (single_fitting_net and multi_fitting_net), (
+        "Single-task mode and multi-task mode can not be performed together. "
+        "Please check your input script and choose just one format! "
+    )
+    assert (
+        single_fitting_net or multi_fitting_net
+    ), "Please define your fitting net and training data! "
+    if multi_fitting_net:
+        assert not single_valid_data, (
+            "In multi-task mode, 'training/validation_data' should not appear "
+            "outside 'training/data_dict'! Please check your input script."
+        )
+        assert not single_loss, (
+            "In multi-task mode, please use 'loss_dict' instead of 'loss'! "
+        )
+        assert (
+            "type_map" in model
+        ), "In multi-task mode, 'model/type_map' must be defined! "
+        model["type"] = "multi"
+        fitting_nets = normalize_fitting_net_dict(model["fitting_net_dict"])
+        model["fitting_net_dict"] = fitting_nets
+        fitting_keys = fitting_nets.keys()
+        training["data_dict"] = normalize_data_dict(training["data_dict"])
+        data["loss_dict"] = (
+            normalize_loss_dict(fitting_keys, data["loss_dict"]) if multi_loss else {}
+        )
+        if multi_learning_rate:
+            data["learning_rate_dict"] = normalize_learning_rate_dict(
+                fitting_keys, data["learning_rate_dict"]
+            )
+        elif single_learning_rate:
+            data["learning_rate_dict"] = (
+                normalize_learning_rate_dict_with_single_learning_rate(
+                    fitting_keys, data["learning_rate"]
+                )
+            )
+        training["fitting_weight"] = normalize_fitting_weight(
+            fitting_keys,
+            training["data_dict"].keys(),
+            fitting_weight=training.get("fitting_weight"),
+        )
+    else:
+        assert not multi_loss, (
+            "In single-task mode, please use 'loss' instead of 'loss_dict'! "
+        )
+        assert not multi_learning_rate, (
+            "In single-task mode, please use 'learning_rate' instead of "
+            "'learning_rate_dict'! "
+        )
+    return data
+
+
+def normalize_fitting_net_dict(fitting_net_dict):
+    """Normalize and strictly check every fitting-net definition in the dict."""
+    schema = Argument("base", dict, [], [fitting_variant_type_args()], doc="")
+    normalized = {}
+    for key, raw in fitting_net_dict.items():
+        # Normalize first, then validate the normalized value strictly.
+        value = schema.normalize_value(raw, trim_pattern="_*")
+        schema.check_value(value, strict=True)
+        normalized[key] = value
+    return normalized
+
+
+def normalize_data_dict(data_dict):
+    """Normalize and strictly check every data-system entry of `data_dict`."""
+    sub_fields = [training_data_args(), validation_data_args()]
+    schema = Argument("base", dict, sub_fields, [], doc="")
+    normalized = {}
+    for key, raw in data_dict.items():
+        value = schema.normalize_value(raw, trim_pattern="_*")
+        schema.check_value(value, strict=True)
+        normalized[key] = value
+    return normalized
+
+
+def normalize_loss_dict(fitting_keys, loss_dict):
+    """Normalize each loss definition after checking its key is a fitting key."""
+    unknown = [key for key in loss_dict if key not in fitting_keys]
+    assert not unknown, (
+        "Loss dict key(s) {} not have corresponding fitting keys in {}! ".format(
+            str(unknown), str(list(fitting_keys))
+        )
+    )
+    schema = Argument("base", dict, [], [loss_variant_type_args()], doc="")
+    normalized = {}
+    for key, raw in loss_dict.items():
+        value = schema.normalize_value(raw, trim_pattern="_*")
+        schema.check_value(value, strict=True)
+        normalized[key] = value
+    return normalized
+
+
+def normalize_learning_rate_dict(fitting_keys, learning_rate_dict):
+    """Normalize each learning rate after checking its key is a fitting key."""
+    # Reject learning rates that do not belong to any fitting net.
+    unknown = [key for key in learning_rate_dict if key not in fitting_keys]
+    assert not unknown, (
+        "Learning rate dict key(s) {} not have corresponding fitting keys in {}! "
+    ).format(str(unknown), str(list(fitting_keys)))
+    schema = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
+    normalized = {}
+    for key, raw in learning_rate_dict.items():
+        # Normalize first, then validate the normalized value strictly.
+        value = schema.normalize_value(raw, trim_pattern="_*")
+        schema.check_value(value, strict=True)
+        normalized[key] = value
+    return normalized
+
+
+def normalize_learning_rate_dict_with_single_learning_rate(fitting_keys, learning_rate):
+    """Normalize one learning rate and assign it to every fitting key."""
+    schema = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
+    shared = schema.normalize_value(learning_rate, trim_pattern="_*")
+    schema.check_value(shared, strict=True)
+    # `dict.fromkeys` maps every key to the same `shared` object (no copies),
+    # exactly like assigning it key by key.
+    return dict.fromkeys(fitting_keys, shared)
+
+
+def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None):
+    """Return the normalized training weight of every fitting key.
+
+    Parameters
+    ----------
+    fitting_keys
+        Names of the fitting nets.
+    data_keys
+        Names of the data systems; each must be one of `fitting_keys`.
+    fitting_weight
+        Optional mapping from fitting key to raw weight. When None, all
+        fitting keys that have data share the same weight.
+
+    Returns
+    -------
+    dict
+        Weight per fitting key. Keys without data, or with a missing,
+        non-numeric or non-positive weight, get 0.0; the others sum to one.
+    """
+    orphan_data = [key for key in data_keys if key not in fitting_keys]
+    assert not orphan_data, (
+        "Data dict key(s) {} not have corresponding fitting keys in {}! ".format(
+            str(orphan_data), str(list(fitting_keys))
+        )
+    )
+    with_data = [key for key in fitting_keys if key in data_keys]
+    without_data = [key for key in fitting_keys if key not in data_keys]
+    if without_data:
+        log.warning(
+            "Fitting net(s) {} have no data and will not be used in training.".format(
+                str(without_data)
+            )
+        )
+    assert len(with_data) > 0, "No valid training data systems for fitting nets!"
+
+    if fitting_weight is None:
+        share = 1.0 / len(with_data)
+        return {key: share if key in with_data else 0.0 for key in fitting_keys}
+
+    orphan_weights = [key for key in fitting_weight if key not in fitting_keys]
+    assert not orphan_weights, (
+        "Fitting weight key(s) {} not have corresponding fitting keys in {}! "
+    ).format(str(orphan_weights), str(list(fitting_keys)))
+    weights = {}
+    total = 0.0
+    for key in fitting_keys:
+        if key not in with_data:
+            weights[key] = 0.0
+            continue
+        raw = fitting_weight[key] if key in fitting_weight else None
+        if isinstance(raw, (int, float)) and raw > 0.0:
+            total += raw
+            weights[key] = raw
+        else:
+            log.warning(
+                f"Fitting net '{key}' has zero or invalid weight "
+                "and will not be used in training."
+            )
+            weights[key] = 0.0
+    assert total > 0.0, "No valid training weight for fitting nets!"
+    return {key: value / total for key, value in weights.items()}
+
+
+def normalize(data):
+    """Normalize a whole input script and strictly check the result."""
+    # The single-/multi-task layout is resolved before the schema is applied.
+    resolved = normalize_multi_task(data)
+    schema = Argument("base", dict, gen_args())
+    normalized = schema.normalize_value(resolved, trim_pattern="_*")
+    schema.check_value(normalized, strict=True)
+    return normalized
+
+
+if __name__ == "__main__":
+    gen_doc()  # NOTE(review): the string returned by gen_doc() is discarded here
diff --git a/deepmd_utils/utils/argcheck_nvnmd.py b/deepmd_utils/utils/argcheck_nvnmd.py
new file mode 100644
index 0000000000..2dc17ebc27
--- /dev/null
+++ b/deepmd_utils/utils/argcheck_nvnmd.py
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from dargs import (
+    Argument,
+)
+
+
+def nvnmd_args() -> Argument:  # builds the optional "nvnmd" dict argument together with its sub-arguments
+    doc_version = (
+        "configuration the nvnmd version (0 | 1), 0 for 4 types, 1 for 32 types"
+    )
+    doc_max_nnei = "configuration the max number of neighbors, 128|256 for version 0, 128 for version 1"
+    doc_net_size_file = (  # despite the *_file suffix this documents the "net_size" argument below
+        "configuration the number of nodes of fitting_net, just can be set as 128"
+    )
+    doc_map_file = "A file containing the mapping tables to replace the calculation of embedding nets"
+    doc_config_file = "A file containing the parameters about how to implement the model in certain hardware"
+    doc_weight_file = "a *.npy file containing the weights of the model"
+    doc_enable = "enable the nvnmd training"
+    doc_restore_descriptor = (
+        "enable to restore the parameter of embedding_net from weight.npy"
+    )
+    doc_restore_fitting_net = (
+        "enable to restore the parameter of fitting_net from weight.npy"
+    )
+    doc_quantize_descriptor = "enable the quantizatioin of descriptor"
+    doc_quantize_fitting_net = "enable the quantizatioin of fitting_net"
+    args = [  # NOTE(review): every entry is optional=False yet carries a default — confirm how dargs treats that
+        Argument("version", int, optional=False, default=0, doc=doc_version),
+        Argument("max_nnei", int, optional=False, default=128, doc=doc_max_nnei),
+        Argument("net_size", int, optional=False, default=128, doc=doc_net_size_file),
+        Argument("map_file", str, optional=False, default="none", doc=doc_map_file),  # default is the string "none", not None
+        Argument(
+            "config_file", str, optional=False, default="none", doc=doc_config_file
+        ),
+        Argument(
+            "weight_file", str, optional=False, default="none", doc=doc_weight_file
+        ),
+        Argument("enable", bool, optional=False, default=False, doc=doc_enable),
+        Argument(
+            "restore_descriptor",
+            bool,
+            optional=False,
+            default=False,
+            doc=doc_restore_descriptor,
+        ),
+        Argument(
+            "restore_fitting_net",
+            bool,
+            optional=False,
+            default=False,
+            doc=doc_restore_fitting_net,
+        ),
+        Argument(
+            "quantize_descriptor",
+            bool,
+            optional=False,
+            default=False,
+            doc=doc_quantize_descriptor,
+        ),
+        Argument(
+            "quantize_fitting_net",
+            bool,
+            optional=False,
+            default=False,
+            doc=doc_quantize_fitting_net,
+        ),
+    ]
+
+    doc_nvnmd = "The nvnmd options."
+    return Argument("nvnmd", dict, args, [], optional=True, doc=doc_nvnmd)  # the section itself is optional; [] is presumably sub_variants — confirm
diff --git a/deepmd_utils/utils/compat.py b/deepmd_utils/utils/compat.py
new file mode 100644
index 0000000000..5f9c14e6d8
--- /dev/null
+++ b/deepmd_utils/utils/compat.py
@@ -0,0 +1,392 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Module providing compatibility between `0.x.x` and `1.x.x` input versions."""
+
+import json
+import warnings
+from pathlib import (
+    Path,
+)
+from typing import (
+    Any,
+    Dict,
+    Optional,
+    Sequence,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.common import (
+    j_must_have,
+)
+
+
+def convert_input_v0_v1(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:
+    """Convert input from v0 format to v1.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        loaded json/yaml file; must contain the key "use_smooth"
+    warning : bool, optional
+        whether to show deprecation warning, by default True
+    dump : Optional[Union[str, Path]], optional
+        path the converted input is written to as JSON; nothing is written when None
+
+    Returns
+    -------
+    Dict[str, Any]
+        new dict with "model", "learning_rate", "loss" and "training" sections
+    """
+    output = {}
+    output["model"] = _model(jdata, jdata["use_smooth"])  # direct indexing: KeyError when "use_smooth" is absent
+    output["learning_rate"] = _learning_rate(jdata)
+    output["loss"] = _loss(jdata)
+    output["training"] = _training(jdata)
+    if warning:
+        _warning_input_v0_v1(dump)  # the warning names the dump path when one is given
+    if dump is not None:
+        with open(dump, "w") as fp:
+            json.dump(output, fp, indent=4)
+    return output
+
+
+def _warning_input_v0_v1(fname: Optional[Union[str, Path]]) -> None:  # warn that a 0.x.x input was auto-converted
+    msg = (
+        "It seems that you are using a deepmd-kit input of version 0.x.x, "
+        "which is deprecated. we have converted the input to >2.0.0 compatible"
+    )
+    if fname is not None:  # mention the dump target only when one was requested
+        msg += f", and output it to file {fname}"
+    warnings.warn(msg)  # no category is passed, so Python's default (UserWarning) applies
+
+
+def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]:
+    """Convert data to v1 input for the model section (smooth or non-smooth).
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+    smooth : bool
+        whether to use smooth or non-smooth descriptor version
+
+    Returns
+    -------
+    Dict[str, Dict[str, Any]]
+        dictionary with model input parameters and sub-dictionaries for descriptor and
+        fitting net
+    """
+    model = {}
+    model["descriptor"] = (  # only the descriptor depends on `smooth`
+        _smth_descriptor(jdata) if smooth else _nonsmth_descriptor(jdata)
+    )
+    model["fitting_net"] = _fitting_net(jdata)  # the fitting net is built the same way in both cases
+    return model
+
+
+def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for non-smooth descriptor.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data; must hold "sel_a", "sel_r", "rcut", "axis_rule"
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with descriptor parameters
+    """
+    descriptor = {}
+    descriptor["type"] = "loc_frame"  # fixed descriptor type for the non-smooth variant
+    _jcopy(jdata, descriptor, ("sel_a", "sel_r", "rcut", "axis_rule"))  # all four keys are required (KeyError otherwise)
+    return descriptor
+
+
+def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for smooth descriptor.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data; "sel_a", "rcut" and "filter_neuron" are required
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with descriptor parameters
+    """
+    descriptor = {}
+    seed = jdata.get("seed", None)
+    if seed is not None:  # the seed is propagated only when explicitly given
+        descriptor["seed"] = seed
+    descriptor["type"] = "se_a"
+    descriptor["sel"] = jdata["sel_a"]  # v0 "sel_a" becomes v1 "sel"
+    _jcopy(jdata, descriptor, ("rcut",))
+    descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"])  # falls back to rcut
+    descriptor["neuron"] = j_must_have(jdata, "filter_neuron")
+    descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"])
+    descriptor["resnet_dt"] = False  # default when "filter_resnet_dt" is not given
+    if "filter_resnet_dt" in jdata:  # test the key that is actually read on the next line
+        descriptor["resnet_dt"] = jdata["filter_resnet_dt"]
+
+    return descriptor
+
+
+def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for fitting net.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with fitting net parameters
+    """
+    fitting_net = {}
+
+    seed = jdata.get("seed", None)
+    if seed is not None:  # the seed is propagated only when explicitly given
+        fitting_net["seed"] = seed
+    fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"])
+    fitting_net["resnet_dt"] = True  # default when neither key below is present
+    if "resnet_dt" in jdata:
+        fitting_net["resnet_dt"] = jdata["resnet_dt"]
+    if "fitting_resnet_dt" in jdata:  # checked last, so it wins over "resnet_dt" when both are given
+        fitting_net["resnet_dt"] = jdata["fitting_resnet_dt"]
+    return fitting_net
+
+
+def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for learning rate section.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data; must hold "decay_steps", "decay_rate", "start_lr"
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with learning rate parameters
+    """
+    learning_rate = {}
+    learning_rate["type"] = "exp"  # the type is always set to "exp" here
+    _jcopy(jdata, learning_rate, ("decay_steps", "decay_rate", "start_lr"))  # all three are required (KeyError otherwise)
+    return learning_rate
+
+
+def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for loss function.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data; the six e/f/v prefactor keys are required
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with loss function parameters
+    """
+    loss: Dict[str, Any] = {}
+    _jcopy(  # these six keys are copied unconditionally: a missing one raises KeyError
+        jdata,
+        loss,
+        (
+            "start_pref_e",
+            "limit_pref_e",
+            "start_pref_f",
+            "limit_pref_f",
+            "start_pref_v",
+            "limit_pref_v",
+        ),
+    )
+    if "start_pref_ae" in jdata:  # the two *_ae prefactors are optional and copied independently
+        loss["start_pref_ae"] = jdata["start_pref_ae"]
+    if "limit_pref_ae" in jdata:
+        loss["limit_pref_ae"] = jdata["limit_pref_ae"]
+    return loss
+
+
+def _training(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for training.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with training parameters
+    """
+    training = {}
+    seed = jdata.get("seed", None)
+    if seed is not None:  # the seed is propagated only when explicitly given
+        training["seed"] = seed
+
+    _jcopy(jdata, training, ("systems", "set_prefix", "stop_batch", "batch_size"))  # all four are required
+    training["disp_file"] = "lcurve.out"  # default, replaced below when the input names a file
+    if "disp_file" in jdata:
+        training["disp_file"] = jdata["disp_file"]
+    training["disp_freq"] = j_must_have(jdata, "disp_freq")
+    training["numb_test"] = j_must_have(jdata, "numb_test")
+    training["save_freq"] = j_must_have(jdata, "save_freq")
+    training["save_ckpt"] = j_must_have(jdata, "save_ckpt")
+    training["disp_training"] = j_must_have(jdata, "disp_training")
+    training["time_training"] = j_must_have(jdata, "time_training")
+    if "profiling" in jdata:
+        training["profiling"] = jdata["profiling"]
+        if training["profiling"]:  # "profiling_file" is demanded only when profiling is truthy
+            training["profiling_file"] = j_must_have(jdata, "profiling_file")
+    return training
+
+
+def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]) -> None:
+    """Copy specified keys from one dict to another.
+
+    Parameters
+    ----------
+    src : Dict[str, Any]
+        source dictionary; every key in `keys` must exist, otherwise KeyError is raised
+    dst : Dict[str, Any]
+        destination dictionary, will be modified in place
+    keys : Sequence[str]
+        list of keys to copy
+    """
+    for k in keys:
+        dst[k] = src[k]  # plain assignment: values are shared with `src`, not copied
+
+
+def remove_decay_rate(jdata: Dict[str, Any]) -> None:
+    """Convert decay_rate to stop_lr.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        input data; its "learning_rate" section is modified in place
+    """
+    lr = jdata["learning_rate"]
+    if "decay_rate" in lr:  # nothing is changed when "decay_rate" is absent
+        decay_rate = lr["decay_rate"]
+        start_lr = lr["start_lr"]
+        stop_step = jdata["training"]["stop_batch"]
+        decay_steps = lr["decay_steps"]
+        stop_lr = np.exp(np.log(decay_rate) * (stop_step / decay_steps)) * start_lr  # = start_lr * decay_rate ** (stop_step / decay_steps)
+        lr["stop_lr"] = stop_lr
+        lr.pop("decay_rate")
+
+
+def convert_input_v1_v2(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:  # moves data-related training keys under training/training_data; `jdata` is edited in place
+    tr_cfg = jdata["training"]
+    tr_data_keys = {  # keys that belong to the training data section in the v2 layout
+        "systems",
+        "set_prefix",
+        "batch_size",
+        "sys_prob",
+        "auto_prob",
+        # alias included
+        "sys_weights",
+        "auto_prob_style",
+    }
+
+    tr_data_cfg = {k: v for k, v in tr_cfg.items() if k in tr_data_keys}
+    new_tr_cfg = {k: v for k, v in tr_cfg.items() if k not in tr_data_keys}
+    new_tr_cfg["training_data"] = tr_data_cfg
+    if "training_data" in tr_cfg:  # tested on the original section, so jdata is still untouched when this raises
+        raise RuntimeError(
+            "Both v1 (training/systems) and v2 (training/training_data) parameters are given."
+        )
+
+    jdata["training"] = new_tr_cfg
+
+    # remove deprecated arguments
+    remove_decay_rate(jdata)  # replaces learning_rate/decay_rate with stop_lr, using training/stop_batch
+
+    if warning:
+        _warning_input_v1_v2(dump)
+    if dump is not None:  # `dump` is a path: the converted input is written there as JSON
+        with open(dump, "w") as fp:
+            json.dump(jdata, fp, indent=4)
+
+    return jdata  # the same object that was passed in
+
+
+def _warning_input_v1_v2(fname: Optional[Union[str, Path]]) -> None:  # warn that a 1.x.x input was auto-converted
+    msg = (
+        "It seems that you are using a deepmd-kit input of version 1.x.x, "
+        "which is deprecated. we have converted the input to >2.0.0 compatible"
+    )
+    if fname is not None:  # mention the dump target only when one was requested
+        msg += f", and output it to file {fname}"
+    warnings.warn(msg)  # no category is passed, so Python's default (UserWarning) applies
+
+
+def deprecate_numb_test(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:
+    """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0.
+
+    See `#1243 <https://github.com/deepmodeling/deepmd-kit/discussions/1243>`_.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        loaded json/yaml file; `training/numb_test` is removed from it in place
+    warning : bool, optional
+        whether to show deprecation warning, by default True
+    dump : Optional[Union[str, Path]], optional
+        path the resulting input is written to as JSON; nothing is written when None
+
+    Returns
+    -------
+    Dict[str, Any]
+        the same `jdata` object, without `training/numb_test`
+    """
+    try:
+        jdata.get("training", {}).pop("numb_test")  # pop without a default raises KeyError when the key is absent
+    except KeyError:
+        pass
+    else:  # reached only when the key existed and was removed
+        if warning:
+            warnings.warn(
+                "The argument training->numb_test has been deprecated since v2.0.0. "
+                "Use training->validation_data->batch_size instead."
+            )
+
+    if dump is not None:  # written whether or not anything was removed
+        with open(dump, "w") as fp:
+            json.dump(jdata, fp, indent=4)
+    return jdata
+
+
+def update_deepmd_input(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:  # detects the input version and runs the conversion chain up to the current layout
+    def is_deepmd_v0_input(jdata):
+        return "model" not in jdata.keys()  # a v0 input has no top-level "model" section
+
+    def is_deepmd_v1_input(jdata):
+        return "systems" in j_must_have(jdata, "training").keys()  # v1 keeps "systems" directly under "training"
+
+    if is_deepmd_v0_input(jdata):
+        jdata = convert_input_v0_v1(jdata, warning, None)  # only the first step may warn
+        jdata = convert_input_v1_v2(jdata, False, None)
+        jdata = deprecate_numb_test(jdata, False, dump)  # only the last step of a chain receives `dump`
+    elif is_deepmd_v1_input(jdata):
+        jdata = convert_input_v1_v2(jdata, warning, None)
+        jdata = deprecate_numb_test(jdata, False, dump)
+    else:
+        jdata = deprecate_numb_test(jdata, warning, dump)
+
+    return jdata
diff --git a/deepmd_utils/utils/data.py b/deepmd_utils/utils/data.py
new file mode 100644
index 0000000000..2689257e16
--- /dev/null
+++ b/deepmd_utils/utils/data.py
@@ -0,0 +1,614 @@
+#!/usr/bin/env python3
+
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd_utils.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd_utils.utils import random as dp_random
+from deepmd_utils.utils.path import (
+    DPPath,
+)
+
+log = logging.getLogger(__name__)
+
+
+class DeepmdData:
+    """Class for a data system.
+
+    It loads data from hard disk, and maintains the data as a `data_dict`.
+
+    Parameters
+    ----------
+    sys_path
+            Path to the data system
+    set_prefix
+            Prefix for the directories of different sets
+    shuffle_test
+            If the test data are shuffled
+    type_map
+            Gives the name of different atom types
+    optional_type_map
+            If the type_map.raw in each system is optional
+    modifier
+            Data modifier that has the method `modify_data`
+    trn_all_set
+            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
+    sort_atoms : bool
+            Sort atoms by atom types. Must be enabled when the data is fed directly to
+            descriptors, except for mixed types.
+    """
+
+    def __init__(
+        self,
+        sys_path: str,
+        set_prefix: str = "set",
+        shuffle_test: bool = True,
+        type_map: Optional[List[str]] = None,
+        optional_type_map: bool = True,
+        modifier=None,
+        trn_all_set: bool = False,
+        sort_atoms: bool = True,
+    ):
+        """Locate the `set_prefix`.* directories under `sys_path`, load types and register the default items."""
+        root = DPPath(sys_path)
+        self.dirs = root.glob(set_prefix + ".*")
+        if not len(self.dirs):
+            raise FileNotFoundError(f"No {set_prefix}.* is found in {sys_path}")
+        self.dirs.sort()
+        # every set must agree with the first one on whether it uses the mixed_type format
+        error_format_msg = (
+            "if one of the set is of mixed_type format, "
+            "then all of the sets in this system should be of mixed_type format!"
+        )
+        self.mixed_type = self._check_mode(self.dirs[0])
+        for set_item in self.dirs[1:]:
+            assert self._check_mode(set_item) == self.mixed_type, error_format_msg
+        # load atom type
+        self.atom_type = self._load_type(root)
+        self.natoms = len(self.atom_type)
+        # load the system's own type map (None is tolerated only when optional_type_map is true)
+        self.type_map = self._load_type_map(root)
+        assert (
+            optional_type_map or self.type_map is not None
+        ), f"System {sys_path} must have type_map.raw in this mode! "
+        if self.type_map is not None:
+            assert len(self.type_map) >= max(self.atom_type) + 1  # every type index needs a name
+        # check pbc
+        self.pbc = self._check_pbc(root)
+        # remap the system's types onto the caller-supplied type_map, when both maps exist
+        self.enforce_type_map = False
+        if type_map is not None and self.type_map is not None and len(type_map):
+            if not self.mixed_type:
+                atom_type_ = [  # translate each atom's type index via its name; ValueError if the name is not in type_map
+                    type_map.index(self.type_map[ii]) for ii in self.atom_type
+                ]
+                self.atom_type = np.array(atom_type_, dtype=np.int32)
+            else:
+                self.enforce_type_map = True  # per-frame types are remapped later, in _load_set
+                sorter = np.argsort(type_map)
+                self.type_idx_map = np.array(  # NOTE(review): assumes every name in self.type_map occurs in type_map — confirm
+                    sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)]
+                )
+                # trailing -1 entry: indexing with type -1 (virtual atom) yields -1 again
+                self.type_idx_map = np.append(
+                    self.type_idx_map, np.array([-1], dtype=np.int32)
+                )
+            self.type_map = type_map  # from here on the caller's map is the authoritative one
+        if type_map is None and self.type_map is None and self.mixed_type:
+            raise RuntimeError("mixed_type format must have type_map!")
+        # make idx map
+        self.sort_atoms = sort_atoms
+        self.idx_map = self._make_idx_map(self.atom_type)
+        # the last set is the test set; it is kept out of training unless trn_all_set or it is the only set
+        self.test_dir = self.dirs[-1]
+        if trn_all_set:
+            self.train_dirs = self.dirs
+        else:
+            if len(self.dirs) == 1:
+                self.train_dirs = self.dirs
+            else:
+                self.train_dirs = self.dirs[:-1]
+        self.data_dict = {}
+        # box (required only under pbc) and coord (always required) are registered by default
+        self.add("box", 9, must=self.pbc)
+        self.add("coord", 3, atomic=True, must=True)
+        # optional per-frame repeat count used when shuffling; defaults to 1
+        self.add("numb_copy", 1, must=False, default=1, dtype=int)
+        # set counters
+        self.set_count = 0
+        self.iterator = 0
+        self.shuffle_test = shuffle_test
+        # set modifier
+        self.modifier = modifier
+
+    def add(
+        self,
+        key: str,
+        ndof: int,
+        atomic: bool = False,
+        must: bool = False,
+        high_prec: bool = False,
+        type_sel: Optional[List[int]] = None,
+        repeat: int = 1,
+        default: float = 0.0,
+        dtype: Optional[np.dtype] = None,
+    ):
+        """Register a data item to be loaded; returns `self` so calls can be chained.
+
+        Parameters
+        ----------
+        key
+            The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
+        ndof
+            The number of dof
+        atomic
+            The item is an atomic property.
+            If False, the size of the data should be nframes x ndof
+            If True, the size of data should be nframes x natoms x ndof
+        must
+            The data file `sys_path/set.*/key.npy` must exist.
+            If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
+        high_prec
+            Load the data and store in float64, otherwise in float32
+        type_sel
+            Select certain type of atoms
+        repeat
+            The data will be repeated `repeat` times.
+        default : float, default=0.
+            default value of data
+        dtype : np.dtype, optional
+            the dtype of data, overwrites `high_prec` if provided
+        """
+        self.data_dict[key] = {  # re-adding an existing key silently replaces its entry
+            "ndof": ndof,
+            "atomic": atomic,
+            "must": must,
+            "high_prec": high_prec,
+            "type_sel": type_sel,
+            "repeat": repeat,
+            "reduce": None,  # None marks an item that is loaded rather than derived via reduce()
+            "default": default,
+            "dtype": dtype,
+        }
+        return self
+
+    def reduce(self, key_out: str, key_in: str):
+        """Register a new item computed by reducing an atomic item; returns `self`.
+
+        Parameters
+        ----------
+        key_out
+            The name of the reduced item
+        key_in
+            The name of the data item to be reduced
+        """
+        assert key_in in self.data_dict, "cannot find input key"
+        assert self.data_dict[key_in]["atomic"], "reduced property should be atomic"
+        assert key_out not in self.data_dict, "output key should not have been added"
+        assert (
+            self.data_dict[key_in]["repeat"] == 1
+        ), "reduced proerties should not have been repeated"
+
+        self.data_dict[key_out] = {  # NOTE(review): unlike add(), no "default"/"dtype" keys are stored here
+            "ndof": self.data_dict[key_in]["ndof"],
+            "atomic": False,
+            "must": True,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "reduce": key_in,  # non-None: _load_set derives this item by summing `key_in` over atoms
+        }
+        return self
+
+    def get_data_dict(self) -> dict:
+        """Return the item registry `data_dict` itself (not a copy), keyed by item name."""
+        return self.data_dict
+
+    def check_batch_size(self, batch_size):
+        """Return (set_dir, nframes) for the first training set with fewer than `batch_size` frames, else None."""
+        for ii in self.train_dirs:
+            if self.data_dict["coord"]["high_prec"]:
+                tmpe = (
+                    (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
+                )
+            else:
+                tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
+            if tmpe.ndim == 1:  # a 1-D array is treated as a single frame
+                tmpe = tmpe.reshape([1, -1])
+            if tmpe.shape[0] < batch_size:  # only the frame count is used; the values are not inspected
+                return ii, tmpe.shape[0]
+        return None
+
+    def check_test_size(self, test_size):
+        """Return (test_dir, nframes) when the test set has fewer than `test_size` frames, else None."""
+        if self.data_dict["coord"]["high_prec"]:
+            tmpe = (
+                (self.test_dir / "coord.npy")
+                .load_numpy()
+                .astype(GLOBAL_ENER_FLOAT_PRECISION)
+            )
+        else:
+            tmpe = (
+                (self.test_dir / "coord.npy")
+                .load_numpy()
+                .astype(GLOBAL_NP_FLOAT_PRECISION)
+            )
+        if tmpe.ndim == 1:  # a 1-D array is treated as a single frame
+            tmpe = tmpe.reshape([1, -1])
+        if tmpe.shape[0] < test_size:  # only the frame count is used; the values are not inspected
+            return self.test_dir, tmpe.shape[0]
+        else:
+            return None
+
+    def get_batch(self, batch_size: int) -> dict:
+        """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system.
+
+        Parameters
+        ----------
+        batch_size
+            size of the batch; fewer frames are returned when the loaded set is smaller
+        """
+        if hasattr(self, "batch_set"):
+            set_size = self.batch_set["coord"].shape[0]
+        else:
+            set_size = 0  # nothing loaded yet, which forces a load below
+        if self.iterator + batch_size > set_size:  # not enough unread frames left in the current set
+            self._load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()])  # cycle through the training sets
+            self.set_count += 1
+            set_size = self.batch_set["coord"].shape[0]
+        iterator_1 = self.iterator + batch_size
+        if iterator_1 >= set_size:  # clamp the window end to the size of the set
+            iterator_1 = set_size
+        idx = np.arange(self.iterator, iterator_1)
+        self.iterator += batch_size  # advanced by the requested size even when the window was clamped
+        ret = self._get_subdata(self.batch_set, idx)
+        return ret
+
+    def get_test(self, ntests: int = -1) -> dict:
+        """Get the test data with `ntests` frames.
+
+        Parameters
+        ----------
+        ntests
+            Size of the test data set. If `ntests` is -1, all test data are returned.
+        """
+        if not hasattr(self, "test_set"):  # the test set is loaded once and then cached on the instance
+            self._load_test_set(self.test_dir, self.shuffle_test)
+        if ntests == -1:
+            idx = None
+        else:
+            ntests_ = (
+                ntests
+                if ntests < self.test_set["type"].shape[0]
+                else self.test_set["type"].shape[0]
+            )
+            # ntests_ is the request clamped to the number of frames in the test set
+            idx = np.arange(ntests_)
+        ret = self._get_subdata(self.test_set, idx=idx)
+        if self.modifier is not None:  # the modifier runs on the returned subset on every call
+            self.modifier.modify_data(ret, self)
+        return ret
+
+    def get_ntypes(self) -> int:
+        """Return the number of atom types known to this system."""
+        # Without a type map, fall back to the largest type index plus one.
+        if self.type_map is None:
+            return max(self.get_atom_type()) + 1
+        return len(self.type_map)
+
+    def get_type_map(self) -> List[str]:
+        """Return the stored type map; __init__ may leave it as None when the loader found none."""
+        return self.type_map
+
+    def get_atom_type(self) -> List[int]:
+        """Return the per-atom type indices held in `self.atom_type` (the stored object, not a copy)."""
+        return self.atom_type
+
+    def get_numb_set(self) -> int:
+        """Return the number of training set directories (`len(self.train_dirs)`)."""
+        return len(self.train_dirs)
+
+    def get_numb_batch(self, batch_size: int, set_idx: int) -> int:
+        """Return how many whole batches fit in training set `set_idx` (never 0)."""
+        # The whole set is loaded just to learn its frame count.
+        loaded = self._load_set(self.train_dirs[set_idx])
+        nframes = loaded["coord"].shape[0]
+        nbatch = nframes // batch_size
+        return nbatch if nbatch != 0 else 1
+
+    def get_sys_numb_batch(self, batch_size: int) -> int:
+        """Return the batch count summed over every training set of the system."""
+        nsets = len(self.train_dirs)
+        # Sets are addressed by their position in `train_dirs`.
+        per_set = (self.get_numb_batch(batch_size, idx) for idx in range(nsets))
+        return sum(per_set)
+
+    def get_natoms(self) -> int:
+        """Return the number of atoms, computed as `len(self.atom_type)`."""
+        return len(self.atom_type)
+
+    def get_natoms_vec(self, ntypes: int):
+        """Return the atom-count vector: total count twice, then one count per type.
+
+        Parameters
+        ----------
+        ntypes
+            Number of types to report (may exceed the number of types actually present).
+
+        Returns
+        -------
+        natoms
+            int32 array of length ntypes + 2
+            natoms[0], natoms[1]: number of atoms in the system (same value twice)
+            natoms[i]: 2 <= i < ntypes + 2, number of atoms of type i - 2
+        """
+        total, per_type = self._get_natoms_2(ntypes)
+        # The total goes in twice, ahead of the per-type counts.
+        merged = np.append([total, total], per_type)
+        return merged.astype(np.int32)
+
+    def avg(self, key):
+        """Return the per-component mean of item `key` over all training sets.
+
+        Raises RuntimeError when `key` was never registered; returns the
+        scalar 0 when the training sets hold no values for it.
+        """
+        if key not in self.data_dict:
+            raise RuntimeError("key %s has not been added" % key)
+        width = self.data_dict[key]["ndof"]
+        # One (rows, ndof) block per training set, stacked along the row axis.
+        blocks = [
+            self._load_set(set_dir)[key].reshape([-1, width])
+            for set_dir in self.train_dirs
+        ]
+        stacked = np.concatenate(blocks, axis=0)
+        return 0 if stacked.size == 0 else np.average(stacked, axis=0)
+
+    def _idx_map_sel(self, atom_type, type_sel):
+        """Return indices that sort the selected atoms by type, ties by position.
+
+        Only atoms whose type is in `type_sel` are kept; the returned indices
+        refer to positions within that filtered list.
+        """
+        selected = np.array([t for t in atom_type if t in type_sel], dtype=int)
+        positions = np.arange(selected.shape[0])
+        # lexsort uses the last key as primary: type first, then original order.
+        return np.lexsort((positions, selected))
+
+    def _get_natoms_2(self, ntypes):
+        """Return the atom count and an int array of per-type atom counts."""
+        types = self.atom_type
+        counts = np.zeros(ntypes).astype(int)
+        # Entry t counts the atoms whose type index equals t.
+        counts[:] = [np.count_nonzero(types == t) for t in range(ntypes)]
+        return len(types), counts
+
+    def _get_subdata(self, data, idx=None):
+        """Return a new dict holding the entries of `data` indexed by `idx`.
+
+        Entries whose key contains "find_" are carried over unindexed; when
+        `idx` is None every entry is passed through unchanged.
+        """
+        subset = {}
+        for name, value in data.items():
+            # Keys containing "find_" bypass the indexing step.
+            keep_whole = idx is None or "find_" in name
+            subset[name] = value if keep_whole else value[idx]
+        return subset
+
+    def _load_batch_set(self, set_name: DPPath):  # make `set_name` the current batch set, reshuffled, with the iterator reset
+        if not hasattr(self, "batch_set") or self.get_numb_set() > 1:  # with a single training set the cached data are reused
+            self.batch_set = self._load_set(set_name)
+            if self.modifier is not None:  # the modifier is applied once per (re)load
+                self.modifier.modify_data(self.batch_set, self)
+        self.batch_set, _ = self._shuffle_data(self.batch_set)  # NOTE(review): a reused set is shuffled again, re-applying numb_copy — confirm
+        self.reset_get_batch()
+
+    def reset_get_batch(self) -> None:  # rewind the read position that get_batch advances
+        self.iterator = 0
+
+    def _load_test_set(self, set_name: DPPath, shuffle_test):  # load `set_name` into self.test_set
+        self.test_set = self._load_set(set_name)
+        if shuffle_test:  # shuffling is optional; the returned index order is discarded
+            self.test_set, _ = self._shuffle_data(self.test_set)
+
+    def _shuffle_data(self, data):  # returns (dict with per-frame arrays reordered, the frame index order used)
+        ret = {}
+        nframes = data["coord"].shape[0]
+        idx = np.arange(nframes)
+        # repeat each frame index by its "numb_copy" entry so that frame is drawn that many times
+        idx = np.repeat(idx, np.reshape(data["numb_copy"], (nframes,)))
+        dp_random.shuffle(idx)
+        for kk in data:
+            if (  # only 2-D arrays with one row per frame are reordered; "find_*" entries never are
+                isinstance(data[kk], np.ndarray)  # isinstance also accepts ndarray subclasses (e.g. np.memmap)
+                and len(data[kk].shape) == 2
+                and data[kk].shape[0] == nframes
+                and "find_" not in kk
+            ):
+                ret[kk] = data[kk][idx]
+            else:
+                ret[kk] = data[kk]  # everything else is passed through by reference
+        return ret, idx
+
+    def _load_set(self, set_name: DPPath):  # load every registered item of one set directory into a dict
+        # coord.npy is read first: its row count defines nframes for everything else
+        if not isinstance(set_name, DPPath):
+            set_name = DPPath(set_name)
+        path = set_name / "coord.npy"
+        if self.data_dict["coord"]["high_prec"]:
+            coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
+        else:
+            coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
+        if coord.ndim == 1:  # a 1-D array is treated as a single frame
+            coord = coord.reshape([1, -1])
+        nframes = coord.shape[0]
+        assert coord.shape[1] == self.data_dict["coord"]["ndof"] * self.natoms
+        # first pass: items read from files (their "reduce" entry is None)
+        data = {}
+        for kk in self.data_dict.keys():
+            if self.data_dict[kk]["reduce"] is None:
+                data["find_" + kk], data[kk] = self._load_data(
+                    set_name,
+                    kk,
+                    nframes,
+                    self.data_dict[kk]["ndof"],
+                    atomic=self.data_dict[kk]["atomic"],
+                    high_prec=self.data_dict[kk]["high_prec"],
+                    must=self.data_dict[kk]["must"],
+                    type_sel=self.data_dict[kk]["type_sel"],
+                    repeat=self.data_dict[kk]["repeat"],
+                    default=self.data_dict[kk]["default"],
+                    dtype=self.data_dict[kk]["dtype"],
+                )
+        for kk in self.data_dict.keys():  # second pass: derived items, which need their source item loaded above
+            if self.data_dict[kk]["reduce"] is not None:
+                k_in = self.data_dict[kk]["reduce"]
+                ndof = self.data_dict[kk]["ndof"]
+                data["find_" + kk] = data["find_" + k_in]  # the derived item inherits the source's find flag
+                tmp_in = data[k_in].astype(GLOBAL_ENER_FLOAT_PRECISION)
+                data[kk] = np.sum(  # sum over the atom axis -> nframes x ndof
+                    np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis=1
+                )
+
+        if self.mixed_type:
+            # nframes x natoms
+            atom_type_mix = self._load_type_mix(set_name)
+            if self.enforce_type_map:  # translate the set's type indices through the map built in __init__
+                try:
+                    atom_type_mix_ = self.type_idx_map[atom_type_mix].astype(np.int32)
+                except IndexError as e:
+                    raise IndexError(
+                        "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format(
+                            set_name, self.get_ntypes()
+                        )
+                    ) from e
+                atom_type_mix = atom_type_mix_
+            real_type = atom_type_mix.reshape([nframes, self.natoms])
+            data["type"] = real_type
+            natoms = data["type"].shape[1]
+            # nframes x ntypes
+            atom_type_nums = np.array(
+                [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())],
+                dtype=np.int32,
+            ).T
+            ghost_nums = np.array(  # per-frame count of atoms whose type is -1
+                [(real_type == -1).sum(axis=-1)],
+                dtype=np.int32,
+            ).T
+            assert (  # every atom must be either a known type or -1
+                atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms
+            ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format(
+                set_name, self.get_ntypes()
+            )
+            data["real_natoms_vec"] = np.concatenate(  # per frame: [natoms, natoms, count per type...]
+                (
+                    np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)),
+                    atom_type_nums,
+                ),
+                axis=-1,
+            )
+        else:
+            data["type"] = np.tile(self.atom_type[self.idx_map], (nframes, 1))  # same reordered types for every frame
+
+        return data
+
+    def _load_data(
+        self,
+        set_name,
+        key,
+        nframes,
+        ndof_,
+        atomic=False,
+        must=True,
+        repeat=1,
+        high_prec=False,
+        type_sel=None,
+        default: float = 0.0,
+        dtype: Optional[np.dtype] = None,
+    ):
+        """Load `<set_name>/<key>.npy` as an (nframes, ndof) array.
+
+        Returns `(find, data)`: `find` is float32 1.0 when the file exists and 0.0 when
+        `data` was filled with `default`. Raises RuntimeError if the file is missing and
+        `must` is set, and ValueError if the stored array cannot be reshaped as expected.
+        """
+        if atomic:
+            natoms = self.natoms
+            idx_map = self.idx_map
+            # if type_sel, then revise natoms and idx_map
+            if type_sel is not None:
+                natoms = sum(np.sum(self.atom_type == jj) for jj in type_sel)
+                idx_map = self._idx_map_sel(self.atom_type, type_sel)
+            ndof = ndof_ * natoms
+        else:
+            ndof = ndof_
+        if dtype is None:  # an explicit dtype wins over the high_prec switch
+            dtype = GLOBAL_ENER_FLOAT_PRECISION if high_prec else GLOBAL_NP_FLOAT_PRECISION
+        path = set_name / (key + ".npy")
+        if path.is_file():
+            data = path.load_numpy().astype(dtype)
+            try:  # YWolfeee: deal with data shape error
+                if atomic:
+                    data = data.reshape([nframes, natoms, -1])
+                    data = data[:, idx_map, :]  # reorder the atoms with the index map
+                    data = data.reshape([nframes, -1])
+                data = np.reshape(data, [nframes, ndof])
+            except ValueError as err_message:
+                explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
+                log.error(str(err_message))
+                log.error(explanation)
+                raise ValueError(str(err_message) + ". " + explanation) from err_message
+            if repeat != 1:
+                data = np.repeat(data, repeat).reshape([nframes, -1])
+            return np.float32(1.0), data
+        elif must:
+            raise RuntimeError("%s not found!" % path)
+        else:
+            data = np.full([nframes, ndof], default, dtype=dtype)
+            if repeat != 1:
+                data = np.repeat(data, repeat).reshape([nframes, -1])
+            return np.float32(0.0), data
+
+    def _load_type(self, sys_path: DPPath):
+        """Read `type.raw` under `sys_path` (loaded with `ndmin=1`) and cast it to int32."""
+        return (sys_path / "type.raw").load_txt(ndmin=1).astype(np.int32)
+
+    def _load_type_mix(self, set_name: DPPath):
+        """Read `real_atom_types.npy` of a set as an int32 array reshaped to (-1, natoms)."""
+        stored_types = (set_name / "real_atom_types.npy").load_numpy().astype(np.int32)
+        return stored_types.reshape([-1, self.natoms])
+
+    def _make_idx_map(self, atom_type):
+        """Build an index array over the atoms of `atom_type`.
+
+        With `self.sort_atoms` the indices are ordered by atom type (ties keep the
+        original order through the secondary index key); otherwise it is `arange`.
+        """
+        order = np.arange(atom_type.shape[0])
+        return np.lexsort((order, atom_type)) if self.sort_atoms else order
+
+    def _load_type_map(self, sys_path: DPPath):
+        """Return the entries of `type_map.raw` as a list, or None if the file is absent."""
+        type_map_file = sys_path / "type_map.raw"
+        if not type_map_file.is_file():
+            return None
+        return type_map_file.load_txt(dtype=str, ndmin=1).tolist()
+
+    def _check_pbc(self, sys_path: DPPath):
+        """Return False if a `nopbc` file exists under `sys_path`, True otherwise."""
+        if (sys_path / "nopbc").is_file():
+            return False
+        return True
+
+    def _check_mode(self, set_path: DPPath):
+        return (set_path / "real_atom_types.npy").is_file()  # True when the set stores `real_atom_types.npy`
diff --git a/deepmd_utils/utils/data_system.py b/deepmd_utils/utils/data_system.py
new file mode 100644
index 0000000000..f83f587590
--- /dev/null
+++ b/deepmd_utils/utils/data_system.py
@@ -0,0 +1,654 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import collections
+import logging
+import warnings
+from functools import (
+    lru_cache,
+)
+from typing import (
+    List,
+    Optional,
+)
+
+import numpy as np
+
+import deepmd_utils.utils.random as dp_random
+from deepmd_utils.common import (
+    make_default_mesh,
+)
+from deepmd_utils.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd_utils.utils.data import (
+    DeepmdData,
+)
+
+log = logging.getLogger(__name__)
+
+
+class DeepmdDataSystem:
+    """Class for manipulating many data systems.
+
+    It is implemented with the help of DeepmdData
+    """
+
+    def __init__(
+        self,
+        systems: List[str],
+        batch_size: int,
+        test_size: int,
+        rcut: Optional[float] = None,
+        set_prefix: str = "set",
+        shuffle_test: bool = True,
+        type_map: Optional[List[str]] = None,
+        optional_type_map: bool = True,
+        modifier=None,
+        trn_all_set=False,
+        sys_probs=None,
+        auto_prob_style="prob_sys_size",
+        sort_atoms: bool = True,
+    ):
+        """Constructor.
+
+        Parameters
+        ----------
+        systems
+            The paths to the systems
+        batch_size
+            The batch size: an int, a per-system list, or a rule string "auto[:N]" / "mixed:N"
+        test_size
+            The size of test data: an int, a per-system list, or a percentage string like "10%"
+        rcut
+            The cut-off radius. Not used.
+        set_prefix
+            Prefix for the directories of different sets
+        shuffle_test
+            If the test data are shuffled
+        type_map
+            Gives the name of different atom types
+        optional_type_map
+            If the type_map.raw in each system is optional
+        modifier
+            Data modifier that has the method `modify_data`
+        trn_all_set
+            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
+        sys_probs : list of float
+            The probabilities of systems to get the batch.
+            Summation of positive elements of this list should be no greater than 1.
+            Element of this list can be negative, the probability of the corresponding system is determined
+                automatically by the number of batches in the system.
+        auto_prob_style : str
+            Determine the probability of systems automatically. The method is assigned by this key and can be
+            - "prob_uniform"  : the probabilities of all the systems are equal, namely 1.0/self.get_nsystems()
+            - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system
+            - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." :
+                                the list of systems is divided into blocks. A block is specified by `stt_idx:end_idx:weight`,
+                                where `stt_idx` is the starting index of the system, `end_idx` is the ending (not including) index of the system,
+                                the probabilities of the systems in this block sum up to `weight`, and the relative probabilities within this block are proportional
+                to the number of batches in the system.
+        sort_atoms : bool
+            Sort atoms by atom types. Required to enable when the data is directly fed to
+            descriptors except mixed types.
+        """
+        # init data
+        del rcut  # documented as unused; dropped right away
+        self.system_dirs = systems
+        self.nsystems = len(self.system_dirs)
+        self.data_systems = []
+        for ii in self.system_dirs:
+            self.data_systems.append(
+                DeepmdData(
+                    ii,
+                    set_prefix=set_prefix,
+                    shuffle_test=shuffle_test,
+                    type_map=type_map,
+                    optional_type_map=optional_type_map,
+                    modifier=modifier,
+                    trn_all_set=trn_all_set,
+                    sort_atoms=sort_atoms,
+                )
+            )
+        # check mixed_type format: the first system decides, all others must agree
+        error_format_msg = (
+            "if one of the system is of mixed_type format, "
+            "then all of the systems should be of mixed_type format!"
+        )
+        if self.data_systems[0].mixed_type:
+            for data_sys in self.data_systems[1:]:
+                assert data_sys.mixed_type, error_format_msg
+            self.mixed_type = True
+        else:
+            for data_sys in self.data_systems[1:]:
+                assert not data_sys.mixed_type, error_format_msg
+            self.mixed_type = False
+        # batch size: normalized below to one entry per system
+        self.batch_size = batch_size
+        is_auto_bs = False
+        self.mixed_systems = False
+        if isinstance(self.batch_size, int):
+            self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int)
+        elif isinstance(self.batch_size, str):
+            words = self.batch_size.split(":")
+            if "auto" == words[0]:
+                is_auto_bs = True
+                rule = 32  # rule passed to _make_auto_bs when "auto" carries no ":N" suffix
+                if len(words) == 2:
+                    rule = int(words[1])
+                self.batch_size = self._make_auto_bs(rule)
+            elif "mixed" == words[0]:
+                self.mixed_type = True
+                self.mixed_systems = True
+                if len(words) == 2:
+                    rule = int(words[1])
+                else:
+                    raise RuntimeError("batch size must be specified for mixed systems")
+                self.batch_size = rule * np.ones(self.nsystems, dtype=int)
+            else:
+                raise RuntimeError("unknown batch_size rule " + words[0])
+        elif isinstance(self.batch_size, list):
+            pass
+        else:
+            raise RuntimeError("invalid batch_size")
+        assert isinstance(self.batch_size, (list, np.ndarray))
+        assert len(self.batch_size) == self.nsystems
+
+        # natoms, nbatches
+        ntypes = []
+        for ii in self.data_systems:
+            ntypes.append(ii.get_ntypes())
+        self.sys_ntypes = max(ntypes)  # the largest number of types among the systems
+        self.natoms = []
+        self.natoms_vec = []
+        self.nbatches = []
+        type_map_list = []
+        for ii in range(self.nsystems):
+            self.natoms.append(self.data_systems[ii].get_natoms())
+            self.natoms_vec.append(
+                self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)
+            )
+            self.nbatches.append(
+                self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])
+            )
+            type_map_list.append(self.data_systems[ii].get_type_map())
+        self.type_map = self._check_type_map_consistency(type_map_list)
+
+        # ! altered by Marián Rynik
+        # test size
+        # now test size can be set as a percentage of systems data or test size
+        # can be set for each system individually in the same manner as batch
+        # size. This enables one to use systems with diverse number of
+        # structures and different number of atoms.
+        self.test_size = test_size
+        if isinstance(self.test_size, int):
+            self.test_size = self.test_size * np.ones(self.nsystems, dtype=int)
+        elif isinstance(self.test_size, str):
+            words = self.test_size.split("%")  # only the text before the first "%" is used
+            try:
+                percent = int(words[0])
+            except ValueError:
+                raise RuntimeError("unknown test_size rule " + words[0])
+            self.test_size = self._make_auto_ts(percent)
+        elif isinstance(self.test_size, list):
+            pass
+        else:
+            raise RuntimeError("invalid test_size")
+        assert isinstance(self.test_size, (list, np.ndarray))
+        assert len(self.test_size) == self.nsystems
+
+        # init pick idx
+        self.pick_idx = 0
+
+        # derive system probabilities
+        self.sys_probs = None
+        self.set_sys_probs(sys_probs, auto_prob_style)
+
+        # check batch and test size; warnings are skipped for "auto" batch sizes and mixed systems
+        for ii in range(self.nsystems):
+            chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii])
+            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
+                warnings.warn(
+                    "system %s required batch size is larger than the size of the dataset %s (%d > %d)"
+                    % (
+                        self.system_dirs[ii],
+                        chk_ret[0],
+                        self.batch_size[ii],
+                        chk_ret[1],
+                    )
+                )
+            chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii])
+            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
+                warnings.warn(
+                    "system %s required test size is larger than the size of the dataset %s (%d > %d)"
+                    % (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1])
+                )
+
+    def _load_test(self, ntests=-1):
+        self.test_data = collections.defaultdict(list)  # test_data[key][sys_idx]
+        for system in self.data_systems:
+            system_test = system.get_test(ntests=ntests)
+            for name in system_test:
+                self.test_data[name].append(system_test[name])
+
+    @property
+    def default_mesh(self) -> List[np.ndarray]:
+        """Mesh for each system, built once per instance and then reused."""
+        # Cache on the instance instead of `lru_cache`: a class-level cache keyed on
+        # `self` would keep every data system (and its loaded data) alive forever.
+        if "_default_mesh" not in self.__dict__:
+            self._default_mesh = [
+                make_default_mesh(ss.pbc, ss.mixed_type) for ss in self.data_systems
+            ]
+        return self._default_mesh
+
+    def compute_energy_shift(self, rcond=None, key="energy"):
+        """Least-squares fit of a shift per type to the per-system averages of `key`.
+
+        Solves `counts @ shift = avg`, where `counts` are columns 2 onward of each
+        system's `natoms_vec` (presumably per-type atom counts) and `avg` is `ss.avg(key)`.
+        """
+        averages = np.concatenate([ss.avg(key) for ss in self.data_systems])
+        counts = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION)
+        counts = np.reshape(counts, [self.nsystems, -1])[:, 2:]
+        # lstsq returns (solution, residuals, rank, singular values); only the fit is used
+        solution = np.linalg.lstsq(counts, averages, rcond=rcond)[0]
+        return solution
+
+    def add_dict(self, adict: dict) -> None:
+        """Add items to the data system by a `dict`.
+        `adict` should have items like
+        .. code-block:: python
+
+           adict[key] = {
+               "ndof": ndof,
+               "atomic": atomic,
+               "must": must,
+               "high_prec": high_prec,
+               "type_sel": type_sel,
+               "repeat": repeat,
+           }
+
+        For the explanation of the keys see `add`. An optional "default" entry is forwarded too.
+        """
+        for kk in adict:
+            self.add(
+                kk,
+                adict[kk]["ndof"],
+                atomic=adict[kk]["atomic"],
+                must=adict[kk]["must"],
+                high_prec=adict[kk]["high_prec"],
+                type_sel=adict[kk]["type_sel"],
+                repeat=adict[kk]["repeat"],
+                default=adict[kk].get("default", 0.0),  # same fallback as the default of `add`
+            )
+
+    def add(
+        self,
+        key: str,
+        ndof: int,
+        atomic: bool = False,
+        must: bool = False,
+        high_prec: bool = False,
+        type_sel: Optional[List[int]] = None,
+        repeat: int = 1,
+        default: float = 0.0,
+    ):
+        """Register a data item to be loaded by every data system.
+
+        Parameters
+        ----------
+        key
+            Name of the item; its data is read from `sys_path/set.*/key.npy`
+        ndof
+            Number of degrees of freedom of the item
+        atomic
+            Whether the item is a per-atom property.
+            If False, the data should have size nframes x ndof
+            If True, the data should have size nframes x natoms x ndof
+        must
+            Whether the file `sys_path/set.*/key.npy` is required to exist.
+            If False and the file is missing, `data_dict[find_key]` is set to 0.0
+        high_prec
+            Load the item with the high-precision float type instead of the default one
+        type_sel
+            Restrict the item to atoms of the listed types
+        repeat
+            Number of times the loaded data is repeated
+        default
+            Value used to fill the data when the file is missing
+        """
+        # the same keyword arguments are forwarded to every system
+        forwarded = dict(
+            atomic=atomic,
+            must=must,
+            high_prec=high_prec,
+            repeat=repeat,
+            type_sel=type_sel,
+            default=default,
+        )
+        for data_sys in self.data_systems:
+            data_sys.add(key, ndof, **forwarded)
+
+    def reduce(self, key_out, key_in):
+        """Register in every data system a new item obtained by reducing another item.
+
+        Parameters
+        ----------
+        key_out
+            Name under which the reduced item is stored
+        key_in
+            Name of the existing data item that is reduced
+        """
+        for data_sys in self.data_systems:
+            data_sys.reduce(key_out, key_in)
+
+    def get_data_dict(self, ii: int = 0) -> dict:
+        return self.data_systems[ii].get_data_dict()  # data dict of the ii-th system (the first by default)
+
+    def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"):
+        """Set `self.sys_probs`, the probability of drawing a batch from each system.
+
+        Explicit `sys_probs` take precedence; otherwise `auto_prob_style` decides.
+        """
+        if sys_probs is not None:
+            probs = process_sys_probs(sys_probs, self.nbatches)
+        elif auto_prob_style == "prob_uniform":
+            probs = [1.0 / float(self.nsystems)] * self.nsystems
+        elif auto_prob_style[:13] == "prob_sys_size":
+            style = auto_prob_style
+            if style == "prob_sys_size":
+                # the bare keyword means a single block spanning all systems
+                style = f"prob_sys_size;0:{self.get_nsystems()}:1.0"
+            probs = prob_sys_size_ext(style, self.get_nsystems(), self.nbatches)
+        else:
+            raise RuntimeError("Unknown auto prob style: " + auto_prob_style)
+        self.sys_probs = probs
+
+    def get_batch(self, sys_idx: Optional[int] = None) -> dict:
+        """Get a batch of data from the data systems.
+
+        The system probabilities are not recomputed per batch: they are taken from
+        `self.sys_probs`, which `set_sys_probs()` stores (it is called in `__init__()`
+        and may be called again to change `sys_probs` / `auto_prob_style`).
+
+        Parameters
+        ----------
+        sys_idx : int
+            Index of the system the batch is taken from.
+            If given, the stored system probabilities are ignored.
+            If None, the system is drawn according to the stored system probabilities.
+            This option has no effect for mixed systems.
+
+        Returns
+        -------
+        dict
+            The batch data
+        """
+        if self.mixed_systems:
+            # mixed batches always draw their systems at random
+            b_data = self.get_batch_mixed()
+        else:
+            b_data = self.get_batch_standard(sys_idx)
+        return b_data
+
+    def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict:
+        """Get a batch of data from a single data system.
+
+        The index of the chosen system is stored in `self.pick_idx`.
+
+        Parameters
+        ----------
+        sys_idx : int
+            Index of the system the batch is taken from.
+            If given, the stored system probabilities are ignored.
+            If None, the system is drawn at random according to `self.sys_probs`.
+
+        Returns
+        -------
+        dict
+            The batch data, extended with the `natoms_vec` and `default_mesh` of the system
+        """
+        if sys_idx is None:
+            sys_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
+        # get_test() and get_sys_ntest() default to this index
+        self.pick_idx = sys_idx
+        picked_system = self.data_systems[sys_idx]
+        b_data = picked_system.get_batch(self.batch_size[sys_idx])
+        b_data["natoms_vec"] = self.natoms_vec[sys_idx]
+        b_data["default_mesh"] = self.default_mesh[sys_idx]
+        return b_data
+
+    def get_batch_mixed(self) -> dict:
+        """Get a batch assembled frame by frame from randomly drawn systems.
+
+        Returns
+        -------
+        dict
+            The merged batch data
+        """
+        # mixed systems share one global batch size, stored in the first entry
+        nframes = self.batch_size[0]
+        frames = []
+        for _ in range(nframes):
+            # draw one system per frame and take a single-frame batch from it
+            self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
+            frame = self.data_systems[self.pick_idx].get_batch(1)
+            frame["natoms_vec"] = self.natoms_vec[self.pick_idx]
+            frame["default_mesh"] = self.default_mesh[self.pick_idx]
+            frames.append(frame)
+        return self._merge_batch_data(frames)
+
+    def _merge_batch_data(self, batch_data: List[dict]) -> dict:
+        """Merge batch data from different systems.
+
+        Parameters
+        ----------
+        batch_data : list of dict
+            A list of batch data from different systems (get_batch_mixed passes one-frame batches).
+
+        Returns
+        -------
+        dict
+            The merged batch data; per-atom arrays are zero-padded to the largest atom count.
+        """
+        b_data = {}
+        max_natoms = max(bb["natoms_vec"][0] for bb in batch_data)
+        # natoms_vec: the first three entries hold the padded atom count, the rest stay 0
+        natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int)
+        natoms_vec[0:3] = max_natoms
+        b_data["natoms_vec"] = natoms_vec
+        # real_natoms_vec: one row per entry, the natoms_vec attached to that entry
+        real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data])
+        b_data["real_natoms_vec"] = real_natoms_vec
+        # type: rows shorter than max_natoms are padded with -1
+        type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int)
+        for ii, bb in enumerate(batch_data):
+            type_vec[ii, : bb["type"].shape[1]] = bb["type"][0]  # only row 0 is copied
+        b_data["type"] = type_vec
+        # default_mesh: element-wise mean over the entries
+        default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0)
+        b_data["default_mesh"] = default_mesh
+        # other data: the registered items, described by the data dict of system 0
+        data_dict = self.get_data_dict(0)
+        for kk, vv in data_dict.items():
+            if kk not in batch_data[0]:
+                continue
+            b_data["find_" + kk] = batch_data[0]["find_" + kk]  # flag of the first entry is used
+            if not vv["atomic"]:
+                b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0)
+            else:
+                b_data[kk] = np.zeros(
+                    (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]),
+                    dtype=batch_data[0][kk].dtype,
+                )
+                for ii, bb in enumerate(batch_data):
+                    b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0]  # row 0 only; the tail stays 0
+        return b_data
+
+    # ! altered by Marián Rynik
+    def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1):  # deprecated
+        """Get the test data of one data system.
+
+        Parameters
+        ----------
+        sys_idx
+            Index of the system whose test data is returned.
+            If None, the most recently picked system (`self.pick_idx`) is used.
+        n_test
+            Number of test data; -1 gets all of them. Only the first call loads the
+            data, so later calls reuse it regardless of `n_test`.
+        """
+        # the test data of all systems is loaded lazily on first use
+        if not hasattr(self, "test_data"):
+            self._load_test(ntests=n_test)
+        idx = self.pick_idx if sys_idx is None else sys_idx
+
+        # take the idx-th entry of every key, then attach the system-level metadata
+        test_system_data = {
+            nn: self.test_data[nn][idx] for nn in self.test_data
+        }
+        test_system_data["natoms_vec"] = self.natoms_vec[idx]
+        test_system_data["default_mesh"] = self.default_mesh[idx]
+        return test_system_data
+
+    def get_sys_ntest(self, sys_idx=None):
+        """Get the test size of the system `sys_idx`.
+
+        If `sys_idx` is None, the currently selected system (`self.pick_idx`) is used.
+        """
+        if sys_idx is None:
+            sys_idx = self.pick_idx
+        return self.test_size[sys_idx]
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map (the result of `_check_type_map_consistency` over all systems)."""
+        return self.type_map
+
+    def get_nbatches(self) -> List[int]:
+        """Get the number of batches of each system (one list entry per system)."""
+        return self.nbatches
+
+    def get_ntypes(self) -> int:
+        """Get the number of types (the maximum of `get_ntypes()` over all systems)."""
+        return self.sys_ntypes
+
+    def get_nsystems(self) -> int:
+        """Get the number of data systems (the length of `systems` given to `__init__`)."""
+        return self.nsystems
+
+    def get_sys(self, idx: int) -> DeepmdData:
+        """Get the `DeepmdData` object of the system with index `idx`."""
+        return self.data_systems[idx]
+
+    def get_batch_size(self) -> int:
+        """Get the batch sizes: one entry per system, a list or ndarray despite the `int` hint."""
+        return self.batch_size
+
+    def _format_name_length(self, name, width):
+        """Right-align `name` to `width`, or shorten it to its tail if it is too long."""
+        if len(name) > width:
+            # keep the last `width - 3` characters and flag the cut with "-- "
+            tail = name[-(width - 3) :]
+            return "-- " + tail
+        return "{: >{}}".format(name, width)
+
+    def print_summary(self, name):
+        # log a table with one row per system; the system column is 42 characters wide
+        sys_width = 42
+        log.info(
+            f"---Summary of DataSystem: {name:13s}-----------------------------------------------"
+        )
+        log.info("found %d system(s):" % self.nsystems)
+        log.info(  # header row
+            ("%s  " % self._format_name_length("system", sys_width))
+            + ("%6s  %6s  %6s  %9s  %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc"))
+        )
+        for ii in range(self.nsystems):
+            log.info(
+                "%s  %6d  %6d  %6d  %9.3e  %3s"
+                % (
+                    self._format_name_length(self.system_dirs[ii], sys_width),
+                    self.natoms[ii],
+                    # TODO batch size * nbatches = number of structures
+                    self.batch_size[ii],
+                    self.nbatches[ii],
+                    self.sys_probs[ii],
+                    "T" if self.data_systems[ii].pbc else "F",
+                )
+            )
+        log.info(
+            "--------------------------------------------------------------------------------------"
+        )
+
+    def _make_auto_bs(self, rule):
+        """Per-system batch sizes: the smallest batch whose atom count reaches `rule`."""
+        batch_sizes = []
+        for data_sys in self.data_systems:
+            natoms = data_sys.get_natoms()
+            # ceiling division: round up when `rule` is not a multiple of natoms
+            quotient, remainder = divmod(rule, natoms)
+            batch_sizes.append(quotient + 1 if remainder else quotient)
+        return batch_sizes
+
+    # ! added by Marián Rynik
+    def _make_auto_ts(self, percent):
+        """Per-system test sizes: `percent` % of batch_size * nbatches, truncated to int."""
+        test_sizes = [
+            int(self.batch_size[ii] * self.nbatches[ii] * percent / 100)
+            for ii in range(self.nsystems)
+        ]
+
+        return test_sizes
+
+    def _check_type_map_consistency(self, type_map_list):
+        """Return the longest type map after checking that the maps agree where they overlap."""
+        longest = []
+        for type_map in type_map_list:
+            if type_map is None:
+                continue
+            if any(mine != ref for mine, ref in zip(type_map, longest)):
+                raise RuntimeError(f"inconsistent type map: {longest!s} {type_map!s}")
+            if len(type_map) > len(longest):
+                longest = type_map
+        return longest
+
+
+def process_sys_probs(sys_probs, nbatch):
+    """Fill negative entries of `sys_probs` in proportion to `nbatch` so that all sum to 1."""
+    sys_probs = np.array(sys_probs)
+    assigned = sys_probs >= 0
+    assigned_sum_prob = np.sum(assigned * sys_probs)
+    # 1e-8 is to handle floating point error; See #1917
+    assert (
+        assigned_sum_prob <= 1.0 + 1e-8
+    ), "the sum of assigned probability should be less than 1"
+    rest_sum_prob = 1.0 - assigned_sum_prob
+    ret_prob = sys_probs
+    if not np.isclose(rest_sum_prob, 0):
+        rest_nbatch = (1 - assigned) * nbatch
+        rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch)
+        ret_prob = rest_prob + assigned * sys_probs
+    assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1"
+    return ret_prob
+
+
+def prob_sys_size_ext(keywords, nsystems, nbatch):
+    """Turn a "prob_sys_size;stt:end:weight;..." specification into system probabilities.
+
+    Every block `stt:end:weight` receives the share weight / sum(weights); inside a
+    block the share is split in proportion to `nbatch`. Systems in no block get 0.
+    """
+    blocks = []
+    # the first ";"-separated field is the style name, the rest are the blocks
+    for block_str in keywords.split(";")[1:]:
+        fields = block_str.split(":")
+        stt, end, weight = int(fields[0]), int(fields[1]), float(fields[2])
+        assert weight >= 0, "the weight of a block should be no less than 0"
+        blocks.append((stt, end, weight))
+    weights = [weight for _, _, weight in blocks]
+    block_probs = np.array(weights) / np.sum(weights)
+    sys_probs = np.zeros([nsystems])
+    for (stt, end, _), block_prob in zip(blocks, block_probs):
+        nbatch_block = nbatch[stt:end]
+        tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block)
+        sys_probs[stt:end] = tmp_prob * block_prob
+    return sys_probs
diff --git a/deepmd_utils/utils/model_stat.py b/deepmd_utils/utils/model_stat.py
new file mode 100644
index 0000000000..d2cc918b64
--- /dev/null
+++ b/deepmd_utils/utils/model_stat.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from collections import (
+    defaultdict,
+)
+
+import numpy as np
+
+
+def _make_all_stat_ref(data, nbatches):
+    all_stat = defaultdict(list)  # key -> flat list of batches over all systems
+    for sys_idx in range(data.get_nsystems()):
+        for _ in range(nbatches):
+            batch = data.get_batch(sys_idx=sys_idx)
+            if "natoms_vec" in batch:
+                batch["natoms_vec"] = batch["natoms_vec"].astype(np.int32)
+            for name, value in batch.items():
+                all_stat[name].append(value)
+    return all_stat
+
+
+def make_stat_input(data, nbatches, merge_sys=True):
+    """Pack data for statistics.
+
+    Parameters
+    ----------
+    data
+        The data system; `nbatches` batches are drawn from each of its systems
+    nbatches : int
+        The number of batches per system
+    merge_sys : bool (True)
+        Whether the batches of all systems are merged into one flat list
+
+    Returns
+    -------
+    all_stat:
+        A dictionary of lists storing the data for statistics.
+        If merge_sys is False the data is accessed by
+            all_stat[key][sys_idx][batch_idx][frame_idx]
+        and if merge_sys is True by
+            all_stat[key][batch_idx][frame_idx]
+    """
+    all_stat = defaultdict(list)
+    for sys_idx in range(data.get_nsystems()):
+        sys_stat = defaultdict(list)
+        for _ in range(nbatches):
+            batch = data.get_batch(sys_idx=sys_idx)
+            if "natoms_vec" in batch:
+                batch["natoms_vec"] = batch["natoms_vec"].astype(np.int32)
+            for name, value in batch.items():
+                sys_stat[name].append(value)
+        # either splice the batches of this system in, or keep them as one nested list
+        for name, batches in sys_stat.items():
+            if merge_sys:
+                all_stat[name].extend(batches)
+            else:
+                all_stat[name].append(batches)
+    return all_stat
+
+
+def merge_sys_stat(all_stat):
+    """Flatten `all_stat[key][sys_idx][batch_idx]` into `merged[key][batch_idx]`."""
+    nsys = len(next(iter(all_stat.values())))
+    merged = defaultdict(list)
+    # the number of systems is read from the first key and applied to every key
+    for sys_idx in range(nsys):
+        for key, per_system in all_stat.items():
+            merged[key].extend(per_system[sys_idx])
+    return merged
diff --git a/deepmd_utils/utils/pair_tab.py b/deepmd_utils/utils/pair_tab.py
new file mode 100644
index 0000000000..4451f53379
--- /dev/null
+++ b/deepmd_utils/utils/pair_tab.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Tuple,
+)
+
+import numpy as np
+from scipy.interpolate import (
+    CubicSpline,
+)
+
+
+class PairTab:
+    """Pairwise tabulated potential.
+
+    Parameters
+    ----------
+    filename
+            File name for the short-range tabulated potential.
+            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columns.
+            The first column is the distance between atoms.
+            The second to the last columns are energies for pairs of certain types.
+            For example we have two atom types, 0 and 1.
+            The columns from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
+    """
+
+    def __init__(self, filename: str) -> None:
+        """Constructor."""
+        self.reinit(filename)
+
+    def reinit(self, filename: str) -> None:
+        """Initialize the tabulated interaction.
+
+        Parameters
+        ----------
+        filename
+            File name for the short-range tabulated potential.
+            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columns.
+            The first column is the distance between atoms.
+            The second to the last columns are energies for pairs of certain types.
+            For example we have two atom types, 0 and 1.
+            The columns from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
+        """
+        self.vdata = np.loadtxt(filename)
+        self.rmin = self.vdata[0][0]
+        self.hh = self.vdata[1][0] - self.vdata[0][0]
+        self.nspline = self.vdata.shape[0] - 1
+        ncol = self.vdata.shape[1] - 1
+        n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5
+        self.ntypes = int(n0 + 0.1)
+        assert self.ntypes * (self.ntypes + 1) // 2 == ncol, (
+            "number of volumes provided in %s does not match guessed number of types %d"
+            % (filename, self.ntypes)
+        )
+        self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes])
+        self.tab_data = self._make_data()
+
+    def get(self) -> Tuple[np.array, np.array]:
+        """Get the serialized table."""
+        return self.tab_info, self.tab_data
+
+    def _make_data(self):
+        data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline])
+        stride = 4 * self.nspline
+        idx_iter = 0
+        xx = self.vdata[:, 0]
+        for t0 in range(self.ntypes):
+            for t1 in range(t0, self.ntypes):
+                vv = self.vdata[:, 1 + idx_iter]
+                cs = CubicSpline(xx, vv)
+                dd = cs(xx, 1)
+                dd *= self.hh
+                dtmp = np.zeros(stride)
+                for ii in range(self.nspline):
+                    dtmp[ii * 4 + 0] = 2 * vv[ii] - 2 * vv[ii + 1] + dd[ii] + dd[ii + 1]
+                    dtmp[ii * 4 + 1] = (
+                        -3 * vv[ii] + 3 * vv[ii + 1] - 2 * dd[ii] - dd[ii + 1]
+                    )
+                    dtmp[ii * 4 + 2] = dd[ii]
+                    dtmp[ii * 4 + 3] = vv[ii]
+                data[
+                    (t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride
+                    + stride
+                ] = dtmp
+                data[
+                    (t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride
+                    + stride
+                ] = dtmp
+                idx_iter += 1
+        return data
diff --git a/deepmd_utils/utils/path.py b/deepmd_utils/utils/path.py
new file mode 100644
index 0000000000..a8e4bc329f
--- /dev/null
+++ b/deepmd_utils/utils/path.py
@@ -0,0 +1,358 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from functools import (
+    lru_cache,
+)
+from pathlib import (
+    Path,
+)
+from typing import (
+    List,
+    Optional,
+)
+
+import h5py
+import numpy as np
+from wcmatch.glob import (
+    globfilter,
+)
+
+
+class DPPath(ABC):
+    """The path class to data system (DeepmdData).
+
+    Parameters
+    ----------
+    path : str
+        path
+    """
+
+    def __new__(cls, path: str):
+        if cls is DPPath:
+            if os.path.isdir(path):
+                return super().__new__(DPOSPath)
+            elif os.path.isfile(path.split("#")[0]):
+                # assume h5 if it is not dir
+                # TODO: check if it is a real h5? or just check suffix?
+                return super().__new__(DPH5Path)
+            raise FileNotFoundError("%s not found" % path)
+        return super().__new__(cls)
+
+    @abstractmethod
+    def load_numpy(self) -> np.ndarray:
+        """Load NumPy array.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+
+    @abstractmethod
+    def load_txt(self, **kwargs) -> np.ndarray:
+        """Load NumPy array from text.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+
+    @abstractmethod
+    def glob(self, pattern: str) -> List["DPPath"]:
+        """Search path using the glob pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+
+    @abstractmethod
+    def rglob(self, pattern: str) -> List["DPPath"]:
+        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
+        of the given relative pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+
+    @abstractmethod
+    def is_file(self) -> bool:
+        """Check if self is file."""
+
+    @abstractmethod
+    def is_dir(self) -> bool:
+        """Check if self is directory."""
+
+    @abstractmethod
+    def __truediv__(self, key: str) -> "DPPath":
+        """Used for / operator."""
+
+    @abstractmethod
+    def __lt__(self, other: "DPPath") -> bool:
+        """Whether this DPPath is less than other for sorting."""
+
+    @abstractmethod
+    def __str__(self) -> str:
+        """Represent string."""
+
+    def __repr__(self) -> str:
+        return f"{type(self)} ({self!s})"
+
+    def __eq__(self, other) -> bool:
+        return str(self) == str(other)
+
+    def __hash__(self):
+        return hash(str(self))
+
+
+class DPOSPath(DPPath):
+    """The OS path class to data system (DeepmdData) for real directories.
+
+    Parameters
+    ----------
+    path : str
+        path
+    """
+
+    def __init__(self, path: str) -> None:
+        super().__init__()
+        if isinstance(path, Path):
+            self.path = path
+        else:
+            self.path = Path(path)
+
+    def load_numpy(self) -> np.ndarray:
+        """Load NumPy array.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        return np.load(str(self.path))
+
+    def load_txt(self, **kwargs) -> np.ndarray:
+        """Load NumPy array from text.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        return np.loadtxt(str(self.path), **kwargs)
+
+    def glob(self, pattern: str) -> List["DPPath"]:
+        """Search path using the glob pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        # currently a DPOSPath will only produce other DPOSPath objects
+        # TODO: discuss if we want to mix DPOSPath and DPH5Path?
+        return [type(self)(p) for p in self.path.glob(pattern)]
+
+    def rglob(self, pattern: str) -> List["DPPath"]:
+        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
+        of the given relative pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        return [type(self)(p) for p in self.path.rglob(pattern)]
+
+    def is_file(self) -> bool:
+        """Check if self is file."""
+        return self.path.is_file()
+
+    def is_dir(self) -> bool:
+        """Check if self is directory."""
+        return self.path.is_dir()
+
+    def __truediv__(self, key: str) -> "DPPath":
+        """Used for / operator."""
+        return type(self)(self.path / key)
+
+    def __lt__(self, other: "DPOSPath") -> bool:
+        """Whether this DPPath is less than other for sorting."""
+        return self.path < other.path
+
+    def __str__(self) -> str:
+        """Represent string."""
+        return str(self.path)
+
+
+class DPH5Path(DPPath):
+    """The path class to data system (DeepmdData) for HDF5 files.
+
+    Notes
+    -----
+    OS - HDF5 relationship:
+        directory - Group
+        file - Dataset
+
+    Parameters
+    ----------
+    path : str
+        path
+    """
+
+    def __init__(self, path: str) -> None:
+        super().__init__()
+        # we use "#" to split path
+        # so we do not support file names containing #...
+        s = path.split("#")
+        self.root_path = s[0]
+        self.root = self._load_h5py(s[0])
+        # h5 path: default is the root path
+        self.name = s[1] if len(s) > 1 else "/"
+
+    @classmethod
+    @lru_cache(None)
+    def _load_h5py(cls, path: str) -> h5py.File:
+        """Load hdf5 file.
+
+        Parameters
+        ----------
+        path : str
+            path to hdf5 file
+        """
+        # this method is cached to avoid loading the same file
+        # again from different DPH5Path instances.
+        # However, the file will never be closed?
+        return h5py.File(path, "r")
+
+    def load_numpy(self) -> np.ndarray:
+        """Load NumPy array.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        return self.root[self.name][:]
+
+    def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray:
+        """Load NumPy array from text.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        arr = self.load_numpy()
+        if dtype:
+            arr = arr.astype(dtype)
+        return arr
+
+    def glob(self, pattern: str) -> List["DPPath"]:
+        """Search path using the glob pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        # first keep only the paths starting with the current path, which is faster
+        subpaths = [ii for ii in self._keys if ii.startswith(self.name)]
+        return [
+            type(self)(f"{self.root_path}#{pp}")
+            for pp in globfilter(subpaths, self._connect_path(pattern))
+        ]
+
+    def rglob(self, pattern: str) -> List["DPPath"]:
+        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
+        of the given relative pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        return self.glob("**" + pattern)
+
+    @property
+    def _keys(self) -> List[str]:
+        """Walk all groups and dataset."""
+        return self._file_keys(self.root)
+
+    @classmethod
+    @lru_cache(None)
+    def _file_keys(cls, file: h5py.File) -> List[str]:
+        """Walk all groups and dataset."""
+        l = []
+        file.visit(lambda x: l.append("/" + x))
+        return l
+
+    def is_file(self) -> bool:
+        """Check if self is file."""
+        if self.name not in self._keys:
+            return False
+        return isinstance(self.root[self.name], h5py.Dataset)
+
+    def is_dir(self) -> bool:
+        """Check if self is directory."""
+        if self.name not in self._keys:
+            return False
+        return isinstance(self.root[self.name], h5py.Group)
+
+    def __truediv__(self, key: str) -> "DPPath":
+        """Used for / operator."""
+        return type(self)(f"{self.root_path}#{self._connect_path(key)}")
+
+    def _connect_path(self, path: str) -> str:
+        """Connect self with path."""
+        if self.name.endswith("/"):
+            return f"{self.name}{path}"
+        return f"{self.name}/{path}"
+
+    def __lt__(self, other: "DPH5Path") -> bool:
+        """Whether this DPPath is less than other for sorting."""
+        if self.root_path == other.root_path:
+            return self.name < other.name
+        return self.root_path < other.root_path
+
+    def __str__(self) -> str:
+        """Returns path of self."""
+        return f"{self.root_path}#{self.name}"
diff --git a/deepmd_utils/utils/plugin.py b/deepmd_utils/utils/plugin.py
new file mode 100644
index 0000000000..2a77b744c5
--- /dev/null
+++ b/deepmd_utils/utils/plugin.py
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Base of plugin systems."""
+# copied from https://github.com/deepmodeling/dpdata/blob/a3e76d75de53f6076254de82d18605a010dc3b00/dpdata/plugin.py
+
+from abc import (
+    ABCMeta,
+)
+from typing import (
+    Callable,
+)
+
+
+class Plugin:
+    """A class to register and restore plugins.
+
+    Attributes
+    ----------
+    plugins : Dict[str, object]
+        plugins
+
+    Examples
+    --------
+    >>> plugin = Plugin()
+    >>> @plugin.register("xx")
+    ... def xxx():
+    ...     pass
+    >>> print(plugin.plugins['xx'])
+    """
+
+    def __init__(self):
+        self.plugins = {}
+
+    def __add__(self, other) -> "Plugin":
+        self.plugins.update(other.plugins)
+        return self
+
+    def register(self, key: str) -> Callable[[object], object]:
+        """Register a plugin.
+
+        Parameters
+        ----------
+        key : str
+            key of the plugin
+
+        Returns
+        -------
+        Callable[[object], object]
+            decorator
+        """
+
+        def decorator(object: object) -> object:
+            self.plugins[key] = object
+            return object
+
+        return decorator
+
+    def get_plugin(self, key) -> object:
+        """Visit a plugin by key.
+
+        Parameters
+        ----------
+        key : str
+            key of the plugin
+
+        Returns
+        -------
+        object
+            the plugin
+        """
+        return self.plugins[key]
+
+
+class VariantMeta:
+    def __call__(cls, *args, **kwargs):
+        """Remove `type` and keys that start with an underscore."""
+        obj = cls.__new__(cls, *args, **kwargs)
+        kwargs.pop("type", None)
+        to_pop = []
+        for kk in kwargs:
+            if kk[0] == "_":
+                to_pop.append(kk)
+        for kk in to_pop:
+            kwargs.pop(kk, None)
+        obj.__init__(*args, **kwargs)
+        return obj
+
+
+class VariantABCMeta(VariantMeta, ABCMeta):
+    pass
+
+
+class PluginVariant(metaclass=VariantABCMeta):
+    """A class to remove `type` from input arguments."""
+
+    pass
diff --git a/deepmd_utils/utils/random.py b/deepmd_utils/utils/random.py
new file mode 100644
index 0000000000..8944419412
--- /dev/null
+++ b/deepmd_utils/utils/random.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+)
+
+import numpy as np
+
+_RANDOM_GENERATOR = np.random.RandomState()
+
+
+def choice(a: np.ndarray, p: Optional[np.ndarray] = None):
+    """Generates a random sample from a given 1-D array.
+
+    Parameters
+    ----------
+    a : np.ndarray
+        A random sample is generated from its elements.
+    p : np.ndarray
+        The probabilities associated with each entry in a.
+
+    Returns
+    -------
+    np.ndarray
+        arrays with results and their shapes
+    """
+    return _RANDOM_GENERATOR.choice(a, p=p)
+
+
+def random(size=None):
+    """Return random floats in the half-open interval [0.0, 1.0).
+
+    Parameters
+    ----------
+    size
+        Output shape.
+
+    Returns
+    -------
+    np.ndarray
+        Arrays with results and their shapes.
+    """
+    return _RANDOM_GENERATOR.random_sample(size)
+
+
+def seed(val: Optional[int] = None):
+    """Seed the generator.
+
+    Parameters
+    ----------
+    val : int
+        Seed.
+    """
+    _RANDOM_GENERATOR.seed(val)
+
+
+def shuffle(x: np.ndarray):
+    """Modify a sequence in-place by shuffling its contents.
+
+    Parameters
+    ----------
+    x : np.ndarray
+        The array or list to be shuffled.
+    """
+    _RANDOM_GENERATOR.shuffle(x)
+
+
+__all__ = ["choice", "random", "seed", "shuffle"]
diff --git a/deepmd_utils/utils/weight_avg.py b/deepmd_utils/utils/weight_avg.py
new file mode 100644
index 0000000000..b344d3bb75
--- /dev/null
+++ b/deepmd_utils/utils/weight_avg.py
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from collections import (
+    defaultdict,
+)
+from typing import (
+    Dict,
+    List,
+    Tuple,
+)
+
+import numpy as np
+
+
+def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict:
+    """Compute weighted average of prediction errors (MAE or RMSE) for model.
+
+    Parameters
+    ----------
+    errors : List[Dict[str, Tuple[float, float]]]
+        List: the error of systems
+        Dict: the error of quantities, name given by the key
+        str: the name of the quantity, must start with 'mae' or 'rmse'
+        Tuple: (error, weight)
+
+    Returns
+    -------
+    Dict
+        weighted averages
+    """
+    sum_err = defaultdict(float)
+    sum_siz = defaultdict(int)
+    for err in errors:
+        for kk, (ee, ss) in err.items():
+            if kk.startswith("mae"):
+                sum_err[kk] += ee * ss
+            elif kk.startswith("rmse"):
+                sum_err[kk] += ee * ee * ss
+            else:
+                raise RuntimeError("unknown error type")
+            sum_siz[kk] += ss
+    for kk in sum_err.keys():
+        if kk.startswith("mae"):
+            sum_err[kk] = sum_err[kk] / sum_siz[kk]
+        elif kk.startswith("rmse"):
+            sum_err[kk] = np.sqrt(sum_err[kk] / sum_siz[kk])
+        else:
+            raise RuntimeError("unknown error type")
+    return sum_err

From 539e4ab4545be8e45da28549edf73605bd7ba9f1 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 14 Jan 2024 00:34:45 -0500
Subject: [PATCH 82/97] add cross-platform AutoBatchSize (#3143)

See https://github.com/deepmodeling/deepmd-kit/issues/3118 and
https://github.com/dptech-corp/deepmd-pytorch/issues/137. Subclass needs
to implement `is_gpu_available` and `is_oom_error`.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/utils/batch_size.py       | 208 +++------------------------
 deepmd_utils/utils/batch_size.py | 233 +++++++++++++++++++++++++++++++
 2 files changed, 250 insertions(+), 191 deletions(-)
 create mode 100644 deepmd_utils/utils/batch_size.py

diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py
index fe876a65a5..863520b3f4 100644
--- a/deepmd/utils/batch_size.py
+++ b/deepmd/utils/batch_size.py
@@ -1,12 +1,4 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import logging
-import os
-from typing import (
-    Callable,
-    Tuple,
-)
-
-import numpy as np
 from packaging.version import (
     Version,
 )
@@ -18,197 +10,31 @@
 from deepmd.utils.errors import (
     OutOfMemoryError,
 )
+from deepmd_utils.utils.batch_size import AutoBatchSize as AutoBatchSizeBase
 
-log = logging.getLogger(__name__)
-
-
-class AutoBatchSize:
-    """This class allows DeePMD-kit to automatically decide the maximum
-    batch size that will not cause an OOM error.
-
-    Notes
-    -----
-    In some CPU environments, the program may be directly killed when OOM. In
-    this case, by default the batch size will not be increased for CPUs. The
-    environment variable `DP_INFER_BATCH_SIZE` can be set as the batch size.
-
-    In other cases, we assume all OOM error will raise :class:`OutOfMemoryError`.
-
-    Parameters
-    ----------
-    initial_batch_size : int, default: 1024
-        initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE
-        is not set
-    factor : float, default: 2.
-        increased factor
-
-    Attributes
-    ----------
-    current_batch_size : int
-        current batch size (number of total atoms)
-    maximum_working_batch_size : int
-        maximum working batch size
-    minimal_not_working_batch_size : int
-        minimal not working batch size
-    """
-
-    def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
-        # See also PyTorchLightning/pytorch-lightning#1638
-        # TODO: discuss a proper initial batch size
-        self.current_batch_size = initial_batch_size
-        DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0))
-        if DP_INFER_BATCH_SIZE > 0:
-            self.current_batch_size = DP_INFER_BATCH_SIZE
-            self.maximum_working_batch_size = DP_INFER_BATCH_SIZE
-            self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1
-        else:
-            self.maximum_working_batch_size = initial_batch_size
-            if (
-                Version(TF_VERSION) >= Version("1.14")
-                and tf.config.experimental.get_visible_devices("GPU")
-            ) or tf.test.is_gpu_available():
-                self.minimal_not_working_batch_size = 2**31
-            else:
-                self.minimal_not_working_batch_size = (
-                    self.maximum_working_batch_size + 1
-                )
-                log.warning(
-                    "You can use the environment variable DP_INFER_BATCH_SIZE to"
-                    "control the inference batch size (nframes * natoms). "
-                    "The default value is %d." % initial_batch_size
-                )
 
-        self.factor = factor
-
-    def execute(
-        self, callable: Callable, start_index: int, natoms: int
-    ) -> Tuple[int, tuple]:
-        """Excuate a method with given batch size.
-
-        Parameters
-        ----------
-        callable : Callable
-            The method should accept the batch size and start_index as parameters,
-            and returns executed batch size and data.
-        start_index : int
-            start index
-        natoms : int
-            natoms
+class AutoBatchSize(AutoBatchSizeBase):
+    def is_gpu_available(self) -> bool:
+        """Check if GPU is available.
 
         Returns
         -------
-        int
-            executed batch size * number of atoms
-        tuple
-            result from callable, None if failing to execute
-
-        Raises
-        ------
-        OutOfMemoryError
-            OOM when batch size is 1
+        bool
+            True if GPU is available
         """
-        if natoms > 0:
-            batch_nframes = self.current_batch_size // natoms
-        else:
-            batch_nframes = self.current_batch_size
-        try:
-            n_batch, result = callable(max(batch_nframes, 1), start_index)
-        except OutOfMemoryError as e:
-            # TODO: it's very slow to catch OOM error; I don't know what TF is doing here
-            # but luckily we only need to catch once
-            self.minimal_not_working_batch_size = min(
-                self.minimal_not_working_batch_size, self.current_batch_size
-            )
-            if self.maximum_working_batch_size >= self.minimal_not_working_batch_size:
-                self.maximum_working_batch_size = int(
-                    self.minimal_not_working_batch_size / self.factor
-                )
-            if self.minimal_not_working_batch_size <= natoms:
-                raise OutOfMemoryError(
-                    "The callable still throws an out-of-memory (OOM) error even when batch size is 1!"
-                ) from e
-            # adjust the next batch size
-            self._adjust_batch_size(1.0 / self.factor)
-            return 0, None
-        else:
-            n_tot = n_batch * natoms
-            self.maximum_working_batch_size = max(
-                self.maximum_working_batch_size, n_tot
-            )
-            # adjust the next batch size
-            if (
-                n_tot + natoms > self.current_batch_size
-                and self.current_batch_size * self.factor
-                < self.minimal_not_working_batch_size
-            ):
-                self._adjust_batch_size(self.factor)
-            return n_batch, result
+        return (
+            Version(TF_VERSION) >= Version("1.14")
+            and tf.config.experimental.get_visible_devices("GPU")
+        ) or tf.test.is_gpu_available()
 
-    def _adjust_batch_size(self, factor: float):
-        old_batch_size = self.current_batch_size
-        self.current_batch_size = int(self.current_batch_size * factor)
-        log.info(
-            "Adjust batch size from %d to %d"
-            % (old_batch_size, self.current_batch_size)
-        )
-
-    def execute_all(
-        self, callable: Callable, total_size: int, natoms: int, *args, **kwargs
-    ) -> Tuple[np.ndarray]:
-        """Excuate a method with all given data.
+    def is_oom_error(self, e: Exception) -> bool:
+        """Check if the exception is an OOM error.
 
         Parameters
         ----------
-        callable : Callable
-            The method should accept *args and **kwargs as input and return the similiar array.
-        total_size : int
-            Total size
-        natoms : int
-            The number of atoms
-        *args
-            Variable length argument list.
-        **kwargs
-            If 2D np.ndarray, assume the first axis is batch; otherwise do nothing.
+        e : Exception
+            Exception
         """
-
-        def execute_with_batch_size(
-            batch_size: int, start_index: int
-        ) -> Tuple[int, Tuple[np.ndarray]]:
-            end_index = start_index + batch_size
-            end_index = min(end_index, total_size)
-            return (end_index - start_index), callable(
-                *[
-                    (
-                        vv[start_index:end_index]
-                        if isinstance(vv, np.ndarray) and vv.ndim > 1
-                        else vv
-                    )
-                    for vv in args
-                ],
-                **{
-                    kk: (
-                        vv[start_index:end_index]
-                        if isinstance(vv, np.ndarray) and vv.ndim > 1
-                        else vv
-                    )
-                    for kk, vv in kwargs.items()
-                },
-            )
-
-        index = 0
-        results = []
-        while index < total_size:
-            n_batch, result = self.execute(execute_with_batch_size, index, natoms)
-            if not isinstance(result, tuple):
-                result = (result,)
-            index += n_batch
-            if n_batch:
-                for rr in result:
-                    rr.reshape((n_batch, -1))
-                results.append(result)
-
-        r = tuple([np.concatenate(r, axis=0) for r in zip(*results)])
-        if len(r) == 1:
-            # avoid returning tuple if callable doesn't return tuple
-            r = r[0]
-        return r
+        # TODO: it's very slow to catch OOM error; I don't know what TF is doing here
+        # but luckily we only need to catch once
+        return isinstance(e, (tf.errors.ResourceExhaustedError, OutOfMemoryError))
diff --git a/deepmd_utils/utils/batch_size.py b/deepmd_utils/utils/batch_size.py
new file mode 100644
index 0000000000..c85806458f
--- /dev/null
+++ b/deepmd_utils/utils/batch_size.py
@@ -0,0 +1,233 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+import os
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    Callable,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.utils.errors import (
+    OutOfMemoryError,
+)
+
+log = logging.getLogger(__name__)
+
+
+class AutoBatchSize(ABC):
+    """This class allows DeePMD-kit to automatically decide the maximum
+    batch size that will not cause an OOM error.
+
+    Notes
+    -----
+    In some CPU environments, the program may be directly killed when OOM. In
+    this case, by default the batch size will not be increased for CPUs. The
+    environment variable `DP_INFER_BATCH_SIZE` can be set as the batch size.
+
+    In other cases, we assume all OOM error will raise :class:`OutOfMemoryError`.
+
+    Parameters
+    ----------
+    initial_batch_size : int, default: 1024
+        initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE
+        is not set
+    factor : float, default: 2.
+        increased factor
+
+    Attributes
+    ----------
+    current_batch_size : int
+        current batch size (number of total atoms)
+    maximum_working_batch_size : int
+        maximum working batch size
+    minimal_not_working_batch_size : int
+        minimal not working batch size
+    """
+
+    def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
+        # See also PyTorchLightning/pytorch-lightning#1638
+        # TODO: discuss a proper initial batch size
+        self.current_batch_size = initial_batch_size
+        DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0))
+        if DP_INFER_BATCH_SIZE > 0:
+            self.current_batch_size = DP_INFER_BATCH_SIZE
+            self.maximum_working_batch_size = DP_INFER_BATCH_SIZE
+            self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1
+        else:
+            self.maximum_working_batch_size = initial_batch_size
+            if self.is_gpu_available():
+                self.minimal_not_working_batch_size = 2**31
+            else:
+                self.minimal_not_working_batch_size = (
+                    self.maximum_working_batch_size + 1
+                )
+                log.warning(
+                    "You can use the environment variable DP_INFER_BATCH_SIZE to"
+                    "control the inference batch size (nframes * natoms). "
+                    "The default value is %d." % initial_batch_size
+                )
+
+        self.factor = factor
+
+    def execute(
+        self, callable: Callable, start_index: int, natoms: int
+    ) -> Tuple[int, tuple]:
+        """Execute a method with given batch size.
+
+        Parameters
+        ----------
+        callable : Callable
+            The method should accept the batch size and start_index as parameters,
+            and returns executed batch size and data.
+        start_index : int
+            start index
+        natoms : int
+            natoms
+
+        Returns
+        -------
+        int
+            executed batch size * number of atoms
+        tuple
+            result from callable, None if failing to execute
+
+        Raises
+        ------
+        OutOfMemoryError
+            OOM when batch size is 1
+        """
+        if natoms > 0:
+            batch_nframes = self.current_batch_size // natoms
+        else:
+            batch_nframes = self.current_batch_size
+        try:
+            n_batch, result = callable(max(batch_nframes, 1), start_index)
+        except Exception as e:
+            if not self.is_oom_error(e):
+                raise e
+            self.minimal_not_working_batch_size = min(
+                self.minimal_not_working_batch_size, self.current_batch_size
+            )
+            if self.maximum_working_batch_size >= self.minimal_not_working_batch_size:
+                self.maximum_working_batch_size = int(
+                    self.minimal_not_working_batch_size / self.factor
+                )
+            if self.minimal_not_working_batch_size <= natoms:
+                raise OutOfMemoryError(
+                    "The callable still throws an out-of-memory (OOM) error even when batch size is 1!"
+                ) from e
+            # adjust the next batch size
+            self._adjust_batch_size(1.0 / self.factor)
+            return 0, None
+        else:
+            n_tot = n_batch * natoms
+            self.maximum_working_batch_size = max(
+                self.maximum_working_batch_size, n_tot
+            )
+            # adjust the next batch size
+            if (
+                n_tot + natoms > self.current_batch_size
+                and self.current_batch_size * self.factor
+                < self.minimal_not_working_batch_size
+            ):
+                self._adjust_batch_size(self.factor)
+            return n_batch, result
+
+    def _adjust_batch_size(self, factor: float):
+        old_batch_size = self.current_batch_size
+        self.current_batch_size = int(self.current_batch_size * factor)
+        log.info(
+            "Adjust batch size from %d to %d"
+            % (old_batch_size, self.current_batch_size)
+        )
+
+    def execute_all(
+        self, callable: Callable, total_size: int, natoms: int, *args, **kwargs
+    ) -> Tuple[np.ndarray]:
+        """Execute a method with all given data.
+
+        Parameters
+        ----------
+        callable : Callable
+            The method should accept *args and **kwargs as input and return the similar array.
+        total_size : int
+            Total size
+        natoms : int
+            The number of atoms
+        *args
+            Variable length argument list.
+        **kwargs
+            If 2D np.ndarray, assume the first axis is batch; otherwise do nothing.
+        """
+
+        def execute_with_batch_size(
+            batch_size: int, start_index: int
+        ) -> Tuple[int, Tuple[np.ndarray]]:
+            end_index = start_index + batch_size
+            end_index = min(end_index, total_size)
+            return (end_index - start_index), callable(
+                *[
+                    (
+                        vv[start_index:end_index]
+                        if isinstance(vv, np.ndarray) and vv.ndim > 1
+                        else vv
+                    )
+                    for vv in args
+                ],
+                **{
+                    kk: (
+                        vv[start_index:end_index]
+                        if isinstance(vv, np.ndarray) and vv.ndim > 1
+                        else vv
+                    )
+                    for kk, vv in kwargs.items()
+                },
+            )
+
+        index = 0
+        results = []
+        while index < total_size:
+            n_batch, result = self.execute(execute_with_batch_size, index, natoms)
+            if not isinstance(result, tuple):
+                result = (result,)
+            index += n_batch
+            if n_batch:
+                for rr in result:
+                    rr.reshape((n_batch, -1))
+                results.append(result)
+
+        r = tuple([np.concatenate(r, axis=0) for r in zip(*results)])
+        if len(r) == 1:
+            # avoid returning tuple if callable doesn't return tuple
+            r = r[0]
+        return r
+
+    @abstractmethod
+    def is_gpu_available(self) -> bool:
+        """Check if GPU is available.
+
+        Returns
+        -------
+        bool
+            True if GPU is available
+        """
+
+    @abstractmethod
+    def is_oom_error(self, e: Exception) -> bool:
+        """Check if the exception is an OOM error.
+
+        Parameters
+        ----------
+        e : Exception
+            Exception
+
+        Returns
+        -------
+        bool
+            True if the exception is an OOM error
+        """

From 89d9a887ffc59e19abea5516a27b220118820c95 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 14 Jan 2024 00:36:05 -0500
Subject: [PATCH 83/97] move deepmd.entrypoints.{doc,gui} to
 deepmd_utils.entrypoints.{doc,gui} (#3144)

See #3118

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/entrypoints/doc.py            | 18 ++-------------
 deepmd/entrypoints/gui.py            | 33 ++++------------------------
 deepmd_utils/entrypoints/__init__.py |  1 +
 deepmd_utils/entrypoints/doc.py      | 20 +++++++++++++++++
 deepmd_utils/entrypoints/gui.py      | 31 ++++++++++++++++++++++++++
 5 files changed, 58 insertions(+), 45 deletions(-)
 create mode 100644 deepmd_utils/entrypoints/__init__.py
 create mode 100644 deepmd_utils/entrypoints/doc.py
 create mode 100644 deepmd_utils/entrypoints/gui.py

diff --git a/deepmd/entrypoints/doc.py b/deepmd/entrypoints/doc.py
index 087eb10f73..cc28e52930 100644
--- a/deepmd/entrypoints/doc.py
+++ b/deepmd/entrypoints/doc.py
@@ -1,20 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module that prints train input arguments docstrings."""
-
-from deepmd.utils.argcheck import (
-    gen_doc,
-    gen_json,
+from deepmd_utils.entrypoints.doc import (
+    doc_train_input,
 )
 
 __all__ = ["doc_train_input"]
-
-
-def doc_train_input(*, out_type: str = "rst", **kwargs):
-    """Print out trining input arguments to console."""
-    if out_type == "rst":
-        doc_str = gen_doc(make_anchor=True)
-    elif out_type == "json":
-        doc_str = gen_json()
-    else:
-        raise RuntimeError("Unsupported out type %s" % out_type)
-    print(doc_str)
diff --git a/deepmd/entrypoints/gui.py b/deepmd/entrypoints/gui.py
index 8b6b9e0a09..72de65f1c2 100644
--- a/deepmd/entrypoints/gui.py
+++ b/deepmd/entrypoints/gui.py
@@ -1,31 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""DP-GUI entrypoint."""
+from deepmd_utils.entrypoints.gui import (
+    start_dpgui,
+)
 
-
-def start_dpgui(*, port: int, bind_all: bool, **kwargs):
-    """Host DP-GUI server.
-
-    Parameters
-    ----------
-    port : int
-        The port to serve DP-GUI on.
-    bind_all : bool
-        Serve on all public interfaces. This will expose your DP-GUI instance
-        to the network on both IPv4 and IPv6 (where available).
-    **kwargs
-        additional arguments
-
-    Raises
-    ------
-    ModuleNotFoundError
-        The dpgui package is not installed
-    """
-    try:
-        from dpgui import (
-            start_dpgui,
-        )
-    except ModuleNotFoundError as e:
-        raise ModuleNotFoundError(
-            "To use DP-GUI, please install the dpgui package:\npip install dpgui"
-        ) from e
-    start_dpgui(port=port, bind_all=bind_all)
+__all__ = ["start_dpgui"]
diff --git a/deepmd_utils/entrypoints/__init__.py b/deepmd_utils/entrypoints/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/deepmd_utils/entrypoints/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/deepmd_utils/entrypoints/doc.py b/deepmd_utils/entrypoints/doc.py
new file mode 100644
index 0000000000..9f1fd39095
--- /dev/null
+++ b/deepmd_utils/entrypoints/doc.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Module that prints train input arguments docstrings."""
+
+from deepmd_utils.utils.argcheck import (
+    gen_doc,
+    gen_json,
+)
+
+__all__ = ["doc_train_input"]
+
+
+def doc_train_input(*, out_type: str = "rst", **kwargs):
+    """Print out training input arguments to console."""
+    if out_type == "rst":
+        doc_str = gen_doc(make_anchor=True)
+    elif out_type == "json":
+        doc_str = gen_json()
+    else:
+        raise RuntimeError("Unsupported out type %s" % out_type)
+    print(doc_str)
diff --git a/deepmd_utils/entrypoints/gui.py b/deepmd_utils/entrypoints/gui.py
new file mode 100644
index 0000000000..8b6b9e0a09
--- /dev/null
+++ b/deepmd_utils/entrypoints/gui.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""DP-GUI entrypoint."""
+
+
+def start_dpgui(*, port: int, bind_all: bool, **kwargs):
+    """Host DP-GUI server.
+
+    Parameters
+    ----------
+    port : int
+        The port to serve DP-GUI on.
+    bind_all : bool
+        Serve on all public interfaces. This will expose your DP-GUI instance
+        to the network on both IPv4 and IPv6 (where available).
+    **kwargs
+        additional arguments
+
+    Raises
+    ------
+    ModuleNotFoundError
+        The dpgui package is not installed
+    """
+    try:
+        from dpgui import (
+            start_dpgui,
+        )
+    except ModuleNotFoundError as e:
+        raise ModuleNotFoundError(
+            "To use DP-GUI, please install the dpgui package:\npip install dpgui"
+        ) from e
+    start_dpgui(port=port, bind_all=bind_all)

From e218f9cc7fccc16ff05e9c07fcc6ef0af201e188 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 14 Jan 2024 19:45:18 -0500
Subject: [PATCH 84/97] cc: refactor DeepPot to support multiple backends
 (#3142)

See #3119.
At this time, only TF is supported in this multiple-backend
framework.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/api_cc/include/DeepPot.h   |  592 +++++++++--
 source/api_cc/include/DeepPotTF.h |  628 ++++++++++++
 source/api_cc/include/common.h    |    2 +
 source/api_cc/src/DeepPot.cc      |  864 +---------------
 source/api_cc/src/DeepPotTF.cc    | 1556 +++++++++++++++++++++++++++++
 5 files changed, 2732 insertions(+), 910 deletions(-)
 create mode 100644 source/api_cc/include/DeepPotTF.h
 create mode 100644 source/api_cc/src/DeepPotTF.cc

diff --git a/source/api_cc/include/DeepPot.h b/source/api_cc/include/DeepPot.h
index 0e61b03ce9..e8e64e31c0 100644
--- a/source/api_cc/include/DeepPot.h
+++ b/source/api_cc/include/DeepPot.h
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #pragma once
 
+#include <memory>
+
 #include "common.h"
 #include "neighbor_list.h"
 
@@ -8,6 +10,499 @@ namespace deepmd {
 /**
  * @brief Deep Potential.
  **/
+class DeepPotBase {
+ public:
+  /**
+   * @brief DP constructor without initialization.
+   **/
+  DeepPotBase(){};
+  virtual ~DeepPotBase(){};
+  /**
+   * @brief DP constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  DeepPotBase(const std::string& model,
+              const int& gpu_rank = 0,
+              const std::string& file_content = "");
+  /**
+   * @brief Initialize the DP.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  virtual void init(const std::string& model,
+                    const int& gpu_rank = 0,
+                    const std::string& file_content = "") = 0;
+
+  /**
+   * @brief Evaluate the energy, force and virial by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   * @{
+   **/
+  virtual void computew(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+
+  virtual void computew(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  /** @} */
+  /**
+   * @brief Evaluate the energy, force and virial by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   * @{
+   **/
+  virtual void computew(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+
+  virtual void computew(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  /** @} */
+
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   * @{
+   **/
+  virtual void computew(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+
+  virtual void computew(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  /** @} */
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   * @{
+   **/
+  virtual void computew(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+
+  virtual void computew(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const int nghost,
+      const InputNlist& inlist,
+      const int& ago,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  /** @} */
+  /**
+   * @brief Evaluate the energy, force, and virial with the mixed type
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] nframes The number of frames.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The array should be of size nframes x
+   *natoms.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   * @{
+   **/
+  virtual void computew_mixed_type(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew_mixed_type(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  virtual void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  /** @} */
+  /**
+   * @brief Evaluate the energy, force, and virial with the mixed type
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] nframes The number of frames.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The array should be of size nframes x
+   *natoms.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   * @{
+   **/
+  virtual void computew_mixed_type(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew_mixed_type(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  virtual void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>()) = 0;
+  virtual void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>()) = 0;
+  /** @} */
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  virtual double cutoff() const = 0;
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  virtual int numb_types() const = 0;
+  /**
+   * @brief Get the number of types with spin.
+   * @return The number of types with spin.
+   **/
+  virtual int numb_types_spin() const = 0;
+  /**
+   * @brief Get the dimension of the frame parameter.
+   * @return The dimension of the frame parameter.
+   **/
+  virtual int dim_fparam() const = 0;
+  /**
+   * @brief Get the dimension of the atomic parameter.
+   * @return The dimension of the atomic parameter.
+   **/
+  virtual int dim_aparam() const = 0;
+  /**
+   * @brief Get the type map (element name of the atom types) of this model.
+   * @param[out] type_map The type map of this model.
+   **/
+  virtual void get_type_map(std::string& type_map) = 0;
+
+  /**
+   * @brief Get whether the atom dimension of aparam is nall instead of fparam.
+   * @param[out] aparam_nall whether the atom dimension of aparam is nall
+   *instead of fparam.
+   **/
+  virtual bool is_aparam_nall() const = 0;
+};
+
+/**
+ * @brief Deep Potential to automatically switch backends.
+ **/
 class DeepPot {
  public:
   /**
@@ -35,12 +530,12 @@ class DeepPot {
   void init(const std::string& model,
             const int& gpu_rank = 0,
             const std::string& file_content = "");
+
   /**
    * @brief Print the DP summary to the screen.
    * @param[in] pre The prefix to each line.
    **/
   void print_summary(const std::string& pre) const;
-
   /**
    * @brief Evaluate the energy, force and virial by using this DP.
    * @param[out] ener The system energy.
@@ -249,42 +744,27 @@ class DeepPot {
    * @brief Get the cutoff radius.
    * @return The cutoff radius.
    **/
-  double cutoff() const {
-    assert(inited);
-    return rcut;
-  };
+  double cutoff() const;
   /**
    * @brief Get the number of types.
    * @return The number of types.
    **/
-  int numb_types() const {
-    assert(inited);
-    return ntypes;
-  };
+  int numb_types() const;
   /**
    * @brief Get the number of types with spin.
    * @return The number of types with spin.
    **/
-  int numb_types_spin() const {
-    assert(inited);
-    return ntypes_spin;
-  };
+  int numb_types_spin() const;
   /**
    * @brief Get the dimension of the frame parameter.
    * @return The dimension of the frame parameter.
    **/
-  int dim_fparam() const {
-    assert(inited);
-    return dfparam;
-  };
+  int dim_fparam() const;
   /**
    * @brief Get the dimension of the atomic parameter.
    * @return The dimension of the atomic parameter.
    **/
-  int dim_aparam() const {
-    assert(inited);
-    return daparam;
-  };
+  int dim_aparam() const;
   /**
    * @brief Get the type map (element name of the atom types) of this model.
    * @param[out] type_map The type map of this model.
@@ -296,77 +776,11 @@ class DeepPot {
    * @param[out] aparam_nall whether the atom dimension of aparam is nall
    *instead of fparam.
    **/
-  bool is_aparam_nall() const {
-    assert(inited);
-    return aparam_nall;
-  };
+  bool is_aparam_nall() const;
 
  private:
-  tensorflow::Session* session;
-  int num_intra_nthreads, num_inter_nthreads;
-  tensorflow::GraphDef* graph_def;
   bool inited;
-  template <class VT>
-  VT get_scalar(const std::string& name) const;
-  // VALUETYPE get_rcut () const;
-  // int get_ntypes () const;
-  double rcut;
-  int dtype;
-  double cell_size;
-  std::string model_type;
-  std::string model_version;
-  int ntypes;
-  int ntypes_spin;
-  int dfparam;
-  int daparam;
-  bool aparam_nall;
-  /**
-   * @brief Validate the size of frame and atomic parameters.
-   * @param[in] nframes The number of frames.
-   * @param[in] nloc The number of local atoms.
-   * @param[in] fparam The frame parameter.
-   * @param[in] aparam The atomic parameter.
-   * @tparam VALUETYPE The type of the parameters, double or float.
-   */
-  template <typename VALUETYPE>
-  void validate_fparam_aparam(const int& nframes,
-                              const int& nloc,
-                              const std::vector<VALUETYPE>& fparam,
-                              const std::vector<VALUETYPE>& aparam) const;
-  /**
-   * @brief Tile the frame or atomic parameters if there is only
-   * a single frame of frame or atomic parameters.
-   * @param[out] out_param The tiled frame or atomic parameters.
-   * @param[in] nframes The number of frames.
-   * @param[in] dparam The dimension of the frame or atomic parameters in a
-   * frame.
-   * @param[in] param The frame or atomic parameters.
-   * @tparam VALUETYPE The type of the parameters, double or float.
-   */
-  template <typename VALUETYPE>
-  void tile_fparam_aparam(std::vector<VALUETYPE>& out_param,
-                          const int& nframes,
-                          const int& dparam,
-                          const std::vector<VALUETYPE>& param) const;
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute_inner(
-      ENERGYVTYPE& ener,
-      std::vector<VALUETYPE>& force,
-      std::vector<VALUETYPE>& virial,
-      const std::vector<VALUETYPE>& coord,
-      const std::vector<int>& atype,
-      const std::vector<VALUETYPE>& box,
-      const int nghost,
-      const int& ago,
-      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
-      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
-
-  // copy neighbor list info from host
-  bool init_nbor;
-  std::vector<int> sec_a;
-  NeighborListData nlist_data;
-  InputNlist nlist;
-  AtomMap atommap;
+  std::shared_ptr<deepmd::DeepPotBase> dp;
 };
 
 class DeepPotModelDevi {
diff --git a/source/api_cc/include/DeepPotTF.h b/source/api_cc/include/DeepPotTF.h
new file mode 100644
index 0000000000..59bd5f476f
--- /dev/null
+++ b/source/api_cc/include/DeepPotTF.h
@@ -0,0 +1,628 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#pragma once
+
+#include "DeepPot.h"
+#include "common.h"
+#include "neighbor_list.h"
+
+namespace deepmd {
+/**
+ * @brief TensorFlow implementation for Deep Potential.
+ **/
+class DeepPotTF : public DeepPotBase {
+ public:
+  /**
+   * @brief DP constructor without initialization.
+   **/
+  DeepPotTF();
+  ~DeepPotTF();
+  /**
+   * @brief DP constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  DeepPotTF(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& file_content = "");
+  /**
+   * @brief Initialize the DP.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  void init(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& file_content = "");
+
+  /**
+   * @brief Evaluate the energy, force and virial by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size:
+   * nframes x dim_fparam; or
+   * dim_fparam, in which case all frames are assumed to be provided with the
+   * same fparam.
+   * @param[in] aparam The atomic parameter. The array can be of size:
+   * nframes x natoms x dim_aparam; or
+   * natoms x dim_aparam, in which case all frames are assumed to be provided
+   * with the same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute(ENERGYVTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force and virial by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size:
+   * nframes x dim_fparam; or
+   * dim_fparam, in which case all frames are assumed to be provided with the
+   * same fparam.
+   * @param[in] aparam The atomic parameter. The array can be of size:
+   * nframes x natoms x dim_aparam; or
+   * natoms x dim_aparam, in which case all frames are assumed to be provided
+   * with the same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute(ENERGYVTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& inlist,
+               const int& ago,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size:
+   * nframes x dim_fparam; or
+   * dim_fparam, in which case all frames are assumed to be provided with the
+   * same fparam.
+   * @param[in] aparam The atomic parameter. The array can be of size:
+   * nframes x natoms x dim_aparam; or
+   * natoms x dim_aparam, in which case all frames are assumed to be provided
+   * with the same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute(ENERGYVTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_energy,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size:
+   * nframes x dim_fparam; or
+   * dim_fparam, in which case all frames are assumed to be provided with the
+   * same fparam.
+   * @param[in] aparam The atomic parameter. The array can be of size:
+   * nframes x natoms x dim_aparam; or
+   * natoms x dim_aparam, in which case all frames are assumed to be provided
+   * with the same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute(ENERGYVTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_energy,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& lmp_list,
+               const int& ago,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force, and virial with the mixed type
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] nframes The number of frames.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The array should be of size nframes x
+   *natoms.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size:
+   * nframes x dim_fparam; or
+   * dim_fparam, in which case all frames are assumed to be provided with the
+   * same fparam.
+   * @param[in] aparam The atomic parameter. The array can be of size:
+   * nframes x natoms x dim_aparam; or
+   * natoms x dim_aparam, in which case all frames are assumed to be provided
+   * with the same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute_mixed_type(
+      ENERGYVTYPE& ener,
+      std::vector<VALUETYPE>& force,
+      std::vector<VALUETYPE>& virial,
+      const int& nframes,
+      const std::vector<VALUETYPE>& coord,
+      const std::vector<int>& atype,
+      const std::vector<VALUETYPE>& box,
+      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force, and virial with the mixed type
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] nframes The number of frames.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The array should be of size nframes x
+   *natoms.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size:
+   * nframes x dim_fparam; or
+   * dim_fparam, in which case all frames are assumed to be provided with the
+   * same fparam.
+   * @param[in] aparam The atomic parameter. The array can be of size:
+   * nframes x natoms x dim_aparam; or
+   * natoms x dim_aparam, in which case all frames are assumed to be provided
+   * with the same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute_mixed_type(
+      ENERGYVTYPE& ener,
+      std::vector<VALUETYPE>& force,
+      std::vector<VALUETYPE>& virial,
+      std::vector<VALUETYPE>& atom_energy,
+      std::vector<VALUETYPE>& atom_virial,
+      const int& nframes,
+      const std::vector<VALUETYPE>& coord,
+      const std::vector<int>& atype,
+      const std::vector<VALUETYPE>& box,
+      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
+  /**
+   * @brief Get the number of types with spin.
+   * @return The number of types with spin.
+   **/
+  int numb_types_spin() const {
+    assert(inited);
+    return ntypes_spin;
+  };
+  /**
+   * @brief Get the dimension of the frame parameter.
+   * @return The dimension of the frame parameter.
+   **/
+  int dim_fparam() const {
+    assert(inited);
+    return dfparam;
+  };
+  /**
+   * @brief Get the dimension of the atomic parameter.
+   * @return The dimension of the atomic parameter.
+   **/
+  int dim_aparam() const {
+    assert(inited);
+    return daparam;
+  };
+  /**
+   * @brief Get the type map (element name of the atom types) of this model.
+   * @param[out] type_map The type map of this model.
+   **/
+  void get_type_map(std::string& type_map);
+
+  /**
+   * @brief Get whether the atom dimension of aparam is nall instead of nloc.
+   * @return True if the atom dimension of aparam is nall (local and ghost
+   * atoms) instead of nloc (local atoms only).
+   **/
+  bool is_aparam_nall() const {
+    assert(inited);
+    return aparam_nall;
+  };
+
+  // non-template wrappers that forward to the template methods above
+  void computew(double& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+
+  void computew(double& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(std::vector<double>& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+  void computew(std::vector<double>& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(double& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+
+  void computew(double& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(std::vector<double>& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+  void computew(std::vector<double>& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(double& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_energy,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+
+  void computew(double& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_energy,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(std::vector<double>& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_energy,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+  void computew(std::vector<double>& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_energy,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(double& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_energy,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+
+  void computew(double& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_energy,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(std::vector<double>& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_energy,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+  void computew(std::vector<double>& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_energy,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew_mixed_type(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>());
+  void computew_mixed_type(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>());
+  void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>());
+  void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>());
+  void computew_mixed_type(
+      double& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>());
+  void computew_mixed_type(
+      double& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>());
+  void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>());
+  void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>());
+
+ private:
+  tensorflow::Session* session;
+  int num_intra_nthreads, num_inter_nthreads;
+  tensorflow::GraphDef* graph_def;
+  bool inited;
+  template <class VT>
+  VT get_scalar(const std::string& name) const;
+  double rcut;
+  int dtype;
+  double cell_size;
+  std::string model_type;
+  std::string model_version;
+  int ntypes;
+  int ntypes_spin;
+  int dfparam;
+  int daparam;
+  bool aparam_nall;
+  /**
+   * @brief Validate the size of frame and atomic parameters.
+   * @param[in] nframes The number of frames.
+   * @param[in] nloc The number of local atoms.
+   * @param[in] fparam The frame parameter.
+   * @param[in] aparam The atomic parameter.
+   * @tparam VALUETYPE The type of the parameters, double or float.
+   */
+  template <typename VALUETYPE>
+  void validate_fparam_aparam(const int& nframes,
+                              const int& nloc,
+                              const std::vector<VALUETYPE>& fparam,
+                              const std::vector<VALUETYPE>& aparam) const;
+  /**
+   * @brief Tile the frame or atomic parameters if there is only
+   * a single frame of frame or atomic parameters.
+   * @param[out] out_param The tiled frame or atomic parameters.
+   * @param[in] nframes The number of frames.
+   * @param[in] dparam The dimension of the frame or atomic parameters in a
+   * frame.
+   * @param[in] param The frame or atomic parameters.
+   * @tparam VALUETYPE The type of the parameters, double or float.
+   */
+  template <typename VALUETYPE>
+  void tile_fparam_aparam(std::vector<VALUETYPE>& out_param,
+                          const int& nframes,
+                          const int& dparam,
+                          const std::vector<VALUETYPE>& param) const;
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute_inner(
+      ENERGYVTYPE& ener,
+      std::vector<VALUETYPE>& force,
+      std::vector<VALUETYPE>& virial,
+      const std::vector<VALUETYPE>& coord,
+      const std::vector<int>& atype,
+      const std::vector<VALUETYPE>& box,
+      const int nghost,
+      const int& ago,
+      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+
+  // copy neighbor list info from host
+  bool init_nbor;
+  std::vector<int> sec_a;
+  NeighborListData nlist_data;
+  InputNlist nlist;
+  AtomMap atommap;
+};
+
+}  // namespace deepmd
diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h
index 481e09cc89..7982c4f89d 100644
--- a/source/api_cc/include/common.h
+++ b/source/api_cc/include/common.h
@@ -19,6 +19,8 @@
 namespace deepmd {
 
 typedef double ENERGYTYPE;
+// TODO: currently we only implement TF; reserve for future use
+enum DPBackend { TensorFlow, PyTorch, Paddle, Unknown };
 
 struct NeighborListData {
   /// Array stores the core region atom's index
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index cd1b571153..d290565c2b 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -1,419 +1,27 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #include "DeepPot.h"
 
+#include "common.h"
+// TODO: only include DeepPotTF.h when TF backend is built
+#include <memory>
 #include <stdexcept>
 
 #include "AtomMap.h"
+#include "DeepPotTF.h"
 #include "device.h"
 
-using namespace tensorflow;
 using namespace deepmd;
 
-// start multiple frames
-
-template <typename MODELTYPE, typename VALUETYPE>
-static void run_model(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<VALUETYPE>& dforce_,
-    std::vector<VALUETYPE>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost = 0) {
-  unsigned nloc = atommap.get_type().size();
-  unsigned nall = nloc + nghost;
-  dener.resize(nframes);
-  if (nloc == 0) {
-    // no backward map needed
-    // dforce of size nall * 3
-    dforce_.resize(nframes * nall * 3);
-    fill(dforce_.begin(), dforce_.end(), (VALUETYPE)0.0);
-    // dvirial of size 9
-    dvirial.resize(nframes * 9);
-    fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.0);
-    return;
-  }
-
-  std::vector<Tensor> output_tensors;
-  check_status(session->Run(
-      input_tensors, {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"},
-      {}, &output_tensors));
-
-  Tensor output_e = output_tensors[0];
-  Tensor output_f = output_tensors[1];
-  Tensor output_av = output_tensors[3];
-
-  auto oe = output_e.flat<ENERGYTYPE>();
-  auto of = output_f.flat<MODELTYPE>();
-  auto oav = output_av.flat<MODELTYPE>();
-
-  std::vector<VALUETYPE> dforce(nframes * 3 * nall);
-  dvirial.resize(nframes * 9);
-  for (int ii = 0; ii < nframes; ++ii) {
-    dener[ii] = oe(ii);
-  }
-  for (unsigned ii = 0; ii < nframes * nall * 3; ++ii) {
-    dforce[ii] = of(ii);
-  }
-  // set dvirial to zero, prevent input vector is not zero (#1123)
-  std::fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.);
-  for (int kk = 0; kk < nframes; ++kk) {
-    for (int ii = 0; ii < nall; ++ii) {
-      dvirial[kk * 9 + 0] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 0);
-      dvirial[kk * 9 + 1] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 1);
-      dvirial[kk * 9 + 2] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 2);
-      dvirial[kk * 9 + 3] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 3);
-      dvirial[kk * 9 + 4] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 4);
-      dvirial[kk * 9 + 5] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 5);
-      dvirial[kk * 9 + 6] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 6);
-      dvirial[kk * 9 + 7] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 7);
-      dvirial[kk * 9 + 8] += (VALUETYPE)1.0 * oav(kk * nall * 9 + 9 * ii + 8);
-    }
-  }
-  dforce_ = dforce;
-  atommap.backward<VALUETYPE>(dforce_.begin(), dforce.begin(), 3, nframes,
-                              nall);
-}
-
-template void run_model<double, double>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template void run_model<double, float>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template void run_model<float, double>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template void run_model<float, float>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template <typename MODELTYPE, typename VALUETYPE>
-static void run_model(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<VALUETYPE>& dforce_,
-    std::vector<VALUETYPE>& dvirial,
-    std::vector<VALUETYPE>& datom_energy_,
-    std::vector<VALUETYPE>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost = 0) {
-  unsigned nloc = atommap.get_type().size();
-  unsigned nall = nloc + nghost;
-  dener.resize(nframes);
-  if (nloc == 0) {
-    // no backward map needed
-    // dforce of size nall * 3
-    dforce_.resize(nframes * nall * 3);
-    fill(dforce_.begin(), dforce_.end(), (VALUETYPE)0.0);
-    // dvirial of size 9
-    dvirial.resize(nframes * 9);
-    fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.0);
-    // datom_energy_ of size nall
-    datom_energy_.resize(nframes * nall);
-    fill(datom_energy_.begin(), datom_energy_.end(), (VALUETYPE)0.0);
-    // datom_virial_ of size nall * 9
-    datom_virial_.resize(nframes * nall * 9);
-    fill(datom_virial_.begin(), datom_virial_.end(), (VALUETYPE)0.0);
-    return;
-  }
-  std::vector<Tensor> output_tensors;
-
-  check_status(session->Run(
-      input_tensors, {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"},
-      {}, &output_tensors));
-
-  Tensor output_e = output_tensors[0];
-  Tensor output_f = output_tensors[1];
-  Tensor output_ae = output_tensors[2];
-  Tensor output_av = output_tensors[3];
-
-  auto oe = output_e.flat<ENERGYTYPE>();
-  auto of = output_f.flat<MODELTYPE>();
-  auto oae = output_ae.flat<MODELTYPE>();
-  auto oav = output_av.flat<MODELTYPE>();
-
-  std::vector<VALUETYPE> dforce(nframes * 3 * nall);
-  std::vector<VALUETYPE> datom_energy(nframes * nall, 0);
-  std::vector<VALUETYPE> datom_virial(nframes * 9 * nall);
-  dvirial.resize(nframes * 9);
-  for (int ii = 0; ii < nframes; ++ii) {
-    dener[ii] = oe(ii);
-  }
-  for (int ii = 0; ii < nframes * nall * 3; ++ii) {
-    dforce[ii] = of(ii);
-  }
-  for (int ii = 0; ii < nframes; ++ii) {
-    for (int jj = 0; jj < nloc; ++jj) {
-      datom_energy[ii * nall + jj] = oae(ii * nloc + jj);
-    }
-  }
-  for (int ii = 0; ii < nframes * nall * 9; ++ii) {
-    datom_virial[ii] = oav(ii);
-  }
-  // set dvirial to zero, prevent input vector is not zero (#1123)
-  std::fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.);
-  for (int kk = 0; kk < nframes; ++kk) {
-    for (int ii = 0; ii < nall; ++ii) {
-      dvirial[kk * 9 + 0] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 0];
-      dvirial[kk * 9 + 1] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 1];
-      dvirial[kk * 9 + 2] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 2];
-      dvirial[kk * 9 + 3] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 3];
-      dvirial[kk * 9 + 4] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 4];
-      dvirial[kk * 9 + 5] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 5];
-      dvirial[kk * 9 + 6] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 6];
-      dvirial[kk * 9 + 7] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 7];
-      dvirial[kk * 9 + 8] +=
-          (VALUETYPE)1.0 * datom_virial[kk * nall * 9 + 9 * ii + 8];
-    }
-  }
-  dforce_ = dforce;
-  datom_energy_ = datom_energy;
-  datom_virial_ = datom_virial;
-  atommap.backward<VALUETYPE>(dforce_.begin(), dforce.begin(), 3, nframes,
-                              nall);
-  atommap.backward<VALUETYPE>(datom_energy_.begin(), datom_energy.begin(), 1,
-                              nframes, nall);
-  atommap.backward<VALUETYPE>(datom_virial_.begin(), datom_virial.begin(), 9,
-                              nframes, nall);
-}
-
-template void run_model<double, double>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-template void run_model<double, float>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-template void run_model<float, double>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-template void run_model<float, float>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-// end multiple frames
-
-// start single frame
-
-template <typename MODELTYPE, typename VALUETYPE>
-static void run_model(
-    ENERGYTYPE& dener,
-    std::vector<VALUETYPE>& dforce_,
-    std::vector<VALUETYPE>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes = 1,
-    const int nghost = 0) {
-  assert(nframes == 1);
-  std::vector<ENERGYTYPE> dener_(1);
-  // call multi-frame version
-  run_model<MODELTYPE, VALUETYPE>(dener_, dforce_, dvirial, session,
-                                  input_tensors, atommap, nframes, nghost);
-  dener = dener_[0];
-}
-
-template void run_model<double, double>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template void run_model<double, float>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template void run_model<float, double>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template void run_model<float, float>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nframes,
-    const int nghost);
-
-template <typename MODELTYPE, typename VALUETYPE>
-static void run_model(
-    ENERGYTYPE& dener,
-    std::vector<VALUETYPE>& dforce_,
-    std::vector<VALUETYPE>& dvirial,
-    std::vector<VALUETYPE>& datom_energy_,
-    std::vector<VALUETYPE>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes = 1,
-    const int& nghost = 0) {
-  assert(nframes == 1);
-  std::vector<ENERGYTYPE> dener_(1);
-  // call multi-frame version
-  run_model<MODELTYPE, VALUETYPE>(dener_, dforce_, dvirial, datom_energy_,
-                                  datom_virial_, session, input_tensors,
-                                  atommap, nframes, nghost);
-  dener = dener_[0];
-}
-
-template void run_model<double, double>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-template void run_model<double, float>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-template void run_model<float, double>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-template void run_model<float, float>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const deepmd::AtomMap& atommap,
-    const int& nframes,
-    const int& nghost);
-
-// end single frame
-
-DeepPot::DeepPot()
-    : inited(false), init_nbor(false), graph_def(new GraphDef()) {}
+DeepPot::DeepPot() : inited(false) {}
 
 DeepPot::DeepPot(const std::string& model,
                  const int& gpu_rank,
                  const std::string& file_content)
-    : inited(false), init_nbor(false), graph_def(new GraphDef()) {
-  try {
-    init(model, gpu_rank, file_content);
-  } catch (...) {
-    // Clean up and rethrow, as the destructor will not be called
-    delete graph_def;
-    throw;
-  }
+    : inited(false) {
+  init(model, gpu_rank, file_content);
 }
 
-DeepPot::~DeepPot() { delete graph_def; }
+DeepPot::~DeepPot() {}
 
 void DeepPot::init(const std::string& model,
                    const int& gpu_rank,
@@ -424,152 +32,25 @@ void DeepPot::init(const std::string& model,
               << std::endl;
     return;
   }
-  SessionOptions options;
-  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
-  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
-  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
-  deepmd::load_op_library();
-
-  if (file_content.size() == 0) {
-    check_status(ReadBinaryProto(Env::Default(), model, graph_def));
+  // TODO: To implement detect_backend
+  DPBackend backend = deepmd::DPBackend::TensorFlow;
+  if (deepmd::DPBackend::TensorFlow == backend) {
+    // TODO: throw errors if TF backend is not built, without mentioning TF
+    dp = std::make_shared<deepmd::DeepPotTF>(model, gpu_rank, file_content);
+  } else if (deepmd::DPBackend::PyTorch == backend) {
+    throw deepmd::deepmd_exception("PyTorch backend is not supported yet");
+  } else if (deepmd::DPBackend::Paddle == backend) {
+    throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet");
   } else {
-    (*graph_def).ParseFromString(file_content);
-  }
-  int gpu_num = -1;
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  DPGetDeviceCount(gpu_num);  // check current device environment
-  if (gpu_num > 0) {
-    options.config.set_allow_soft_placement(true);
-    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
-        0.9);
-    options.config.mutable_gpu_options()->set_allow_growth(true);
-    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
-    std::string str = "/gpu:";
-    str += std::to_string(gpu_rank % gpu_num);
-    graph::SetDefaultDevice(str, graph_def);
-  }
-#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  check_status(NewSession(options, &session));
-  check_status(session->Create(*graph_def));
-  try {
-    model_version = get_scalar<STRINGTYPE>("model_attr/model_version");
-  } catch (deepmd::tf_exception& e) {
-    // no model version defined in old models
-    model_version = "0.0";
-  }
-  if (!model_compatable(model_version)) {
-    throw deepmd::deepmd_exception(
-        "incompatable model: version " + model_version +
-        " in graph, but version " + global_model_version +
-        " supported "
-        "See https://deepmd.rtfd.io/compatability/ for details.");
-  }
-  dtype = session_get_dtype(session, "descrpt_attr/rcut");
-  if (dtype == tensorflow::DT_DOUBLE) {
-    rcut = get_scalar<double>("descrpt_attr/rcut");
-  } else {
-    rcut = get_scalar<float>("descrpt_attr/rcut");
-  }
-  cell_size = rcut;
-  ntypes = get_scalar<int>("descrpt_attr/ntypes");
-  try {
-    ntypes_spin = get_scalar<int>("spin_attr/ntypes_spin");
-  } catch (const deepmd::deepmd_exception) {
-    ntypes_spin = 0;
-  }
-  dfparam = get_scalar<int>("fitting_attr/dfparam");
-  daparam = get_scalar<int>("fitting_attr/daparam");
-  if (dfparam < 0) {
-    dfparam = 0;
-  }
-  if (daparam < 0) {
-    daparam = 0;
+    throw deepmd::deepmd_exception("Unknown file type");
   }
-  if (daparam > 0) {
-    try {
-      aparam_nall = get_scalar<bool>("fitting_attr/aparam_nall");
-    } catch (const deepmd::deepmd_exception) {
-      aparam_nall = false;
-    }
-  } else {
-    aparam_nall = false;
-  }
-  model_type = get_scalar<STRINGTYPE>("model_attr/model_type");
   inited = true;
-
-  init_nbor = false;
 }
 
 void DeepPot::print_summary(const std::string& pre) const {
   deepmd::print_summary(pre);
 }
 
-template <class VT>
-VT DeepPot::get_scalar(const std::string& name) const {
-  return session_get_scalar<VT>(session, name);
-}
-
-template <typename VALUETYPE>
-void DeepPot::validate_fparam_aparam(
-    const int& nframes,
-    const int& nloc,
-    const std::vector<VALUETYPE>& fparam,
-    const std::vector<VALUETYPE>& aparam) const {
-  if (fparam.size() != dfparam && fparam.size() != nframes * dfparam) {
-    throw deepmd::deepmd_exception(
-        "the dim of frame parameter provided is not consistent with what the "
-        "model uses");
-  }
-
-  if (aparam.size() != daparam * nloc &&
-      aparam.size() != nframes * daparam * nloc) {
-    throw deepmd::deepmd_exception(
-        "the dim of atom parameter provided is not consistent with what the "
-        "model uses");
-  }
-}
-
-template void DeepPot::validate_fparam_aparam<double>(
-    const int& nframes,
-    const int& nloc,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam) const;
-
-template void DeepPot::validate_fparam_aparam<float>(
-    const int& nframes,
-    const int& nloc,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam) const;
-
-template <typename VALUETYPE>
-void DeepPot::tile_fparam_aparam(std::vector<VALUETYPE>& out_param,
-                                 const int& nframes,
-                                 const int& dparam,
-                                 const std::vector<VALUETYPE>& param) const {
-  if (param.size() == dparam) {
-    out_param.resize(nframes * dparam);
-    for (int ii = 0; ii < nframes; ++ii) {
-      std::copy(param.begin(), param.end(), out_param.begin() + ii * dparam);
-    }
-  } else if (param.size() == nframes * dparam) {
-    out_param = param;
-  }
-}
-
-template void DeepPot::tile_fparam_aparam<double>(
-    std::vector<double>& out_param,
-    const int& nframes,
-    const int& dparam,
-    const std::vector<double>& param) const;
-
-template void DeepPot::tile_fparam_aparam<float>(
-    std::vector<float>& out_param,
-    const int& nframes,
-    const int& dparam,
-    const std::vector<float>& param) const;
-
-// ENERGYVTYPE: std::vector<ENERGYTYPE> or ENERGYTYPE
-
 template <typename VALUETYPE, typename ENERGYVTYPE>
 void DeepPot::compute(ENERGYVTYPE& dener,
                       std::vector<VALUETYPE>& dforce_,
@@ -579,37 +60,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const std::vector<VALUETYPE>& dbox,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam_) {
-  int nall = datype_.size();
-  // if nall==0, unclear nframes, but 1 is ok
-  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
-  int nloc = nall;
-  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
-  assert(nloc == atommap.get_type().size());
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam;
-  validate_fparam_aparam(nframes, (aparam_nall ? nall : nloc), fparam_,
-                         aparam_);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam, nframes, (aparam_nall ? nall : nloc) * daparam,
-                     aparam_);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(input_tensors, dcoord_, ntypes,
-                                            datype_, dbox, cell_size, fparam,
-                                            aparam, atommap, "", aparam_nall);
-    assert(ret == nloc);
-    run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                      nframes);
-  } else {
-    int ret = session_input_tensors<float>(input_tensors, dcoord_, ntypes,
-                                           datype_, dbox, cell_size, fparam,
-                                           aparam, atommap, "", aparam_nall);
-    assert(ret == nloc);
-    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                     nframes);
-  }
+  dp->computew(dener, dforce_, dvirial, dcoord_, datype_, dbox, fparam_,
+               aparam_);
 }
 
 template void DeepPot::compute<double, ENERGYTYPE>(
@@ -664,37 +116,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const int& ago,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam__) {
-  int nall = datype_.size();
-  // if nall==0, unclear nframes, but 1 is ok
-  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam_;
-  validate_fparam_aparam(nframes, (aparam_nall ? nall : (nall - nghost)),
-                         fparam_, aparam__);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam_, nframes,
-                     (aparam_nall ? nall : (nall - nghost)) * daparam,
-                     aparam__);
-
-  // select real atoms
-  std::vector<VALUETYPE> dcoord, dforce, aparam;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_,
-                          nghost, ntypes, nframes, daparam, nall, aparam_nall);
-
-  // internal nlist
-  if (ago == 0) {
-    nlist_data.copy_from_nlist(lmp_list);
-    nlist_data.shuffle_exclude_empty(fwd_map);
-  }
-  compute_inner(dener, dforce, dvirial, dcoord, datype, dbox, nghost_real, ago,
-                fparam, aparam);
-  // bkw map
-  dforce_.resize(nframes * fwd_map.size() * 3);
-  select_map<VALUETYPE>(dforce_, dforce, bkw_map, 3, nframes, fwd_map.size(),
-                        bkw_map.size());
+  dp->computew(dener, dforce_, dvirial, dcoord_, datype_, dbox, nghost,
+               lmp_list, ago, fparam_, aparam__);
 }
 
 template void DeepPot::compute<double, ENERGYTYPE>(
@@ -749,96 +172,6 @@ template void DeepPot::compute<float, std::vector<ENERGYTYPE>>(
     const std::vector<float>& fparam,
     const std::vector<float>& aparam_);
 
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPot::compute_inner(ENERGYVTYPE& dener,
-                            std::vector<VALUETYPE>& dforce_,
-                            std::vector<VALUETYPE>& dvirial,
-                            const std::vector<VALUETYPE>& dcoord_,
-                            const std::vector<int>& datype_,
-                            const std::vector<VALUETYPE>& dbox,
-                            const int nghost,
-                            const int& ago,
-                            const std::vector<VALUETYPE>& fparam,
-                            const std::vector<VALUETYPE>& aparam) {
-  int nall = datype_.size();
-  // if nall==0, unclear nframes, but 1 is ok
-  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
-  int nloc = nall - nghost;
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  // agp == 0 means that the LAMMPS nbor list has been updated
-  if (ago == 0) {
-    atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
-    assert(nloc == atommap.get_type().size());
-    nlist_data.shuffle(atommap);
-    nlist_data.make_inlist(nlist);
-  }
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam,
-        atommap, nghost, ago, "", aparam_nall);
-    assert(nloc == ret);
-    run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                      nframes, nghost);
-  } else {
-    int ret = session_input_tensors<float>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam,
-        atommap, nghost, ago, "", aparam_nall);
-    assert(nloc == ret);
-    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                     nframes, nghost);
-  }
-}
-
-template void DeepPot::compute_inner<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPot::compute_inner<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
-template void DeepPot::compute_inner<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPot::compute_inner<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
 template <typename VALUETYPE, typename ENERGYVTYPE>
 void DeepPot::compute(ENERGYVTYPE& dener,
                       std::vector<VALUETYPE>& dforce_,
@@ -850,31 +183,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const std::vector<VALUETYPE>& dbox,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam_) {
-  // if datype.size is 0, not clear nframes; but 1 is just ok
-  int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
-  atommap = deepmd::AtomMap(datype_.begin(), datype_.end());
-  int nloc = datype_.size();
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam;
-  validate_fparam_aparam(nframes, nloc, fparam_, aparam_);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam, nframes, nloc * daparam, aparam_);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int nloc = session_input_tensors<double>(input_tensors, dcoord_, ntypes,
-                                             datype_, dbox, cell_size, fparam,
-                                             aparam, atommap, "", aparam_nall);
-    run_model<double>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
-                      session, input_tensors, atommap, nframes);
-  } else {
-    int nloc = session_input_tensors<float>(input_tensors, dcoord_, ntypes,
-                                            datype_, dbox, cell_size, fparam,
-                                            aparam, atommap, "", aparam_nall);
-    run_model<float>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
-                     session, input_tensors, atommap, nframes);
-  }
+  dp->computew(dener, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, fparam_, aparam_);
 }
 
 template void DeepPot::compute<double, ENERGYTYPE>(
@@ -939,62 +249,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const int& ago,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam__) {
-  int nall = datype_.size();
-  // if nall==0, unclear nframes, but 1 is ok
-  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
-  int nloc = nall - nghost;
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam_;
-  validate_fparam_aparam(nframes, (aparam_nall ? nall : nloc), fparam_,
-                         aparam__);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam_, nframes, (aparam_nall ? nall : nloc) * daparam,
-                     aparam__);
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-  // select real atoms
-  std::vector<VALUETYPE> dcoord, dforce, aparam, datom_energy, datom_virial;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_,
-                          nghost, ntypes, nframes, daparam, nall, aparam_nall);
-
-  if (ago == 0) {
-    atommap = deepmd::AtomMap(datype.begin(), datype.begin() + nloc_real);
-    assert(nloc_real == atommap.get_type().size());
-
-    nlist_data.copy_from_nlist(lmp_list);
-    nlist_data.shuffle_exclude_empty(fwd_map);
-    nlist_data.shuffle(atommap);
-    nlist_data.make_inlist(nlist);
-  }
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(
-        input_tensors, dcoord, ntypes, datype, dbox, nlist, fparam, aparam,
-        atommap, nghost_real, ago, "", aparam_nall);
-    assert(nloc_real == ret);
-    run_model<double>(dener, dforce, dvirial, datom_energy, datom_virial,
-                      session, input_tensors, atommap, nframes, nghost_real);
-  } else {
-    int ret = session_input_tensors<float>(
-        input_tensors, dcoord, ntypes, datype, dbox, nlist, fparam, aparam,
-        atommap, nghost_real, ago, "", aparam_nall);
-    assert(nloc_real == ret);
-    run_model<float>(dener, dforce, dvirial, datom_energy, datom_virial,
-                     session, input_tensors, atommap, nframes, nghost_real);
-  }
-
-  // bkw map
-  dforce_.resize(nframes * fwd_map.size() * 3);
-  datom_energy_.resize(nframes * fwd_map.size());
-  datom_virial_.resize(nframes * fwd_map.size() * 9);
-  select_map<VALUETYPE>(dforce_, dforce, bkw_map, 3, nframes, fwd_map.size(),
-                        nall_real);
-  select_map<VALUETYPE>(datom_energy_, datom_energy, bkw_map, 1, nframes,
-                        fwd_map.size(), nall_real);
-  select_map<VALUETYPE>(datom_virial_, datom_virial, bkw_map, 9, nframes,
-                        fwd_map.size(), nall_real);
+  dp->computew(dener, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, nghost, lmp_list, ago, fparam_, aparam__);
 }
 
 template void DeepPot::compute<double, ENERGYTYPE>(
@@ -1068,32 +324,8 @@ void DeepPot::compute_mixed_type(ENERGYVTYPE& dener,
                                  const std::vector<VALUETYPE>& dbox,
                                  const std::vector<VALUETYPE>& fparam_,
                                  const std::vector<VALUETYPE>& aparam_) {
-  int nloc = datype_.size() / nframes;
-  // here atommap only used to get nloc
-  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam;
-  validate_fparam_aparam(nframes, nloc, fparam_, aparam_);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam, nframes, nloc * daparam, aparam_);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors_mixed_type<double>(
-        input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size,
-        fparam, aparam, atommap, "", aparam_nall);
-    assert(ret == nloc);
-    run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                      nframes);
-  } else {
-    int ret = session_input_tensors_mixed_type<float>(
-        input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size,
-        fparam, aparam, atommap, "", aparam_nall);
-    assert(ret == nloc);
-    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                     nframes);
-  }
+  dp->computew_mixed_type(dener, dforce_, dvirial, nframes, dcoord_, datype_,
+                          dbox, fparam_, aparam_);
 }
 
 template void DeepPot::compute_mixed_type<double, ENERGYTYPE>(
@@ -1152,30 +384,8 @@ void DeepPot::compute_mixed_type(ENERGYVTYPE& dener,
                                  const std::vector<VALUETYPE>& dbox,
                                  const std::vector<VALUETYPE>& fparam_,
                                  const std::vector<VALUETYPE>& aparam_) {
-  int nloc = datype_.size() / nframes;
-  // here atommap only used to get nloc
-  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam;
-  validate_fparam_aparam(nframes, nloc, fparam_, aparam_);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam, nframes, nloc * daparam, aparam_);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int nloc = session_input_tensors_mixed_type<double>(
-        input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size,
-        fparam, aparam, atommap, "", aparam_nall);
-    run_model<double>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
-                      session, input_tensors, atommap, nframes);
-  } else {
-    int nloc = session_input_tensors_mixed_type<float>(
-        input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size,
-        fparam, aparam, atommap, "", aparam_nall);
-    run_model<float>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
-                     session, input_tensors, atommap, nframes);
-  }
+  dp->computew_mixed_type(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                          nframes, dcoord_, datype_, dbox, fparam_, aparam_);
 }
 
 template void DeepPot::compute_mixed_type<double, ENERGYTYPE>(
@@ -1230,10 +440,22 @@ template void DeepPot::compute_mixed_type<float, std::vector<ENERGYTYPE>>(
     const std::vector<float>& fparam,
     const std::vector<float>& aparam);
 
+double DeepPot::cutoff() const { return dp->cutoff(); }
+
+int DeepPot::numb_types() const { return dp->numb_types(); }
+
+int DeepPot::numb_types_spin() const { return dp->numb_types_spin(); }
+
+int DeepPot::dim_fparam() const { return dp->dim_fparam(); }
+
+int DeepPot::dim_aparam() const { return dp->dim_aparam(); }
+
 void DeepPot::get_type_map(std::string& type_map) {
-  type_map = get_scalar<STRINGTYPE>("model_attr/tmap");
+  dp->get_type_map(type_map);
 }
 
+bool DeepPot::is_aparam_nall() const { return dp->is_aparam_nall(); }
+
 DeepPotModelDevi::DeepPotModelDevi() : inited(false), numb_models(0) {}
 
 DeepPotModelDevi::DeepPotModelDevi(
diff --git a/source/api_cc/src/DeepPotTF.cc b/source/api_cc/src/DeepPotTF.cc
new file mode 100644
index 0000000000..ed3bede4df
--- /dev/null
+++ b/source/api_cc/src/DeepPotTF.cc
@@ -0,0 +1,1556 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include "DeepPotTF.h"
+
+#include <stdexcept>
+
+#include "AtomMap.h"
+#include "common.h"
+#include "device.h"
+
+using namespace tensorflow;
+using namespace deepmd;
+
+// start multiple frames
+
+// Multi-frame evaluation without per-atom outputs. Runs the session, copies
+// the energy and force tensors, reduces "o_atom_virial" over all atoms into
+// the per-frame virial, and passes the forces through atommap.backward().
+template <typename MODELTYPE, typename VALUETYPE>
+static void run_model(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<VALUETYPE>& dforce_,
+    std::vector<VALUETYPE>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost = 0) {
+  // size_t keeps every flattened index below in 64-bit arithmetic
+  const size_t nloc = atommap.get_type().size();
+  const size_t nall = nloc + nghost;
+  dener.resize(nframes);
+  if (nloc == 0) {
+    // no backward map needed
+    // dforce of size nall * 3
+    dforce_.resize(static_cast<size_t>(nframes) * nall * 3);
+    fill(dforce_.begin(), dforce_.end(), (VALUETYPE)0.0);
+    // dvirial of size 9
+    dvirial.resize(static_cast<size_t>(nframes) * 9);
+    fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.0);
+    return;
+  }
+
+  std::vector<Tensor> output_tensors;
+  check_status(session->Run(
+      input_tensors, {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"},
+      {}, &output_tensors));
+
+  Tensor output_e = output_tensors[0];
+  Tensor output_f = output_tensors[1];
+  Tensor output_av = output_tensors[3];
+
+  auto oe = output_e.flat<ENERGYTYPE>();
+  auto of = output_f.flat<MODELTYPE>();
+  auto oav = output_av.flat<MODELTYPE>();
+
+  std::vector<VALUETYPE> dforce(static_cast<size_t>(nframes) * 3 * nall);
+  dvirial.resize(static_cast<size_t>(nframes) * 9);
+  for (int ii = 0; ii < nframes; ++ii) {
+    dener[ii] = oe(ii);
+  }
+  for (size_t ii = 0; ii < static_cast<size_t>(nframes) * nall * 3; ++ii) {
+    dforce[ii] = of(ii);
+  }
+  // zero the virial first: the caller's vector may hold stale values (#1123)
+  std::fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.);
+  for (size_t kk = 0; kk < static_cast<size_t>(nframes); ++kk) {
+    for (size_t ii = 0; ii < nall; ++ii) {
+      for (size_t cc = 0; cc < 9; ++cc) {
+        dvirial[kk * 9 + cc] +=
+            (VALUETYPE)1.0 * oav((kk * nall + ii) * 9 + cc);
+      }
+    }
+  }
+  // copy first so dforce_ has the right size before the backward mapping
+  dforce_ = dforce;
+  atommap.backward<VALUETYPE>(dforce_.begin(), dforce.begin(), 3, nframes,
+                              nall);
+}
+
+template void run_model<double, double>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+template void run_model<double, float>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+template void run_model<float, double>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+template void run_model<float, float>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+// Multi-frame evaluation with per-atom outputs.
+//
+// Runs the session once and fills, per frame, the energy, the forces, the
+// virial (atomic virials summed over all nall atoms), the atomic energies and
+// the atomic virials. The per-atom outputs are passed through
+// atommap.backward() before returning. Atomic energies are read for the nloc
+// local atoms only; the remaining entries are initialized to zero.
+template <typename MODELTYPE, typename VALUETYPE>
+static void run_model(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<VALUETYPE>& dforce_,
+    std::vector<VALUETYPE>& dvirial,
+    std::vector<VALUETYPE>& datom_energy_,
+    std::vector<VALUETYPE>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost = 0) {
+  // size_t keeps every flattened index below in 64-bit arithmetic
+  const size_t nloc = atommap.get_type().size();
+  const size_t nall = nloc + nghost;
+  dener.resize(nframes);
+  if (nloc == 0) {
+    // no backward map needed
+    // dforce of size nall * 3
+    dforce_.resize(static_cast<size_t>(nframes) * nall * 3);
+    fill(dforce_.begin(), dforce_.end(), (VALUETYPE)0.0);
+    // dvirial of size 9
+    dvirial.resize(static_cast<size_t>(nframes) * 9);
+    fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.0);
+    // datom_energy_ of size nall
+    datom_energy_.resize(static_cast<size_t>(nframes) * nall);
+    fill(datom_energy_.begin(), datom_energy_.end(), (VALUETYPE)0.0);
+    // datom_virial_ of size nall * 9
+    datom_virial_.resize(static_cast<size_t>(nframes) * nall * 9);
+    fill(datom_virial_.begin(), datom_virial_.end(), (VALUETYPE)0.0);
+    return;
+  }
+  std::vector<Tensor> output_tensors;
+
+  check_status(session->Run(
+      input_tensors, {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"},
+      {}, &output_tensors));
+
+  Tensor output_e = output_tensors[0];
+  Tensor output_f = output_tensors[1];
+  Tensor output_ae = output_tensors[2];
+  Tensor output_av = output_tensors[3];
+
+  auto oe = output_e.flat<ENERGYTYPE>();
+  auto of = output_f.flat<MODELTYPE>();
+  auto oae = output_ae.flat<MODELTYPE>();
+  auto oav = output_av.flat<MODELTYPE>();
+
+  std::vector<VALUETYPE> dforce(static_cast<size_t>(nframes) * 3 * nall);
+  std::vector<VALUETYPE> datom_energy(static_cast<size_t>(nframes) * nall, 0);
+  std::vector<VALUETYPE> datom_virial(static_cast<size_t>(nframes) * 9 * nall);
+  dvirial.resize(static_cast<size_t>(nframes) * 9);
+  for (int ii = 0; ii < nframes; ++ii) {
+    dener[ii] = oe(ii);
+  }
+  for (size_t ii = 0; ii < static_cast<size_t>(nframes) * nall * 3; ++ii) {
+    dforce[ii] = of(ii);
+  }
+
+  // "o_atom_energy" is read with a stride of nloc per frame, while the
+  // output buffer uses a stride of nall; the other entries keep their zero
+  for (size_t ii = 0; ii < static_cast<size_t>(nframes); ++ii) {
+    for (size_t jj = 0; jj < nloc; ++jj) {
+      datom_energy[ii * nall + jj] = oae(ii * nloc + jj);
+    }
+  }
+  for (size_t ii = 0; ii < static_cast<size_t>(nframes) * nall * 9; ++ii) {
+    datom_virial[ii] = oav(ii);
+  }
+
+  // zero the virial first: the caller's vector may hold stale values (#1123)
+  std::fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.);
+  for (size_t kk = 0; kk < static_cast<size_t>(nframes); ++kk) {
+    for (size_t ii = 0; ii < nall; ++ii) {
+      for (size_t cc = 0; cc < 9; ++cc) {
+        dvirial[kk * 9 + cc] +=
+            (VALUETYPE)1.0 * datom_virial[(kk * nall + ii) * 9 + cc];
+      }
+    }
+  }
+
+  // copy first so each output has the right size before the backward mapping
+  dforce_ = dforce;
+  datom_energy_ = datom_energy;
+  datom_virial_ = datom_virial;
+  atommap.backward<VALUETYPE>(dforce_.begin(), dforce.begin(), 3, nframes,
+                              nall);
+  atommap.backward<VALUETYPE>(datom_energy_.begin(), datom_energy.begin(), 1,
+                              nframes, nall);
+  atommap.backward<VALUETYPE>(datom_virial_.begin(), datom_virial.begin(), 9,
+                              nframes, nall);
+}
+
+template void run_model<double, double>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+template void run_model<double, float>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+template void run_model<float, double>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+template void run_model<float, float>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+// end multiple frames
+
+// start single frame
+
+template <typename MODELTYPE, typename VALUETYPE>
+static void run_model(
+    ENERGYTYPE& dener,
+    std::vector<VALUETYPE>& dforce_,
+    std::vector<VALUETYPE>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes = 1,
+    const int nghost = 0) {
+  // scalar-energy wrapper around the multi-frame overload (one frame only)
+  assert(nframes == 1);
+  std::vector<ENERGYTYPE> frame_energies(1);
+  run_model<MODELTYPE, VALUETYPE>(frame_energies, dforce_, dvirial, session,
+                                  input_tensors, atommap, nframes, nghost);
+  dener = frame_energies.front();
+}
+
+template void run_model<double, double>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+template void run_model<double, float>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+template void run_model<float, double>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+template void run_model<float, float>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nframes,
+    const int nghost);
+
+template <typename MODELTYPE, typename VALUETYPE>
+static void run_model(
+    ENERGYTYPE& dener,
+    std::vector<VALUETYPE>& dforce_,
+    std::vector<VALUETYPE>& dvirial,
+    std::vector<VALUETYPE>& datom_energy_,
+    std::vector<VALUETYPE>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes = 1,
+    const int& nghost = 0) {
+  // scalar-energy wrapper around the multi-frame overload (one frame only)
+  assert(nframes == 1);
+  std::vector<ENERGYTYPE> frame_energies(1);
+  run_model<MODELTYPE, VALUETYPE>(frame_energies, dforce_, dvirial,
+                                  datom_energy_, datom_virial_, session,
+                                  input_tensors, atommap, nframes, nghost);
+  dener = frame_energies.front();
+}
+
+template void run_model<double, double>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+template void run_model<double, float>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+template void run_model<float, double>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+template void run_model<float, float>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nframes,
+    const int& nghost);
+
+// end single frame
+
+DeepPotTF::DeepPotTF()
+    : inited(false), init_nbor(false), graph_def(new GraphDef()) {}
+
+DeepPotTF::DeepPotTF(const std::string& model,
+                     const int& gpu_rank,
+                     const std::string& file_content)
+    : inited(false), init_nbor(false), graph_def(new GraphDef()) {
+  try {  // init() may throw while loading or validating the model
+    init(model, gpu_rank, file_content);
+  } catch (...) {
+    // no destructor runs if the constructor throws: free graph_def, rethrow
+    delete graph_def;
+    throw;
+  }
+}
+
+DeepPotTF::~DeepPotTF() { delete graph_def; }  // allocated in the ctors
+
+// Load the frozen graph from `model` (or from `file_content` when it is not
+// empty), create the TensorFlow session and cache the model attributes.
+// Calling it again on an initialized object only prints a warning.
+void DeepPotTF::init(const std::string& model,
+                     const int& gpu_rank,
+                     const std::string& file_content) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
+  }
+  SessionOptions options;
+  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
+  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
+  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
+  deepmd::load_op_library();
+
+  if (file_content.size() == 0) {
+    check_status(ReadBinaryProto(Env::Default(), model, graph_def));
+  } else if (!graph_def->ParseFromString(file_content)) {
+    throw deepmd::deepmd_exception("failed to parse model from file_content");
+  }
+  int gpu_num = -1;
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  DPGetDeviceCount(gpu_num);  // check current device environment
+  if (gpu_num > 0) {
+    options.config.set_allow_soft_placement(true);
+    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
+        0.9);
+    options.config.mutable_gpu_options()->set_allow_growth(true);
+    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
+    std::string str = "/gpu:";
+    str += std::to_string(gpu_rank % gpu_num);
+    graph::SetDefaultDevice(str, graph_def);
+  }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  check_status(NewSession(options, &session));
+  check_status(session->Create(*graph_def));
+  try {
+    model_version = get_scalar<STRINGTYPE>("model_attr/model_version");
+  } catch (const deepmd::tf_exception&) {
+    // no model version defined in old models
+    model_version = "0.0";
+  }
+  if (!model_compatable(model_version)) {
+    throw deepmd::deepmd_exception(
+        "incompatable model: version " + model_version +
+        " in graph, but version " + global_model_version +
+        " supported "
+        "See https://deepmd.rtfd.io/compatability/ for details.");
+  }
+  dtype = session_get_dtype(session, "descrpt_attr/rcut");
+  if (dtype == tensorflow::DT_DOUBLE) {
+    rcut = get_scalar<double>("descrpt_attr/rcut");
+  } else {
+    rcut = get_scalar<float>("descrpt_attr/rcut");
+  }
+  cell_size = rcut;
+  ntypes = get_scalar<int>("descrpt_attr/ntypes");
+  try {
+    ntypes_spin = get_scalar<int>("spin_attr/ntypes_spin");
+  } catch (const deepmd::deepmd_exception&) {
+    ntypes_spin = 0;
+  }
+  dfparam = get_scalar<int>("fitting_attr/dfparam");
+  daparam = get_scalar<int>("fitting_attr/daparam");
+  // clamp negative dimensions read from the graph to zero
+  dfparam = dfparam < 0 ? 0 : dfparam;
+  daparam = daparam < 0 ? 0 : daparam;
+  if (daparam > 0) {
+    try {
+      aparam_nall = get_scalar<bool>("fitting_attr/aparam_nall");
+    } catch (const deepmd::deepmd_exception&) {
+      aparam_nall = false;
+    }
+  } else {
+    aparam_nall = false;
+  }
+  model_type = get_scalar<STRINGTYPE>("model_attr/model_type");
+  inited = true;
+
+  init_nbor = false;
+}
+
+template <class VT>
+VT DeepPotTF::get_scalar(const std::string& name) const {
+  return session_get_scalar<VT>(session, name);  // uses the member session
+}
+
+template <typename VALUETYPE>
+void DeepPotTF::validate_fparam_aparam(
+    const int& nframes,
+    const int& nloc,
+    const std::vector<VALUETYPE>& fparam,
+    const std::vector<VALUETYPE>& aparam) const {
+  const size_t nfp = fparam.size();  // number of frame parameters given
+  const size_t nap = aparam.size();  // number of atom parameters given
+  if (nfp != dfparam && nfp != static_cast<size_t>(nframes) * dfparam) {
+    throw deepmd::deepmd_exception(
+        "the dim of frame parameter provided is not consistent with what the "
+        "model uses");
+  }
+  if (nap != static_cast<size_t>(daparam) * nloc &&
+      nap != static_cast<size_t>(nframes) * daparam * nloc) {
+    throw deepmd::deepmd_exception(
+        "the dim of atom parameter provided is not consistent with what the "
+        "model uses");
+  }
+}
+
+template void DeepPotTF::validate_fparam_aparam<double>(
+    const int& nframes,
+    const int& nloc,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam) const;
+
+template void DeepPotTF::validate_fparam_aparam<float>(
+    const int& nframes,
+    const int& nloc,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam) const;
+
+template <typename VALUETYPE>
+void DeepPotTF::tile_fparam_aparam(std::vector<VALUETYPE>& out_param,
+                                   const int& nframes,
+                                   const int& dparam,
+                                   const std::vector<VALUETYPE>& param) const {
+  if (param.size() == dparam) {  // one frame given: replicate per frame
+    out_param.resize(static_cast<size_t>(nframes) * dparam);
+    auto dst = out_param.begin();
+    for (int ii = 0; ii < nframes; ++ii) {
+      dst = std::copy(param.begin(), param.end(), dst);
+    }
+  } else if (param.size() == static_cast<size_t>(nframes) * dparam) {
+    out_param = param;  // already holds one set of values per frame
+  }
+}
+
+template void DeepPotTF::tile_fparam_aparam<double>(
+    std::vector<double>& out_param,
+    const int& nframes,
+    const int& dparam,
+    const std::vector<double>& param) const;
+
+template void DeepPotTF::tile_fparam_aparam<float>(
+    std::vector<float>& out_param,
+    const int& nframes,
+    const int& dparam,
+    const std::vector<float>& param) const;
+
+// ENERGYVTYPE: std::vector<ENERGYTYPE> or ENERGYTYPE
+
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotTF::compute(ENERGYVTYPE& dener,
+                        std::vector<VALUETYPE>& dforce_,
+                        std::vector<VALUETYPE>& dvirial,
+                        const std::vector<VALUETYPE>& dcoord_,
+                        const std::vector<int>& datype_,
+                        const std::vector<VALUETYPE>& dbox,
+                        const std::vector<VALUETYPE>& fparam_,
+                        const std::vector<VALUETYPE>& aparam_) {
+  int nall = datype_.size();
+  // nframes cannot be derived from the inputs when nall == 0; 1 is used then
+  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
+  int nloc = nall;  // no ghost atoms here: every atom is local
+  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
+  std::vector<VALUETYPE> fparam;
+  std::vector<VALUETYPE> aparam;
+  validate_fparam_aparam(nframes, (aparam_nall ? nall : nloc), fparam_,
+                         aparam_);
+  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
+  tile_fparam_aparam(aparam, nframes, (aparam_nall ? nall : nloc) * daparam,
+                     aparam_);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  if (dtype == tensorflow::DT_DOUBLE) {
+    int ret = session_input_tensors<double>(input_tensors, dcoord_, ntypes,
+                                            datype_, dbox, cell_size, fparam,
+                                            aparam, atommap, "", aparam_nall);
+    assert(ret == nloc);
+    run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
+                      nframes);
+  } else {  // every other dtype is evaluated with float tensors
+    int ret = session_input_tensors<float>(input_tensors, dcoord_, ntypes,
+                                           datype_, dbox, cell_size, fparam,
+                                           aparam, atommap, "", aparam_nall);
+    assert(ret == nloc);
+    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
+                     nframes);
+  }
+}
+
+template void DeepPotTF::compute<double, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute<float, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+template void DeepPotTF::compute<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotTF::compute(ENERGYVTYPE& dener,
+                        std::vector<VALUETYPE>& dforce_,
+                        std::vector<VALUETYPE>& dvirial,
+                        const std::vector<VALUETYPE>& dcoord_,
+                        const std::vector<int>& datype_,
+                        const std::vector<VALUETYPE>& dbox,
+                        const int nghost,
+                        const InputNlist& lmp_list,
+                        const int& ago,
+                        const std::vector<VALUETYPE>& fparam_,
+                        const std::vector<VALUETYPE>& aparam__) {
+  int nall = datype_.size();
+  // nframes cannot be derived from the inputs when nall == 0; 1 is used then
+  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
+  std::vector<VALUETYPE> fparam;
+  std::vector<VALUETYPE> aparam_;
+  validate_fparam_aparam(nframes, (aparam_nall ? nall : (nall - nghost)),
+                         fparam_, aparam__);
+  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
+  tile_fparam_aparam(aparam_, nframes,
+                     (aparam_nall ? nall : (nall - nghost)) * daparam,
+                     aparam__);
+
+  // select real atoms; this call also produces the fwd_map/bkw_map index maps
+  std::vector<VALUETYPE> dcoord, dforce, aparam;
+  std::vector<int> datype, fwd_map, bkw_map;
+  int nghost_real, nall_real, nloc_real;
+  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
+                          nall_real, nloc_real, dcoord_, datype_, aparam_,
+                          nghost, ntypes, nframes, daparam, nall, aparam_nall);
+
+  // internal nlist: re-copied from lmp_list only when ago == 0
+  if (ago == 0) {
+    nlist_data.copy_from_nlist(lmp_list);
+    nlist_data.shuffle_exclude_empty(fwd_map);
+  }
+  compute_inner(dener, dforce, dvirial, dcoord, datype, dbox, nghost_real, ago,
+                fparam, aparam);
+  // backward map: fill dforce_ (sized by fwd_map) from dforce via bkw_map
+  dforce_.resize(static_cast<size_t>(nframes) * fwd_map.size() * 3);
+  select_map<VALUETYPE>(dforce_, dforce, bkw_map, 3, nframes, fwd_map.size(),
+                        bkw_map.size());
+}
+
+template void DeepPotTF::compute<double, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam_);
+
+template void DeepPotTF::compute<float, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam_);
+
+template void DeepPotTF::compute<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam_);
+
+template void DeepPotTF::compute<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam_);
+
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotTF::compute_inner(ENERGYVTYPE& dener,
+                              std::vector<VALUETYPE>& dforce_,
+                              std::vector<VALUETYPE>& dvirial,
+                              const std::vector<VALUETYPE>& dcoord_,
+                              const std::vector<int>& datype_,
+                              const std::vector<VALUETYPE>& dbox,
+                              const int nghost,
+                              const int& ago,
+                              const std::vector<VALUETYPE>& fparam,
+                              const std::vector<VALUETYPE>& aparam) {
+  int nall = datype_.size();
+  // nframes cannot be derived from the inputs when nall == 0; 1 is used then
+  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
+  int nloc = nall - nghost;  // local (non-ghost) atoms
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  // ago == 0 means that the LAMMPS nbor list has been updated
+  if (ago == 0) {
+    atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
+    assert(nloc == atommap.get_type().size());
+    nlist_data.shuffle(atommap);
+    nlist_data.make_inlist(nlist);
+  }
+  if (dtype == tensorflow::DT_DOUBLE) {
+    int ret = session_input_tensors<double>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam,
+        atommap, nghost, ago, "", aparam_nall);
+    assert(nloc == ret);
+    run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
+                      nframes, nghost);
+  } else {  // every other dtype is evaluated with float tensors
+    int ret = session_input_tensors<float>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam,
+        atommap, nghost, ago, "", aparam_nall);
+    assert(nloc == ret);
+    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
+                     nframes, nghost);
+  }
+}
+
+// Explicit instantiations of compute_inner(): double and float data, each
+// with a scalar ENERGYTYPE and a std::vector<ENERGYTYPE> energy argument.
+template void DeepPotTF::compute_inner<double, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const int nghost,
+    const int& ago,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute_inner<float, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const int nghost,
+    const int& ago,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+template void DeepPotTF::compute_inner<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const int nghost,
+    const int& ago,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute_inner<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const int nghost,
+    const int& ago,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+// Evaluate energy, force, virial and the per-atom energy/virial without an
+// external neighbor list (cell_size is handed to session_input_tensors
+// instead). Every entry of datype_ is treated as a local atom. fparam_ and
+// aparam_ go through validate_fparam_aparam and tile_fparam_aparam before the
+// model is run in the precision given by the member `dtype`.
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotTF::compute(ENERGYVTYPE& dener,
+                        std::vector<VALUETYPE>& dforce_,
+                        std::vector<VALUETYPE>& dvirial,
+                        std::vector<VALUETYPE>& datom_energy_,
+                        std::vector<VALUETYPE>& datom_virial_,
+                        const std::vector<VALUETYPE>& dcoord_,
+                        const std::vector<int>& datype_,
+                        const std::vector<VALUETYPE>& dbox,
+                        const std::vector<VALUETYPE>& fparam_,
+                        const std::vector<VALUETYPE>& aparam_) {
+  // if datype.size is 0, not clear nframes; but 1 is just ok
+  int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
+  atommap = deepmd::AtomMap(datype_.begin(), datype_.end());
+  int nloc = datype_.size();
+  std::vector<VALUETYPE> fparam;
+  std::vector<VALUETYPE> aparam;
+  validate_fparam_aparam(nframes, nloc, fparam_, aparam_);
+  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
+  tile_fparam_aparam(aparam, nframes, nloc * daparam, aparam_);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  if (dtype == tensorflow::DT_DOUBLE) {
+    int ret = session_input_tensors<double>(input_tensors, dcoord_, ntypes,
+                                            datype_, dbox, cell_size, fparam,
+                                            aparam, atommap, "", aparam_nall);
+    // Same debug-build sanity check the sibling overloads perform; the
+    // returned atom count was previously captured but never inspected.
+    assert(ret == nloc);
+    run_model<double>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                      session, input_tensors, atommap, nframes);
+  } else {
+    int ret = session_input_tensors<float>(input_tensors, dcoord_, ntypes,
+                                           datype_, dbox, cell_size, fparam,
+                                           aparam, atommap, "", aparam_nall);
+    assert(ret == nloc);
+    run_model<float>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                     session, input_tensors, atommap, nframes);
+  }
+}
+
+// Explicit instantiations of the compute() overload with per-atom outputs and
+// no neighbor list: double and float data, each with a scalar ENERGYTYPE and
+// a std::vector<ENERGYTYPE> energy argument.
+template void DeepPotTF::compute<double, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute<float, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+template void DeepPotTF::compute<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+// Evaluate energy, force, virial and the per-atom energy/virial with a
+// caller-supplied neighbor list. The inputs are first reduced by
+// select_real_atoms_coord; the model runs on that reduced set and the
+// per-atom results are then written into the caller's buffers through
+// select_map with bkw_map. dvirial is filled directly by run_model.
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotTF::compute(ENERGYVTYPE& dener,
+                        std::vector<VALUETYPE>& dforce_,
+                        std::vector<VALUETYPE>& dvirial,
+                        std::vector<VALUETYPE>& datom_energy_,
+                        std::vector<VALUETYPE>& datom_virial_,
+                        const std::vector<VALUETYPE>& dcoord_,
+                        const std::vector<int>& datype_,
+                        const std::vector<VALUETYPE>& dbox,
+                        const int nghost,
+                        const InputNlist& lmp_list,
+                        const int& ago,
+                        const std::vector<VALUETYPE>& fparam_,
+                        const std::vector<VALUETYPE>& aparam__) {
+  int nall = datype_.size();
+  int nloc = nall - nghost;
+  // With no atoms the frame count is undetermined; one frame is assumed.
+  int nframes = 1;
+  if (nall > 0) {
+    nframes = dcoord_.size() / nall / 3;
+  }
+
+  // aparam is sized for all atoms (ghosts included) when aparam_nall is set,
+  // otherwise for the local atoms only.
+  const int natoms_aparam = aparam_nall ? nall : nloc;
+  validate_fparam_aparam(nframes, natoms_aparam, fparam_, aparam__);
+  std::vector<VALUETYPE> fparam;
+  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
+  std::vector<VALUETYPE> aparam_tiled;
+  tile_fparam_aparam(aparam_tiled, nframes, natoms_aparam * daparam, aparam__);
+
+  // select real atoms
+  std::vector<VALUETYPE> dcoord, aparam;
+  std::vector<int> datype, fwd_map, bkw_map;
+  int nghost_real, nall_real, nloc_real;
+  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
+                          nall_real, nloc_real, dcoord_, datype_, aparam_tiled,
+                          nghost, ntypes, nframes, daparam, nall, aparam_nall);
+
+  // ago == 0: the neighbor list changed, so the atom map and the cached
+  // neighbor data are rebuilt from lmp_list.
+  if (ago == 0) {
+    atommap = deepmd::AtomMap(datype.begin(), datype.begin() + nloc_real);
+    assert(nloc_real == atommap.get_type().size());
+
+    nlist_data.copy_from_nlist(lmp_list);
+    nlist_data.shuffle_exclude_empty(fwd_map);
+    nlist_data.shuffle(atommap);
+    nlist_data.make_inlist(nlist);
+  }
+
+  // Model outputs for the selected atoms; mapped back below.
+  std::vector<VALUETYPE> dforce, datom_energy, datom_virial;
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+  const bool use_double = (dtype == tensorflow::DT_DOUBLE);
+  if (!use_double) {
+    const int nloc_fed = session_input_tensors<float>(
+        input_tensors, dcoord, ntypes, datype, dbox, nlist, fparam, aparam,
+        atommap, nghost_real, ago, "", aparam_nall);
+    assert(nloc_real == nloc_fed);
+    run_model<float>(dener, dforce, dvirial, datom_energy, datom_virial,
+                     session, input_tensors, atommap, nframes, nghost_real);
+  } else {
+    const int nloc_fed = session_input_tensors<double>(
+        input_tensors, dcoord, ntypes, datype, dbox, nlist, fparam, aparam,
+        atommap, nghost_real, ago, "", aparam_nall);
+    assert(nloc_real == nloc_fed);
+    run_model<double>(dener, dforce, dvirial, datom_energy, datom_virial,
+                      session, input_tensors, atommap, nframes, nghost_real);
+  }
+
+  // bkw map: size the caller's buffers by fwd_map.size() atoms per frame,
+  // then fill them from the model outputs.
+  const size_t natoms_out = fwd_map.size();
+  const size_t nframes_out = static_cast<size_t>(nframes);
+  dforce_.resize(nframes_out * natoms_out * 3);
+  datom_energy_.resize(nframes_out * natoms_out);
+  datom_virial_.resize(nframes_out * natoms_out * 9);
+  select_map<VALUETYPE>(dforce_, dforce, bkw_map, 3, nframes, natoms_out,
+                        nall_real);
+  select_map<VALUETYPE>(datom_energy_, datom_energy, bkw_map, 1, nframes,
+                        natoms_out, nall_real);
+  select_map<VALUETYPE>(datom_virial_, datom_virial, bkw_map, 9, nframes,
+                        natoms_out, nall_real);
+}
+
+// Explicit instantiations of the compute() overload with per-atom outputs and
+// a neighbor list: double and float data, each with a scalar ENERGYTYPE and a
+// std::vector<ENERGYTYPE> energy argument.
+template void DeepPotTF::compute<double, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam_);
+
+template void DeepPotTF::compute<float, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam_);
+
+template void DeepPotTF::compute<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam_);
+
+template void DeepPotTF::compute<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam_);
+
+// mixed type
+//
+// Evaluate energy, force and virial for nframes frames through
+// session_input_tensors_mixed_type. The per-frame atom count is
+// datype_.size() / nframes, so nframes must be non-zero. fparam_ and aparam_
+// go through validate_fparam_aparam and tile_fparam_aparam first.
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotTF::compute_mixed_type(ENERGYVTYPE& dener,
+                                   std::vector<VALUETYPE>& dforce_,
+                                   std::vector<VALUETYPE>& dvirial,
+                                   const int& nframes,
+                                   const std::vector<VALUETYPE>& dcoord_,
+                                   const std::vector<int>& datype_,
+                                   const std::vector<VALUETYPE>& dbox,
+                                   const std::vector<VALUETYPE>& fparam_,
+                                   const std::vector<VALUETYPE>& aparam_) {
+  int nloc = datype_.size() / nframes;
+  // The atom map is built from the first nloc types only; it is used here
+  // just to carry nloc.
+  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
+
+  validate_fparam_aparam(nframes, nloc, fparam_, aparam_);
+  std::vector<VALUETYPE> fparam;
+  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
+  std::vector<VALUETYPE> aparam;
+  tile_fparam_aparam(aparam, nframes, nloc * daparam, aparam_);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+  if (dtype != tensorflow::DT_DOUBLE) {
+    // Any non-double model dtype is evaluated in single precision.
+    const int nloc_fed = session_input_tensors_mixed_type<float>(
+        input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size,
+        fparam, aparam, atommap, "", aparam_nall);
+    assert(nloc_fed == nloc);
+    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
+                     nframes);
+    return;
+  }
+
+  const int nloc_fed = session_input_tensors_mixed_type<double>(
+      input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size, fparam,
+      aparam, atommap, "", aparam_nall);
+  assert(nloc_fed == nloc);
+  run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
+                    nframes);
+}
+
+// Explicit instantiations of compute_mixed_type() without per-atom outputs:
+// double and float data, each with a scalar ENERGYTYPE and a
+// std::vector<ENERGYTYPE> energy argument.
+template void DeepPotTF::compute_mixed_type<double, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const int& nframes,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute_mixed_type<float, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const int& nframes,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+template void DeepPotTF::compute_mixed_type<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    const int& nframes,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute_mixed_type<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    const int& nframes,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+// Mixed-type evaluation that also returns per-atom energy and virial.
+// The per-frame atom count is datype_.size() / nframes, so nframes must be
+// non-zero. fparam_ and aparam_ go through validate_fparam_aparam and
+// tile_fparam_aparam before the model is run in the precision given by the
+// member `dtype`.
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotTF::compute_mixed_type(ENERGYVTYPE& dener,
+                                   std::vector<VALUETYPE>& dforce_,
+                                   std::vector<VALUETYPE>& dvirial,
+                                   std::vector<VALUETYPE>& datom_energy_,
+                                   std::vector<VALUETYPE>& datom_virial_,
+                                   const int& nframes,
+                                   const std::vector<VALUETYPE>& dcoord_,
+                                   const std::vector<int>& datype_,
+                                   const std::vector<VALUETYPE>& dbox,
+                                   const std::vector<VALUETYPE>& fparam_,
+                                   const std::vector<VALUETYPE>& aparam_) {
+  int nloc = datype_.size() / nframes;
+  // here atommap only used to get nloc
+  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
+  std::vector<VALUETYPE> fparam;
+  std::vector<VALUETYPE> aparam;
+  validate_fparam_aparam(nframes, nloc, fparam_, aparam_);
+  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
+  tile_fparam_aparam(aparam, nframes, nloc * daparam, aparam_);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  if (dtype == tensorflow::DT_DOUBLE) {
+    // The result used to be stored in an inner `nloc` that shadowed the outer
+    // one and was never read; check it instead, as the overload without
+    // per-atom outputs does.
+    int ret = session_input_tensors_mixed_type<double>(
+        input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size,
+        fparam, aparam, atommap, "", aparam_nall);
+    assert(ret == nloc);
+    run_model<double>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                      session, input_tensors, atommap, nframes);
+  } else {
+    int ret = session_input_tensors_mixed_type<float>(
+        input_tensors, nframes, dcoord_, ntypes, datype_, dbox, cell_size,
+        fparam, aparam, atommap, "", aparam_nall);
+    assert(ret == nloc);
+    run_model<float>(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                     session, input_tensors, atommap, nframes);
+  }
+}
+
+// Explicit instantiations of compute_mixed_type() with per-atom outputs:
+// double and float data, each with a scalar ENERGYTYPE and a
+// std::vector<ENERGYTYPE> energy argument.
+template void DeepPotTF::compute_mixed_type<double, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    const int& nframes,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute_mixed_type<float, ENERGYTYPE>(
+    ENERGYTYPE& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    const int& nframes,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+template void DeepPotTF::compute_mixed_type<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<double>& dforce_,
+    std::vector<double>& dvirial,
+    std::vector<double>& datom_energy_,
+    std::vector<double>& datom_virial_,
+    const int& nframes,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+
+template void DeepPotTF::compute_mixed_type<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& dener,
+    std::vector<float>& dforce_,
+    std::vector<float>& dvirial,
+    std::vector<float>& datom_energy_,
+    std::vector<float>& datom_virial_,
+    const int& nframes,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+
+// Store the scalar read from "model_attr/tmap" into type_map.
+void DeepPotTF::get_type_map(std::string& type_map) {
+  type_map = get_scalar<STRINGTYPE>("model_attr/tmap");
+}
+
+// forward to template method
+// Non-template overload (double data, single double energy, no neighbor
+// list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, coord, atype, box, fparam, aparam);
+}
+
+// Non-template overload (float data, single double energy, no neighbor
+// list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, coord, atype, box, fparam, aparam);
+}
+// Non-template overload (double data, vector of energies, no neighbor list);
+// forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, coord, atype, box, fparam, aparam);
+}
+// Non-template overload (float data, vector of energies, no neighbor list);
+// forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, coord, atype, box, fparam, aparam);
+}
+// Non-template overload (double data, single double energy, with neighbor
+// list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
+          aparam);
+}
+
+// Non-template overload (float data, single double energy, with neighbor
+// list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
+          aparam);
+}
+// Non-template overload (double data, vector of energies, with neighbor
+// list); forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
+          aparam);
+}
+// Non-template overload (float data, vector of energies, with neighbor
+// list); forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
+          aparam);
+}
+// Non-template overload (double data, single double energy, per-atom
+// outputs, no neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         std::vector<double>& atom_energy,
+                         std::vector<double>& atom_virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          fparam, aparam);
+}
+
+// Non-template overload (float data, single double energy, per-atom outputs,
+// no neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         std::vector<float>& atom_energy,
+                         std::vector<float>& atom_virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          fparam, aparam);
+}
+// Non-template overload (double data, vector of energies, per-atom outputs,
+// no neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         std::vector<double>& atom_energy,
+                         std::vector<double>& atom_virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          fparam, aparam);
+}
+// Non-template overload (float data, vector of energies, per-atom outputs,
+// no neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         std::vector<float>& atom_energy,
+                         std::vector<float>& atom_virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          fparam, aparam);
+}
+// Non-template overload (double data, single double energy, per-atom
+// outputs, with neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         std::vector<double>& atom_energy,
+                         std::vector<double>& atom_virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          nghost, inlist, ago, fparam, aparam);
+}
+
+// Non-template overload (float data, single double energy, per-atom outputs,
+// with neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(double& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         std::vector<float>& atom_energy,
+                         std::vector<float>& atom_virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          nghost, inlist, ago, fparam, aparam);
+}
+// Non-template overload (double data, vector of energies, per-atom outputs,
+// with neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         std::vector<double>& atom_energy,
+                         std::vector<double>& atom_virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          nghost, inlist, ago, fparam, aparam);
+}
+// Non-template overload (float data, vector of energies, per-atom outputs,
+// with neighbor list); forwards all arguments to compute().
+void DeepPotTF::computew(std::vector<double>& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         std::vector<float>& atom_energy,
+                         std::vector<float>& atom_virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          nghost, inlist, ago, fparam, aparam);
+}
+// Non-template overload (double data, single double energy); forwards all
+// arguments to compute_mixed_type().
+void DeepPotTF::computew_mixed_type(double& ener,
+                                    std::vector<double>& force,
+                                    std::vector<double>& virial,
+                                    const int& nframes,
+                                    const std::vector<double>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<double>& box,
+                                    const std::vector<double>& fparam,
+                                    const std::vector<double>& aparam) {
+  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
+                     aparam);
+}
+// Non-template overload (float data, single double energy); forwards all
+// arguments to compute_mixed_type().
+void DeepPotTF::computew_mixed_type(double& ener,
+                                    std::vector<float>& force,
+                                    std::vector<float>& virial,
+                                    const int& nframes,
+                                    const std::vector<float>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<float>& box,
+                                    const std::vector<float>& fparam,
+                                    const std::vector<float>& aparam) {
+  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
+                     aparam);
+}
+void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
+                                    std::vector<double>& force,
+                                    std::vector<double>& virial,
+                                    const int& nframes,
+                                    const std::vector<double>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<double>& box,
+                                    const std::vector<double>& fparam,
+                                    const std::vector<double>& aparam) {
+  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
+                     aparam);  // arguments forwarded unchanged
+}  // overload: vector<double> energy, double vectors, no atom_* outputs
+void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
+                                    std::vector<float>& force,
+                                    std::vector<float>& virial,
+                                    const int& nframes,
+                                    const std::vector<float>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<float>& box,
+                                    const std::vector<float>& fparam,
+                                    const std::vector<float>& aparam) {
+  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
+                     aparam);  // arguments forwarded unchanged
+}  // overload: vector<double> energy, float vectors, no atom_* outputs
+void DeepPotTF::computew_mixed_type(double& ener,
+                                    std::vector<double>& force,
+                                    std::vector<double>& virial,
+                                    std::vector<double>& atom_energy,
+                                    std::vector<double>& atom_virial,
+                                    const int& nframes,
+                                    const std::vector<double>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<double>& box,
+                                    const std::vector<double>& fparam,
+                                    const std::vector<double>& aparam) {
+  compute_mixed_type(ener, force, virial, atom_energy, atom_virial, nframes,
+                     coord, atype, box, fparam, aparam);  // pure pass-through
+}  // overload: scalar double energy, double vectors, with atom_* outputs
+void DeepPotTF::computew_mixed_type(double& ener,
+                                    std::vector<float>& force,
+                                    std::vector<float>& virial,
+                                    std::vector<float>& atom_energy,
+                                    std::vector<float>& atom_virial,
+                                    const int& nframes,
+                                    const std::vector<float>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<float>& box,
+                                    const std::vector<float>& fparam,
+                                    const std::vector<float>& aparam) {
+  compute_mixed_type(ener, force, virial, atom_energy, atom_virial, nframes,
+                     coord, atype, box, fparam, aparam);  // pure pass-through
+}  // overload: scalar double energy, float vectors, with atom_* outputs
+void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
+                                    std::vector<double>& force,
+                                    std::vector<double>& virial,
+                                    std::vector<double>& atom_energy,
+                                    std::vector<double>& atom_virial,
+                                    const int& nframes,
+                                    const std::vector<double>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<double>& box,
+                                    const std::vector<double>& fparam,
+                                    const std::vector<double>& aparam) {
+  compute_mixed_type(ener, force, virial, atom_energy, atom_virial, nframes,
+                     coord, atype, box, fparam, aparam);  // pure pass-through
+}  // overload: vector<double> energy, double vectors, with atom_* outputs
+void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
+                                    std::vector<float>& force,
+                                    std::vector<float>& virial,
+                                    std::vector<float>& atom_energy,
+                                    std::vector<float>& atom_virial,
+                                    const int& nframes,
+                                    const std::vector<float>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<float>& box,
+                                    const std::vector<float>& fparam,
+                                    const std::vector<float>& aparam) {
+  compute_mixed_type(ener, force, virial, atom_energy, atom_virial, nframes,
+                     coord, atype, box, fparam, aparam);  // pure pass-through
+}  // overload: vector<double> energy, float vectors, with atom_* outputs

From ed76cff1baf25e0dec8d3047a6451a8f6100af7a Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 16 Jan 2024 20:54:45 -0500
Subject: [PATCH 85/97] resolve "Multiplication result converted to larger
 type" (#3149)

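Cast one `int` operand to `int64_t` (for `AddDim`) or `size_t` (for `resize`)
before multiplying, so that the product is computed in the wider type rather
than being computed in `int` and only converted afterwards.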

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/api_c/include/deepmd.hpp               | 70 +++++++++----------
 source/api_c/tests/test_deepdipole_hpp.cc     |  2 +-
 source/api_c/tests/test_deeppolar_hpp.cc      |  2 +-
 .../test_deeppot_a_fparam_aparam_nframes.cc   |  4 +-
 .../api_c/tests/test_deeppot_a_nframes_hpp.cc |  4 +-
 source/api_c/tests/test_utils.h               |  2 +-
 source/api_cc/src/DataModifier.cc             |  8 +--
 source/api_cc/src/DeepTensor.cc               |  4 +-
 source/api_cc/src/common.cc                   | 11 +--
 source/api_cc/tests/test_deepdipole.cc        |  2 +-
 source/api_cc/tests/test_deeppolar.cc         |  2 +-
 .../test_deeppot_a_fparam_aparam_nframes.cc   |  4 +-
 source/api_cc/tests/test_deeppot_a_nframes.cc |  4 +-
 source/api_cc/tests/test_utils.h              |  2 +-
 source/ipi/driver.cc                          | 10 +--
 source/lib/src/ewald.cc                       |  4 +-
 source/lib/src/neighbor_list.cc               |  2 +-
 source/lib/src/pairwise.cc                    |  4 +-
 source/lib/src/prod_env_mat.cc                |  5 +-
 source/lib/tests/test_coord.cc                |  8 +--
 source/lib/tests/test_map_aparam.cc           |  4 +-
 source/lib/tests/test_pair_tab.cc             |  8 +--
 source/lib/tests/test_prod_force_a.cc         | 10 +--
 source/lib/tests/test_prod_force_grad_a.cc    | 10 +--
 source/lib/tests/test_prod_force_grad_r.cc    | 10 +--
 source/lib/tests/test_prod_force_r.cc         | 10 +--
 source/lib/tests/test_prod_virial_a.cc        | 10 +--
 source/lib/tests/test_prod_virial_grad_a.cc   |  8 +--
 source/lib/tests/test_prod_virial_grad_r.cc   |  8 +--
 source/lib/tests/test_prod_virial_r.cc        | 10 +--
 source/lib/tests/test_soft_min_switch.cc      |  4 +-
 .../lib/tests/test_soft_min_switch_force.cc   |  6 +-
 .../tests/test_soft_min_switch_force_grad.cc  |  8 +--
 .../lib/tests/test_soft_min_switch_virial.cc  |  6 +-
 .../tests/test_soft_min_switch_virial_grad.cc |  8 +--
 source/lmp/fix_dplr.cpp                       |  2 +-
 source/lmp/pair_deepmd.cpp                    | 10 +--
 source/lmp/pppm_dplr.cpp                      |  6 +-
 source/md/src/Convert.cc                      |  4 +-
 source/md/src/Tabulated.cc                    |  2 +-
 source/op/descrpt.cc                          | 12 ++--
 source/op/descrpt_se_a_ef.cc                  |  8 +--
 source/op/descrpt_se_a_ef_para.cc             |  8 +--
 source/op/descrpt_se_a_ef_vert.cc             |  8 +--
 source/op/descrpt_se_a_mask.cc                | 10 +--
 source/op/ewald_recp.cc                       |  2 +-
 source/op/map_aparam.cc                       |  2 +-
 source/op/neighbor_stat.cc                    | 12 ++--
 source/op/pair_tab.cc                         |  4 +-
 source/op/prod_env_mat_multi_device.cc        | 34 ++++-----
 source/op/prod_env_mat_multi_device_nvnmd.cc  |  8 +--
 source/op/prod_force.cc                       |  2 +-
 source/op/prod_force_grad.cc                  |  2 +-
 source/op/prod_force_multi_device.cc          |  4 +-
 source/op/prod_force_se_a_grad.cc             |  2 +-
 source/op/prod_force_se_a_mask.cc             |  2 +-
 source/op/prod_force_se_a_mask_grad.cc        |  2 +-
 source/op/prod_force_se_r_grad.cc             |  2 +-
 source/op/prod_virial.cc                      |  2 +-
 source/op/prod_virial_grad.cc                 |  2 +-
 source/op/prod_virial_multi_device.cc         |  4 +-
 source/op/prod_virial_se_a_grad.cc            |  2 +-
 source/op/prod_virial_se_r_grad.cc            |  2 +-
 source/op/soft_min.cc                         |  2 +-
 source/op/soft_min_force.cc                   |  2 +-
 source/op/soft_min_virial.cc                  |  2 +-
 66 files changed, 222 insertions(+), 218 deletions(-)

diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 4a376e0bec..503a4c4b4b 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -644,8 +644,8 @@ class DeepPot {
     const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
     const int *atype_ = &atype[0];
     double *ener_ = _DP_Get_Energy_Pointer(ener, nframes);
-    force.resize(nframes * natoms * 3);
-    virial.resize(nframes * 9);
+    force.resize(static_cast<size_t>(nframes) * natoms * 3);
+    virial.resize(static_cast<size_t>(nframes) * 9);
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
     std::vector<VALUETYPE> fparam_, aparam_;
@@ -706,10 +706,10 @@ class DeepPot {
     const int *atype_ = &atype[0];
 
     double *ener_ = _DP_Get_Energy_Pointer(ener, nframes);
-    force.resize(nframes * natoms * 3);
-    virial.resize(nframes * 9);
-    atom_energy.resize(nframes * natoms);
-    atom_virial.resize(nframes * natoms * 9);
+    force.resize(static_cast<size_t>(nframes) * natoms * 3);
+    virial.resize(static_cast<size_t>(nframes) * 9);
+    atom_energy.resize(static_cast<size_t>(nframes) * natoms);
+    atom_virial.resize(static_cast<size_t>(nframes) * natoms * 9);
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
     VALUETYPE *atomic_ener_ = &atom_energy[0];
@@ -774,8 +774,8 @@ class DeepPot {
     const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
     const int *atype_ = &atype[0];
     double *ener_ = _DP_Get_Energy_Pointer(ener, nframes);
-    force.resize(nframes * natoms * 3);
-    virial.resize(nframes * 9);
+    force.resize(static_cast<size_t>(nframes) * natoms * 3);
+    virial.resize(static_cast<size_t>(nframes) * 9);
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
     std::vector<VALUETYPE> fparam_, aparam_;
@@ -845,10 +845,10 @@ class DeepPot {
     const int *atype_ = &atype[0];
 
     double *ener_ = _DP_Get_Energy_Pointer(ener, nframes);
-    force.resize(nframes * natoms * 3);
-    virial.resize(nframes * 9);
-    atom_energy.resize(nframes * natoms);
-    atom_virial.resize(nframes * natoms * 9);
+    force.resize(static_cast<size_t>(nframes) * natoms * 3);
+    virial.resize(static_cast<size_t>(nframes) * 9);
+    atom_energy.resize(static_cast<size_t>(nframes) * natoms);
+    atom_virial.resize(static_cast<size_t>(nframes) * natoms * 9);
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
     VALUETYPE *atomic_ener_ = &atom_energy[0];
@@ -910,8 +910,8 @@ class DeepPot {
     const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
     const int *atype_ = &atype[0];
     double *ener_ = _DP_Get_Energy_Pointer(ener, nframes);
-    force.resize(nframes * natoms * 3);
-    virial.resize(nframes * 9);
+    force.resize(static_cast<size_t>(nframes) * natoms * 3);
+    virial.resize(static_cast<size_t>(nframes) * 9);
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
     std::vector<VALUETYPE> fparam_, aparam_;
@@ -972,10 +972,10 @@ class DeepPot {
     const int *atype_ = &atype[0];
 
     double *ener_ = _DP_Get_Energy_Pointer(ener, nframes);
-    force.resize(nframes * natoms * 3);
-    virial.resize(nframes * 9);
-    atom_energy.resize(nframes * natoms);
-    atom_virial.resize(nframes * natoms * 9);
+    force.resize(static_cast<size_t>(nframes) * natoms * 3);
+    virial.resize(static_cast<size_t>(nframes) * 9);
+    atom_energy.resize(static_cast<size_t>(nframes) * natoms);
+    atom_virial.resize(static_cast<size_t>(nframes) * natoms * 9);
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
     VALUETYPE *atomic_ener_ = &atom_energy[0];
@@ -1079,7 +1079,7 @@ class DeepPot {
                           const int &dparam,
                           const std::vector<VALUETYPE> &param) const {
     if (param.size() == dparam) {
-      out_param.resize(nframes * dparam);
+      out_param.resize(static_cast<size_t>(nframes) * dparam);
       for (int ii = 0; ii < nframes; ++ii) {
         std::copy(param.begin(), param.end(), out_param.begin() + ii * dparam);
       }
@@ -1210,7 +1210,7 @@ class DeepPotModelDevi {
     virial.resize(numb_models);
     for (int i = 0; i < numb_models; i++) {
       ener[i] = energy_flat[i];
-      force[i].resize(natoms * 3);
+      force[i].resize(static_cast<size_t>(natoms) * 3);
       virial[i].resize(9);
       for (int j = 0; j < natoms * 3; j++) {
         force[i][j] = force_flat[i * natoms * 3 + j];
@@ -1292,10 +1292,10 @@ class DeepPotModelDevi {
     atom_virial.resize(numb_models);
     for (int i = 0; i < numb_models; i++) {
       ener[i] = energy_flat[i];
-      force[i].resize(natoms * 3);
+      force[i].resize(static_cast<size_t>(natoms) * 3);
       virial[i].resize(9);
       atom_energy[i].resize(natoms);
-      atom_virial[i].resize(natoms * 9);
+      atom_virial[i].resize(static_cast<size_t>(natoms) * 9);
       for (int j = 0; j < natoms * 3; j++) {
         force[i][j] = force_flat[i * natoms * 3 + j];
       }
@@ -1496,7 +1496,7 @@ class DeepPotModelDevi {
                           const int &dparam,
                           const std::vector<VALUETYPE> &param) const {
     if (param.size() == dparam) {
-      out_param.resize(nframes * dparam);
+      out_param.resize(static_cast<size_t>(nframes) * dparam);
       for (int ii = 0; ii < nframes; ++ii) {
         std::copy(param.begin(), param.end(), out_param.begin() + ii * dparam);
       }
@@ -1653,8 +1653,8 @@ class DeepTensor {
     const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
     const int *atype_ = &atype[0];
     global_tensor.resize(odim);
-    force.resize(odim * natoms * 3);
-    virial.resize(odim * 9);
+    force.resize(static_cast<size_t>(odim) * natoms * 3);
+    virial.resize(static_cast<size_t>(odim) * 9);
     VALUETYPE *global_tensor_ = &global_tensor[0];
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
@@ -1697,9 +1697,9 @@ class DeepTensor {
     const int *atype_ = &atype[0];
 
     global_tensor.resize(odim);
-    force.resize(odim * natoms * 3);
-    virial.resize(odim * 9);
-    atom_virial.resize(odim * natoms * 9);
+    force.resize(static_cast<size_t>(odim) * natoms * 3);
+    virial.resize(static_cast<size_t>(odim) * 9);
+    atom_virial.resize(static_cast<size_t>(odim) * natoms * 9);
     VALUETYPE *global_tensor_ = &global_tensor[0];
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
@@ -1752,8 +1752,8 @@ class DeepTensor {
     const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
     const int *atype_ = &atype[0];
     global_tensor.resize(odim);
-    force.resize(odim * natoms * 3);
-    virial.resize(odim * 9);
+    force.resize(static_cast<size_t>(odim) * natoms * 3);
+    virial.resize(static_cast<size_t>(odim) * 9);
     VALUETYPE *global_tensor_ = &global_tensor[0];
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
@@ -1800,9 +1800,9 @@ class DeepTensor {
     const int *atype_ = &atype[0];
 
     global_tensor.resize(odim);
-    force.resize(odim * natoms * 3);
-    virial.resize(odim * 9);
-    atom_virial.resize(odim * natoms * 9);
+    force.resize(static_cast<size_t>(odim) * natoms * 3);
+    virial.resize(static_cast<size_t>(odim) * 9);
+    atom_virial.resize(static_cast<size_t>(odim) * natoms * 9);
     VALUETYPE *global_tensor_ = &global_tensor[0];
     VALUETYPE *force_ = &force[0];
     VALUETYPE *virial_ = &virial[0];
@@ -1954,7 +1954,7 @@ class DipoleChargeModifier {
     const int *dpairs = reinterpret_cast<const int *>(&pairs[0]);
     const VALUETYPE *delef = &delef_[0];
 
-    dfcorr_.resize(natoms * 3);
+    dfcorr_.resize(static_cast<size_t>(natoms) * 3);
     dvcorr_.resize(9);
     VALUETYPE *dfcorr = &dfcorr_[0];
     VALUETYPE *dvcorr = &dvcorr_[0];
@@ -2071,7 +2071,7 @@ void select_map(std::vector<VT> &out,
       nall2++;
     }
   }
-  out.resize(nall2 * stride);
+  out.resize(static_cast<size_t>(nall2) * stride);
   DP_SelectMapInt(&in[0], &fwd_map[0], stride, nall1, nall2, &out[0]);
 };
 
diff --git a/source/api_c/tests/test_deepdipole_hpp.cc b/source/api_c/tests/test_deepdipole_hpp.cc
index 49958469e0..f781c34c5b 100644
--- a/source/api_c/tests/test_deepdipole_hpp.cc
+++ b/source/api_c/tests/test_deepdipole_hpp.cc
@@ -234,7 +234,7 @@ class TestInferDeepDipoleNew : public ::testing::Test {
       }
     }
 
-    expected_gv.resize(odim * 9);
+    expected_gv.resize(static_cast<size_t>(odim) * 9);
     for (int kk = 0; kk < odim; ++kk) {
       for (int ii = 0; ii < natoms; ++ii) {
         for (int dd = 0; dd < 9; ++dd) {
diff --git a/source/api_c/tests/test_deeppolar_hpp.cc b/source/api_c/tests/test_deeppolar_hpp.cc
index 1fc2075afb..63ebf5d760 100644
--- a/source/api_c/tests/test_deeppolar_hpp.cc
+++ b/source/api_c/tests/test_deeppolar_hpp.cc
@@ -466,7 +466,7 @@ class TestInferDeepPolarNew : public ::testing::Test {
       }
     }
 
-    expected_gv.resize(odim * 9);
+    expected_gv.resize(static_cast<size_t>(odim) * 9);
     for (int kk = 0; kk < odim; ++kk) {
       for (int ii = 0; ii < natoms; ++ii) {
         for (int dd = 0; dd < 9; ++dd) {
diff --git a/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc b/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc
index b94087916a..383c8f5fb1 100644
--- a/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc
+++ b/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc
@@ -126,7 +126,7 @@ class TestInferDeepPotAFparamAparamNFrames : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
@@ -767,7 +767,7 @@ class TestInferDeepPotAFparamAparamNFramesSingleParam : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
diff --git a/source/api_c/tests/test_deeppot_a_nframes_hpp.cc b/source/api_c/tests/test_deeppot_a_nframes_hpp.cc
index 1177957899..af132c0146 100644
--- a/source/api_c/tests/test_deeppot_a_nframes_hpp.cc
+++ b/source/api_c/tests/test_deeppot_a_nframes_hpp.cc
@@ -119,7 +119,7 @@ class TestInferDeepPotANFrames : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
@@ -728,7 +728,7 @@ class TestInferDeepPotANFramesNoPbc : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
diff --git a/source/api_c/tests/test_utils.h b/source/api_c/tests/test_utils.h
index 01636156b2..5167732bc8 100644
--- a/source/api_c/tests/test_utils.h
+++ b/source/api_c/tests/test_utils.h
@@ -42,7 +42,7 @@ inline void _fold_back(std::vector<VALUETYPE> &out,
                        const int nall,
                        const int ndim,
                        const int nframes = 1) {
-  out.resize(nframes * nloc * ndim);
+  out.resize(static_cast<size_t>(nframes) * nloc * ndim);
   _fold_back<VALUETYPE>(out.begin(), in.begin(), mapping, nloc, nall, ndim,
                         nframes);
 }
diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc
index d687c02e75..c44bbceaa2 100644
--- a/source/api_cc/src/DataModifier.cc
+++ b/source/api_cc/src/DataModifier.cc
@@ -123,7 +123,7 @@ void DipoleChargeModifier::run_model(
   auto of = output_f.flat<MODELTYPE>();
   auto ov = output_v.flat<MODELTYPE>();
 
-  dforce.resize(nall * 3);
+  dforce.resize(static_cast<size_t>(nall) * 3);
   dvirial.resize(9);
   for (int ii = 0; ii < nall * 3; ++ii) {
     dforce[ii] = of(ii);
@@ -186,7 +186,7 @@ void DipoleChargeModifier::compute(
   int nall_real = real_bkw_map.size();
   int nloc_real = nall_real - nghost_real;
   if (nloc_real == 0) {
-    dfcorr_.resize(nall * 3);
+    dfcorr_.resize(static_cast<size_t>(nall) * 3);
     dvcorr_.resize(9);
     fill(dfcorr_.begin(), dfcorr_.end(), (VALUETYPE)0.0);
     fill(dvcorr_.begin(), dvcorr_.end(), (VALUETYPE)0.0);
@@ -196,8 +196,8 @@ void DipoleChargeModifier::compute(
   std::vector<VALUETYPE> dcoord_real;
   std::vector<VALUETYPE> delef_real;
   std::vector<int> datype_real;
-  dcoord_real.resize(nall_real * 3);
-  delef_real.resize(nall_real * 3);
+  dcoord_real.resize(static_cast<size_t>(nall_real) * 3);
+  delef_real.resize(static_cast<size_t>(nall_real) * 3);
   datype_real.resize(nall_real);
   // fwd map
   select_map<VALUETYPE>(dcoord_real, dcoord_, real_fwd_map, 3);
diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc
index 655819e086..11a131a604 100644
--- a/source/api_cc/src/DeepTensor.cc
+++ b/source/api_cc/src/DeepTensor.cc
@@ -253,7 +253,7 @@ void DeepTensor::run_model(
   }
 
   // component-wise virial
-  dvirial_.resize(odim * 9);
+  dvirial_.resize(static_cast<size_t>(odim) * 9);
   for (unsigned ii = 0; ii < odim * 9; ++ii) {
     dvirial_[ii] = ov(ii);
   }
@@ -266,7 +266,7 @@ void DeepTensor::run_model(
   std::vector<int> sel_srt = sel_fwd;
   select_map<int>(sel_srt, sel_fwd, atommap.get_fwd_map(), 1);
   std::remove(sel_srt.begin(), sel_srt.end(), -1);
-  datom_tensor_.resize(nsel * odim);
+  datom_tensor_.resize(static_cast<size_t>(nsel) * odim);
   select_map<VALUETYPE>(datom_tensor_, datom_tensor, sel_srt, odim);
 
   // component-wise atomic virial
diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc
index 33c433a90a..2f75aaa291 100644
--- a/source/api_cc/src/common.cc
+++ b/source/api_cc/src/common.cc
@@ -171,14 +171,15 @@ void deepmd::select_real_atoms_coord(std::vector<VALUETYPE>& dcoord,
   // resize to nall_real
   nall_real = bkw_map.size();
   nloc_real = nall_real - nghost_real;
-  dcoord.resize(nframes * nall_real * 3);
+  dcoord.resize(static_cast<size_t>(nframes) * nall_real * 3);
   datype.resize(nall_real);
   // fwd map
   select_map<VALUETYPE>(dcoord, dcoord_, fwd_map, 3, nframes, nall_real, nall);
   select_map<int>(datype, datype_, fwd_map, 1);
   // aparam
   if (daparam > 0) {
-    aparam.resize(nframes * (aparam_nall ? nall_real : nloc_real));
+    aparam.resize(static_cast<size_t>(nframes) *
+                  (aparam_nall ? nall_real : nloc_real));
     select_map<VALUETYPE>(aparam, aparam_, fwd_map, daparam, nframes,
                           (aparam_nall ? nall_real : nloc_real),
                           (aparam_nall ? nall : (nall - nghost)));
@@ -396,7 +397,7 @@ int deepmd::session_input_tensors(
 
   TensorShape coord_shape;
   coord_shape.AddDim(nframes);
-  coord_shape.AddDim(nall * 3);
+  coord_shape.AddDim(static_cast<int64_t>(nall) * 3);
   TensorShape type_shape;
   type_shape.AddDim(nframes);
   type_shape.AddDim(nall);
@@ -540,7 +541,7 @@ int deepmd::session_input_tensors(
 
   TensorShape coord_shape;
   coord_shape.AddDim(nframes);
-  coord_shape.AddDim(nall * 3);
+  coord_shape.AddDim(static_cast<int64_t>(nall) * 3);
   TensorShape type_shape;
   type_shape.AddDim(nframes);
   type_shape.AddDim(nall);
@@ -675,7 +676,7 @@ int deepmd::session_input_tensors_mixed_type(
 
   TensorShape coord_shape;
   coord_shape.AddDim(nframes);
-  coord_shape.AddDim(nall * 3);
+  coord_shape.AddDim(static_cast<int64_t>(nall) * 3);
   TensorShape type_shape;
   type_shape.AddDim(nframes);
   type_shape.AddDim(nall);
diff --git a/source/api_cc/tests/test_deepdipole.cc b/source/api_cc/tests/test_deepdipole.cc
index b8f2195728..86a8a4131f 100644
--- a/source/api_cc/tests/test_deepdipole.cc
+++ b/source/api_cc/tests/test_deepdipole.cc
@@ -238,7 +238,7 @@ class TestInferDeepDipoleNew : public ::testing::Test {
       }
     }
 
-    expected_gv.resize(odim * 9);
+    expected_gv.resize(static_cast<size_t>(odim) * 9);
     for (int kk = 0; kk < odim; ++kk) {
       for (int ii = 0; ii < natoms; ++ii) {
         for (int dd = 0; dd < 9; ++dd) {
diff --git a/source/api_cc/tests/test_deeppolar.cc b/source/api_cc/tests/test_deeppolar.cc
index d8ad497054..89014fd245 100644
--- a/source/api_cc/tests/test_deeppolar.cc
+++ b/source/api_cc/tests/test_deeppolar.cc
@@ -470,7 +470,7 @@ class TestInferDeepPolarNew : public ::testing::Test {
       }
     }
 
-    expected_gv.resize(odim * 9);
+    expected_gv.resize(static_cast<size_t>(odim) * 9);
     for (int kk = 0; kk < odim; ++kk) {
       for (int ii = 0; ii < natoms; ++ii) {
         for (int dd = 0; dd < 9; ++dd) {
diff --git a/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc b/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc
index 0f45eaabb0..0851523814 100644
--- a/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc
+++ b/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc
@@ -127,7 +127,7 @@ class TestInferDeepPotAFparamAparamNFrames : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
@@ -768,7 +768,7 @@ class TestInferDeepPotAFparamAparamNFramesSingleParam : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
diff --git a/source/api_cc/tests/test_deeppot_a_nframes.cc b/source/api_cc/tests/test_deeppot_a_nframes.cc
index 835971d106..c83a7a0b41 100644
--- a/source/api_cc/tests/test_deeppot_a_nframes.cc
+++ b/source/api_cc/tests/test_deeppot_a_nframes.cc
@@ -123,7 +123,7 @@ class TestInferDeepPotANFrames : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
@@ -732,7 +732,7 @@ class TestInferDeepPotANFramesNoPbc : public ::testing::Test {
     EXPECT_EQ(nframes * natoms * 3, expected_f.size());
     EXPECT_EQ(nframes * natoms * 9, expected_v.size());
     expected_tot_e.resize(nframes);
-    expected_tot_v.resize(nframes * 9);
+    expected_tot_v.resize(static_cast<size_t>(nframes) * 9);
     std::fill(expected_tot_e.begin(), expected_tot_e.end(), 0.);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
     for (int kk = 0; kk < nframes; ++kk) {
diff --git a/source/api_cc/tests/test_utils.h b/source/api_cc/tests/test_utils.h
index 46732ca935..d06823b4e0 100644
--- a/source/api_cc/tests/test_utils.h
+++ b/source/api_cc/tests/test_utils.h
@@ -42,7 +42,7 @@ inline void _fold_back(std::vector<VALUETYPE> &out,
                        const int nall,
                        const int ndim,
                        const int nframes = 1) {
-  out.resize(nframes * nloc * ndim);
+  out.resize(static_cast<size_t>(nframes) * nloc * ndim);
   _fold_back<VALUETYPE>(out.begin(), in.begin(), mapping, nloc, nall, ndim,
                         nframes);
 }
diff --git a/source/ipi/driver.cc b/source/ipi/driver.cc
index 22d5415bd1..1e3d92eb5e 100644
--- a/source/ipi/driver.cc
+++ b/source/ipi/driver.cc
@@ -190,11 +190,11 @@ int main(int argc, char *argv[]) {
                     << std::endl;
         }
 
-        dcoord.resize(3 * natoms);
-        dforce.resize(3 * natoms, 0);
-        dcoord_tmp.resize(3 * natoms);
-        dforce_tmp.resize(3 * natoms, 0);
-        msg_buff = new double[3 * natoms];
+        dcoord.resize(3 * static_cast<size_t>(natoms));
+        dforce.resize(3 * static_cast<size_t>(natoms), 0);
+        dcoord_tmp.resize(3 * static_cast<size_t>(natoms));
+        dforce_tmp.resize(3 * static_cast<size_t>(natoms), 0);
+        msg_buff = new double[3 * static_cast<size_t>(natoms)];
       }
 
       // get coord
diff --git a/source/lib/src/ewald.cc b/source/lib/src/ewald.cc
index 0f3b960d9f..ec201c4d7d 100644
--- a/source/lib/src/ewald.cc
+++ b/source/lib/src/ewald.cc
@@ -98,7 +98,7 @@ void deepmd::ewald_recp(VALUETYPE& ener,
   // natoms
   int natoms = charge.size();
   // init returns
-  force.resize(natoms * 3);
+  force.resize(static_cast<size_t>(natoms) * 3);
   virial.resize(9);
   ener = 0;
   fill(force.begin(), force.end(), static_cast<VALUETYPE>(0));
@@ -179,7 +179,7 @@ void deepmd::ewald_recp(VALUETYPE& ener,
   std::vector<std::vector<VALUETYPE> > thread_force(nthreads);
   std::vector<std::vector<VALUETYPE> > thread_virial(nthreads);
   for (int ii = 0; ii < nthreads; ++ii) {
-    thread_force[ii].resize(natoms * 3, 0.);
+    thread_force[ii].resize(static_cast<size_t>(natoms) * 3, 0.);
     thread_virial[ii].resize(9, 0.);
   }
   // calculate ener, force and virial
diff --git a/source/lib/src/neighbor_list.cc b/source/lib/src/neighbor_list.cc
index fc797ce6a9..6723e3de66 100644
--- a/source/lib/src/neighbor_list.cc
+++ b/source/lib/src/neighbor_list.cc
@@ -784,7 +784,7 @@ void copy_coord(std::vector<double>& out_c,
   build_clist(clist, in_c, nloc, nat_stt, ncell, nat_stt, ncell, region, ncell);
 
   // copy local atoms
-  out_c.resize(nloc * 3);
+  out_c.resize(static_cast<size_t>(nloc) * 3);
   out_t.resize(nloc);
   mapping.resize(nloc);
   copy(in_c.begin(), in_c.end(), out_c.begin());
diff --git a/source/lib/src/pairwise.cc b/source/lib/src/pairwise.cc
index 3fea27bd71..f5b21d9856 100644
--- a/source/lib/src/pairwise.cc
+++ b/source/lib/src/pairwise.cc
@@ -93,7 +93,7 @@ void deepmd::dprc_pairwise_map_cpu(
   // (3, 4, 0, 1, 2, 10, 11),
   // (3, 4, 5, 6, 7, 10, -1),
   // (3, 4, 8, 9, -1, 10, -1)
-  forward_qmmm_map.resize((nfragments - 1) * map_size);
+  forward_qmmm_map.resize(static_cast<size_t>(nfragments - 1) * map_size);
   std::fill(forward_qmmm_map.begin(), forward_qmmm_map.end(), -1);
   int nqm_real = nloc;  // init for nfragments = 1
   for (int ii = 0; ii < nfragments - 1; ++ii) {
@@ -133,7 +133,7 @@ void deepmd::dprc_pairwise_map_cpu(
   // (2, 3, 4, 0, 1, -1, -1, -1, -1, -1, 5, 6)
   // (-1, -1, -1, 0, 1, 2, 3, 4, -1, -1, 5, -1)
   // (-1, -1, -1, 0, 1, -1, -1, -1, 2, 3, 5, -1)
-  backward_qmmm_map.resize((nfragments - 1) * nall);
+  backward_qmmm_map.resize(static_cast<size_t>(nfragments - 1) * nall);
   std::fill(backward_qmmm_map.begin(), backward_qmmm_map.end(), -1);
   for (int ii = 0; ii < nfragments - 1; ++ii) {
     for (int jj = 0; jj < map_size; ++jj) {
diff --git a/source/lib/src/prod_env_mat.cc b/source/lib/src/prod_env_mat.cc
index 63813069b0..81984c78e4 100644
--- a/source/lib/src/prod_env_mat.cc
+++ b/source/lib/src/prod_env_mat.cc
@@ -304,7 +304,8 @@ void deepmd::env_mat_nbor_update(InputNlist &inlist,
     max_nbor_size = _max_nbor_size;
 
     // copy nbor list from host to the device
-    std::vector<int> nbor_list_host(inum * max_nbor_size, 0);
+    std::vector<int> nbor_list_host(static_cast<size_t>(inum) * max_nbor_size,
+                                    0);
     int **_firstneigh = (int **)malloc(sizeof(int *) * inum);
     for (int ii = 0; ii < inum; ii++) {
       _firstneigh[ii] = nbor_list_dev + ii * max_nbor_size;
@@ -313,7 +314,7 @@ void deepmd::env_mat_nbor_update(InputNlist &inlist,
       }
     }
     memcpy_host_to_device(nbor_list_dev, &nbor_list_host[0],
-                          inum * max_nbor_size);
+                          static_cast<size_t>(inum) * max_nbor_size);
     memcpy_host_to_device(gpu_inlist.firstneigh, _firstneigh, inum);
     free(_firstneigh);
   }
diff --git a/source/lib/tests/test_coord.cc b/source/lib/tests/test_coord.cc
index c939dd6fa6..0427521416 100644
--- a/source/lib/tests/test_coord.cc
+++ b/source/lib/tests/test_coord.cc
@@ -234,7 +234,7 @@ TEST_F(TestCopyCoord, cpu) {
   // 	    << nloc << " "
   // 	    << nall << std::endl;
 
-  out_c.resize(nall * 3);
+  out_c.resize(static_cast<size_t>(nall) * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
@@ -322,7 +322,7 @@ TEST_F(TestCopyCoord, gpu) {
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  out_c.resize(nall * 3);
+  out_c.resize(static_cast<size_t>(nall) * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
@@ -468,7 +468,7 @@ TEST_F(TestCopyCoordMoreCell, cpu) {
   // 	    << nloc << " "
   // 	    << nall << std::endl;
 
-  out_c.resize(nall * 3);
+  out_c.resize(static_cast<size_t>(nall) * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
@@ -556,7 +556,7 @@ TEST_F(TestCopyCoordMoreCell, gpu) {
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  out_c.resize(nall * 3);
+  out_c.resize(static_cast<size_t>(nall) * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
diff --git a/source/lib/tests/test_map_aparam.cc b/source/lib/tests/test_map_aparam.cc
index 4adf3ffd98..061ae49f28 100644
--- a/source/lib/tests/test_map_aparam.cc
+++ b/source/lib/tests/test_map_aparam.cc
@@ -65,7 +65,7 @@ class TestMapAparam : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -74,7 +74,7 @@ class TestMapAparam : public ::testing::Test {
         nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
     }
-    aparam.resize(nall * numb_aparam);
+    aparam.resize(static_cast<size_t>(nall) * numb_aparam);
     for (int ii = 0; ii < nall * numb_aparam; ++ii) {
       aparam[ii] = 10 - 0.1 * ii;
     }
diff --git a/source/lib/tests/test_pair_tab.cc b/source/lib/tests/test_pair_tab.cc
index 9f68cd98b9..7002beb1da 100644
--- a/source/lib/tests/test_pair_tab.cc
+++ b/source/lib/tests/test_pair_tab.cc
@@ -235,10 +235,10 @@ class TestPairTab : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
diff --git a/source/lib/tests/test_prod_force_a.cc b/source/lib/tests/test_prod_force_a.cc
index 2031f086b4..f49b173769 100644
--- a/source/lib/tests/test_prod_force_a.cc
+++ b/source/lib/tests/test_prod_force_a.cc
@@ -82,10 +82,10 @@ class TestProdForceA : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij_a.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij_a.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -105,7 +105,7 @@ class TestProdForceA : public ::testing::Test {
         }
       }
     }
-    net_deriv.resize(nloc * ndescrpt);
+    net_deriv.resize(static_cast<size_t>(nloc) * ndescrpt);
     for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
diff --git a/source/lib/tests/test_prod_force_grad_a.cc b/source/lib/tests/test_prod_force_grad_a.cc
index abb04eaa01..a946639638 100644
--- a/source/lib/tests/test_prod_force_grad_a.cc
+++ b/source/lib/tests/test_prod_force_grad_a.cc
@@ -93,10 +93,10 @@ class TestProdForceGradA : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij_a.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij_a.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -116,7 +116,7 @@ class TestProdForceGradA : public ::testing::Test {
         }
       }
     }
-    grad.resize(nloc * 3);
+    grad.resize(static_cast<size_t>(nloc) * 3);
     for (int ii = 0; ii < nloc * 3; ++ii) {
       grad[ii] = 10 - ii * 0.1;
     }
diff --git a/source/lib/tests/test_prod_force_grad_r.cc b/source/lib/tests/test_prod_force_grad_r.cc
index c8a27077c3..e143633bea 100644
--- a/source/lib/tests/test_prod_force_grad_r.cc
+++ b/source/lib/tests/test_prod_force_grad_r.cc
@@ -67,10 +67,10 @@ class TestProdForceGradR : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij_a.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij_a.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -90,7 +90,7 @@ class TestProdForceGradR : public ::testing::Test {
         }
       }
     }
-    grad.resize(nloc * 3);
+    grad.resize(static_cast<size_t>(nloc) * 3);
     for (int ii = 0; ii < nloc * 3; ++ii) {
       grad[ii] = 10 - ii * 0.1;
     }
diff --git a/source/lib/tests/test_prod_force_r.cc b/source/lib/tests/test_prod_force_r.cc
index ff3245742d..544152c759 100644
--- a/source/lib/tests/test_prod_force_r.cc
+++ b/source/lib/tests/test_prod_force_r.cc
@@ -79,10 +79,10 @@ class TestProdForceR : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij_a.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij_a.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -102,7 +102,7 @@ class TestProdForceR : public ::testing::Test {
         }
       }
     }
-    net_deriv.resize(nloc * ndescrpt);
+    net_deriv.resize(static_cast<size_t>(nloc) * ndescrpt);
     for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
diff --git a/source/lib/tests/test_prod_virial_a.cc b/source/lib/tests/test_prod_virial_a.cc
index b2f2a11989..c6fe254db8 100644
--- a/source/lib/tests/test_prod_virial_a.cc
+++ b/source/lib/tests/test_prod_virial_a.cc
@@ -118,10 +118,10 @@ class TestProdVirialA : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -144,7 +144,7 @@ class TestProdVirialA : public ::testing::Test {
         rij[ii * nnei * 3 + jj] = t_rij[jj];
       }
     }
-    net_deriv.resize(nloc * ndescrpt);
+    net_deriv.resize(static_cast<size_t>(nloc) * ndescrpt);
     for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
diff --git a/source/lib/tests/test_prod_virial_grad_a.cc b/source/lib/tests/test_prod_virial_grad_a.cc
index 09af51d6ed..598df91c86 100644
--- a/source/lib/tests/test_prod_virial_grad_a.cc
+++ b/source/lib/tests/test_prod_virial_grad_a.cc
@@ -87,10 +87,10 @@ class TestProdVirialGradA : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
diff --git a/source/lib/tests/test_prod_virial_grad_r.cc b/source/lib/tests/test_prod_virial_grad_r.cc
index 93a7291176..9b520ed898 100644
--- a/source/lib/tests/test_prod_virial_grad_r.cc
+++ b/source/lib/tests/test_prod_virial_grad_r.cc
@@ -61,10 +61,10 @@ class TestProdVirialGradR : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
diff --git a/source/lib/tests/test_prod_virial_r.cc b/source/lib/tests/test_prod_virial_r.cc
index aed4abc512..f0fab48e78 100644
--- a/source/lib/tests/test_prod_virial_r.cc
+++ b/source/lib/tests/test_prod_virial_r.cc
@@ -118,10 +118,10 @@ class TestProdVirialR : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    env.resize(nloc * ndescrpt);
-    env_deriv.resize(nloc * ndescrpt * 3);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    env.resize(static_cast<size_t>(nloc) * ndescrpt);
+    env_deriv.resize(static_cast<size_t>(nloc) * ndescrpt * 3);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -144,7 +144,7 @@ class TestProdVirialR : public ::testing::Test {
         rij[ii * nnei * 3 + jj] = t_rij[jj];
       }
     }
-    net_deriv.resize(nloc * ndescrpt);
+    net_deriv.resize(static_cast<size_t>(nloc) * ndescrpt);
     for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
diff --git a/source/lib/tests/test_soft_min_switch.cc b/source/lib/tests/test_soft_min_switch.cc
index f7a4f43b1a..fbce26e352 100644
--- a/source/lib/tests/test_soft_min_switch.cc
+++ b/source/lib/tests/test_soft_min_switch.cc
@@ -55,8 +55,8 @@ class TestSoftMinSwitch : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
diff --git a/source/lib/tests/test_soft_min_switch_force.cc b/source/lib/tests/test_soft_min_switch_force.cc
index dacc681792..a49661fdbd 100644
--- a/source/lib/tests/test_soft_min_switch_force.cc
+++ b/source/lib/tests/test_soft_min_switch_force.cc
@@ -76,8 +76,8 @@ class TestSoftMinSwitchForce : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -95,7 +95,7 @@ class TestSoftMinSwitchForce : public ::testing::Test {
       }
     }
     sw_value.resize(nloc);
-    sw_deriv.resize(nloc * nnei * 3);
+    sw_deriv.resize(static_cast<size_t>(nloc) * nnei * 3);
     deepmd::soft_min_switch_cpu<double>(&sw_value[0], &sw_deriv[0], &rij[0],
                                         &nlist[0], nloc, nnei, alpha, rmin,
                                         rmax);
diff --git a/source/lib/tests/test_soft_min_switch_force_grad.cc b/source/lib/tests/test_soft_min_switch_force_grad.cc
index 7c36296a79..9cef91bed0 100644
--- a/source/lib/tests/test_soft_min_switch_force_grad.cc
+++ b/source/lib/tests/test_soft_min_switch_force_grad.cc
@@ -56,8 +56,8 @@ class TestSoftMinSwitchForceGrad : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -75,11 +75,11 @@ class TestSoftMinSwitchForceGrad : public ::testing::Test {
       }
     }
     sw_value.resize(nloc);
-    sw_deriv.resize(nloc * nnei * 3);
+    sw_deriv.resize(static_cast<size_t>(nloc) * nnei * 3);
     deepmd::soft_min_switch_cpu<double>(&sw_value[0], &sw_deriv[0], &rij[0],
                                         &nlist[0], nloc, nnei, alpha, rmin,
                                         rmax);
-    grad.resize(nloc * 3);
+    grad.resize(static_cast<size_t>(nloc) * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       grad[ii] = 1.0 - ii * 0.1;
     }
diff --git a/source/lib/tests/test_soft_min_switch_virial.cc b/source/lib/tests/test_soft_min_switch_virial.cc
index 76ddf9fa7f..8b38805528 100644
--- a/source/lib/tests/test_soft_min_switch_virial.cc
+++ b/source/lib/tests/test_soft_min_switch_virial.cc
@@ -121,8 +121,8 @@ class TestSoftMinSwitchVirial : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -140,7 +140,7 @@ class TestSoftMinSwitchVirial : public ::testing::Test {
       }
     }
     sw_value.resize(nloc);
-    sw_deriv.resize(nloc * nnei * 3);
+    sw_deriv.resize(static_cast<size_t>(nloc) * nnei * 3);
     deepmd::soft_min_switch_cpu<double>(&sw_value[0], &sw_deriv[0], &rij[0],
                                         &nlist[0], nloc, nnei, alpha, rmin,
                                         rmax);
diff --git a/source/lib/tests/test_soft_min_switch_virial_grad.cc b/source/lib/tests/test_soft_min_switch_virial_grad.cc
index 315880b3ac..fef87d4d4e 100644
--- a/source/lib/tests/test_soft_min_switch_virial_grad.cc
+++ b/source/lib/tests/test_soft_min_switch_virial_grad.cc
@@ -56,8 +56,8 @@ class TestSoftMinSwitchVirialGrad : public ::testing::Test {
     }
     build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
                 ncell, ext_stt, ext_end, region, ncell);
-    nlist.resize(nloc * nnei);
-    rij.resize(nloc * nnei * 3);
+    nlist.resize(static_cast<size_t>(nloc) * nnei);
+    rij.resize(static_cast<size_t>(nloc) * nnei * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
       format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
@@ -75,11 +75,11 @@ class TestSoftMinSwitchVirialGrad : public ::testing::Test {
       }
     }
     sw_value.resize(nloc);
-    sw_deriv.resize(nloc * nnei * 3);
+    sw_deriv.resize(static_cast<size_t>(nloc) * nnei * 3);
     deepmd::soft_min_switch_cpu<double>(&sw_value[0], &sw_deriv[0], &rij[0],
                                         &nlist[0], nloc, nnei, alpha, rmin,
                                         rmax);
-    grad.resize(nloc * 3);
+    grad.resize(static_cast<size_t>(nloc) * 3);
     for (int ii = 0; ii < nloc; ++ii) {
       grad[ii] = 1.0 - ii * 0.1;
     }
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index 628f435bb7..ea60023e26 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -517,7 +517,7 @@ void FixDPLR::pre_force(int vflag) {
 
   int odim = dpt.output_dim();
   assert(odim == 3);
-  dipole_recd.resize(nall * 3);
+  dipole_recd.resize(static_cast<size_t>(nall) * 3);
   fill(dipole_recd.begin(), dipole_recd.end(), 0.0);
   for (int ii = 0; ii < valid_pairs.size(); ++ii) {
     int idx0 = valid_pairs[ii].first;
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 3a6c1c8bbf..90aa453143 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -204,7 +204,7 @@ static void make_uniform_aparam(vector<double> &daparam,
                                 const vector<double> &aparam,
                                 const int &nlocal) {
   unsigned dim_aparam = aparam.size();
-  daparam.resize(dim_aparam * nlocal);
+  daparam.resize(static_cast<size_t>(dim_aparam) * nlocal);
   for (int ii = 0; ii < nlocal; ++ii) {
     for (int jj = 0; jj < dim_aparam; ++jj) {
       daparam[ii * dim_aparam + jj] = aparam[jj];
@@ -247,7 +247,7 @@ void PairDeepMD::make_aparam_from_compute(vector<double> &aparam) {
 
   assert(compute);
   int nlocal = atom->nlocal;
-  aparam.resize(dim_aparam * nlocal);
+  aparam.resize(static_cast<size_t>(dim_aparam) * nlocal);
 
   if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
     compute->compute_peratom();
@@ -573,7 +573,7 @@ void PairDeepMD::compute(int eflag, int vflag) {
             error->one(FLERR, e.what());
           }
         } else {
-          dforce.resize((extend_inum + extend_nghost) * 3);
+          dforce.resize(static_cast<size_t>(extend_inum + extend_nghost) * 3);
           try {
             deep_pot.compute(dener, dforce, dvirial, extend_dcoord,
                              extend_dtype, dbox, extend_nghost, extend_lmp_list,
@@ -596,7 +596,7 @@ void PairDeepMD::compute(int eflag, int vflag) {
             error->one(FLERR, e.what());
           }
         } else {
-          dforce.resize((extend_inum + extend_nghost) * 3);
+          dforce.resize(static_cast<size_t>(extend_inum + extend_nghost) * 3);
           try {
             deep_pot.compute(dener, dforce, dvirial, extend_dcoord,
                              extend_dtype, dbox, extend_nghost, extend_lmp_list,
@@ -1495,7 +1495,7 @@ void PairDeepMD::extend(int &extend_inum,
   }
 
   // extend coord
-  extend_dcoord.resize(extend_nall * 3);
+  extend_dcoord.resize(static_cast<size_t>(extend_nall) * 3);
   for (int ii = 0; ii < nloc; ii++) {
     for (int jj = 0; jj < 3; jj++) {
       extend_dcoord[new_idx_map[ii] * 3 + jj] = dcoord[ii * 3 + jj];
diff --git a/source/lmp/pppm_dplr.cpp b/source/lmp/pppm_dplr.cpp
index faa80ee308..613a9f1c93 100644
--- a/source/lmp/pppm_dplr.cpp
+++ b/source/lmp/pppm_dplr.cpp
@@ -59,7 +59,7 @@ void PPPMDPLR::init() {
 
   int nlocal = atom->nlocal;
   // cout << " ninit pppm/dplr ---------------------- " << nlocal << endl;
-  fele.resize(nlocal * 3);
+  fele.resize(static_cast<size_t>(nlocal) * 3);
   fill(fele.begin(), fele.end(), 0.0);
 }
 
@@ -296,7 +296,7 @@ void PPPMDPLR::fieldforce_ik() {
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
 
-  fele.resize(nlocal * 3);
+  fele.resize(static_cast<size_t>(nlocal) * 3);
   fill(fele.begin(), fele.end(), 0.0);
 
   for (i = 0; i < nlocal; i++) {
@@ -372,7 +372,7 @@ void PPPMDPLR::fieldforce_ad() {
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
 
-  fele.resize(nlocal * 3);
+  fele.resize(static_cast<size_t>(nlocal) * 3);
   fill(fele.begin(), fele.end(), 0.0);
 
   for (i = 0; i < nlocal; i++) {
diff --git a/source/md/src/Convert.cc b/source/md/src/Convert.cc
index 198c746c23..b8014bf974 100644
--- a/source/md/src/Convert.cc
+++ b/source/md/src/Convert.cc
@@ -53,8 +53,8 @@ void Convert<VALUETYPE>::gro2nnp(vector<VALUETYPE>& coord,
   assert(posi.size() == idx_map_nnp2gro.size());
   assert(velo.size() == idx_map_nnp2gro.size());
   int natoms = idx_map_nnp2gro.size();
-  coord.resize(3 * natoms);
-  veloc.resize(3 * natoms);
+  coord.resize(3 * static_cast<size_t>(natoms));
+  veloc.resize(3 * static_cast<size_t>(natoms));
   for (unsigned ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map_nnp2gro[ii];
     for (int dd = 0; dd < 3; ++dd) {
diff --git a/source/md/src/Tabulated.cc b/source/md/src/Tabulated.cc
index 1ecf8ee53d..6e9777ea29 100644
--- a/source/md/src/Tabulated.cc
+++ b/source/md/src/Tabulated.cc
@@ -24,7 +24,7 @@ void Tabulated::reinit(const VALUETYPE rc,
   hi = 1. / hh;
   rc2 = rc * rc;
 
-  data.resize(tableLength * stride);
+  data.resize(static_cast<size_t>(tableLength) * stride);
 
   int ii;
   for (ii = 0; ii < tableLength - 1; ++ii) {
diff --git a/source/op/descrpt.cc b/source/op/descrpt.cc
index ef040c3de0..6362b8d37a 100644
--- a/source/op/descrpt.cc
+++ b/source/op/descrpt.cc
@@ -145,22 +145,22 @@ class DescrptOp : public OpKernel {
     // Create an output tensor
     TensorShape descrpt_shape;
     descrpt_shape.AddDim(nsamples);
-    descrpt_shape.AddDim(nloc * ndescrpt);
+    descrpt_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
     TensorShape descrpt_deriv_shape;
     descrpt_deriv_shape.AddDim(nsamples);
-    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 12);
+    descrpt_deriv_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt * 12);
     TensorShape rij_shape;
     rij_shape.AddDim(nsamples);
-    rij_shape.AddDim(nloc * nnei * 3);
+    rij_shape.AddDim(static_cast<int64_t>(nloc) * nnei * 3);
     TensorShape nlist_shape;
     nlist_shape.AddDim(nsamples);
-    nlist_shape.AddDim(nloc * nnei);
+    nlist_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
     TensorShape axis_shape;
     axis_shape.AddDim(nsamples);
-    axis_shape.AddDim(nloc * 4);
+    axis_shape.AddDim(static_cast<int64_t>(nloc) * 4);
     TensorShape rot_mat_shape;
     rot_mat_shape.AddDim(nsamples);
-    rot_mat_shape.AddDim(nloc * 9);
+    rot_mat_shape.AddDim(static_cast<int64_t>(nloc) * 9);
 
     Tensor* descrpt_tensor = NULL;
     OP_REQUIRES_OK(context,
diff --git a/source/op/descrpt_se_a_ef.cc b/source/op/descrpt_se_a_ef.cc
index 030c184b46..96c953f167 100644
--- a/source/op/descrpt_se_a_ef.cc
+++ b/source/op/descrpt_se_a_ef.cc
@@ -161,16 +161,16 @@ class DescrptSeAEfOp : public OpKernel {
     // Create an output tensor
     TensorShape descrpt_shape;
     descrpt_shape.AddDim(nsamples);
-    descrpt_shape.AddDim(nloc * ndescrpt);
+    descrpt_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
     TensorShape descrpt_deriv_shape;
     descrpt_deriv_shape.AddDim(nsamples);
-    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    descrpt_deriv_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt * 3);
     TensorShape rij_shape;
     rij_shape.AddDim(nsamples);
-    rij_shape.AddDim(nloc * nnei * 3);
+    rij_shape.AddDim(static_cast<int64_t>(nloc) * nnei * 3);
     TensorShape nlist_shape;
     nlist_shape.AddDim(nsamples);
-    nlist_shape.AddDim(nloc * nnei);
+    nlist_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
 
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
diff --git a/source/op/descrpt_se_a_ef_para.cc b/source/op/descrpt_se_a_ef_para.cc
index 06f7f138fb..6dc4442ee6 100644
--- a/source/op/descrpt_se_a_ef_para.cc
+++ b/source/op/descrpt_se_a_ef_para.cc
@@ -161,16 +161,16 @@ class DescrptSeAEfParaOp : public OpKernel {
     // Create an output tensor
     TensorShape descrpt_shape;
     descrpt_shape.AddDim(nsamples);
-    descrpt_shape.AddDim(nloc * ndescrpt);
+    descrpt_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
     TensorShape descrpt_deriv_shape;
     descrpt_deriv_shape.AddDim(nsamples);
-    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    descrpt_deriv_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt * 3);
     TensorShape rij_shape;
     rij_shape.AddDim(nsamples);
-    rij_shape.AddDim(nloc * nnei * 3);
+    rij_shape.AddDim(static_cast<int64_t>(nloc) * nnei * 3);
     TensorShape nlist_shape;
     nlist_shape.AddDim(nsamples);
-    nlist_shape.AddDim(nloc * nnei);
+    nlist_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
 
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
diff --git a/source/op/descrpt_se_a_ef_vert.cc b/source/op/descrpt_se_a_ef_vert.cc
index d70e9b201b..9899e29f06 100644
--- a/source/op/descrpt_se_a_ef_vert.cc
+++ b/source/op/descrpt_se_a_ef_vert.cc
@@ -161,16 +161,16 @@ class DescrptSeAEfVertOp : public OpKernel {
     // Create an output tensor
     TensorShape descrpt_shape;
     descrpt_shape.AddDim(nsamples);
-    descrpt_shape.AddDim(nloc * ndescrpt);
+    descrpt_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
     TensorShape descrpt_deriv_shape;
     descrpt_deriv_shape.AddDim(nsamples);
-    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    descrpt_deriv_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt * 3);
     TensorShape rij_shape;
     rij_shape.AddDim(nsamples);
-    rij_shape.AddDim(nloc * nnei * 3);
+    rij_shape.AddDim(static_cast<int64_t>(nloc) * nnei * 3);
     TensorShape nlist_shape;
     nlist_shape.AddDim(nsamples);
-    nlist_shape.AddDim(nloc * nnei);
+    nlist_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
 
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
diff --git a/source/op/descrpt_se_a_mask.cc b/source/op/descrpt_se_a_mask.cc
index 4f133e5210..e27ea099ab 100644
--- a/source/op/descrpt_se_a_mask.cc
+++ b/source/op/descrpt_se_a_mask.cc
@@ -95,16 +95,18 @@ class DescrptSeAMaskOp : public OpKernel {
     // Create an output tensor
     TensorShape descrpt_shape;
     descrpt_shape.AddDim(nsamples);
-    descrpt_shape.AddDim(total_atom_num * total_atom_num * n_descrpt);
+    descrpt_shape.AddDim(static_cast<int64_t>(total_atom_num) * total_atom_num *
+                         n_descrpt);
     TensorShape descrpt_deriv_shape;
     descrpt_deriv_shape.AddDim(nsamples);
-    descrpt_deriv_shape.AddDim(total_atom_num * total_atom_num * n_descrpt * 3);
+    descrpt_deriv_shape.AddDim(static_cast<int64_t>(total_atom_num) *
+                               total_atom_num * n_descrpt * 3);
     TensorShape rij_shape;
     rij_shape.AddDim(nsamples);
-    rij_shape.AddDim(total_atom_num * total_atom_num * 3);
+    rij_shape.AddDim(static_cast<int64_t>(total_atom_num) * total_atom_num * 3);
     TensorShape nlist_shape;
     nlist_shape.AddDim(nsamples);
-    nlist_shape.AddDim(total_atom_num * total_atom_num);
+    nlist_shape.AddDim(static_cast<int64_t>(total_atom_num) * total_atom_num);
 
     int context_output_index = 0;
     Tensor *descrpt_tensor = NULL;
diff --git a/source/op/ewald_recp.cc b/source/op/ewald_recp.cc
index a0fbc7f580..72f3c3d5dc 100644
--- a/source/op/ewald_recp.cc
+++ b/source/op/ewald_recp.cc
@@ -70,7 +70,7 @@ class EwaldRecpOp : public OpKernel {
     energy_shape.AddDim(nsamples);
     TensorShape force_shape;
     force_shape.AddDim(nsamples);
-    force_shape.AddDim(nloc * 3);
+    force_shape.AddDim(static_cast<int64_t>(nloc) * 3);
     TensorShape virial_shape;
     virial_shape.AddDim(nsamples);
     virial_shape.AddDim(9);
diff --git a/source/op/map_aparam.cc b/source/op/map_aparam.cc
index d0ff08032d..7ac3b48a4f 100644
--- a/source/op/map_aparam.cc
+++ b/source/op/map_aparam.cc
@@ -61,7 +61,7 @@ class MapAparamOp : public OpKernel {
     // Create an output tensor
     TensorShape output_shape;
     output_shape.AddDim(nframes);
-    output_shape.AddDim(nloc * nnei * numb_aparam);
+    output_shape.AddDim(static_cast<int64_t>(nloc) * nnei * numb_aparam);
     Tensor* output_tensor = NULL;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, output_shape, &output_tensor));
diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc
index d917c60a5f..d2a6b3ab31 100644
--- a/source/op/neighbor_stat.cc
+++ b/source/op/neighbor_stat.cc
@@ -112,7 +112,7 @@ class NeighborStatOp : public OpKernel {
       if (nei_mode == 1) {
         // Tensor FPTYPE_temp;
         TensorShape FPTYPE_shape;
-        FPTYPE_shape.AddDim(nall * 3);
+        FPTYPE_shape.AddDim(static_cast<int64_t>(nall) * 3);
         OP_REQUIRES_OK(context,
                        context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                               FPTYPE_shape, &tensor_list[0]));
@@ -125,20 +125,20 @@ class NeighborStatOp : public OpKernel {
                                               double_shape, &tensor_list[1]));
         // Tensor cpy_temp;
         TensorShape cpy_shape;
-        cpy_shape.AddDim(mem_cpy * 3);
+        cpy_shape.AddDim(static_cast<int64_t>(mem_cpy) * 3);
         OP_REQUIRES_OK(context,
                        context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                               cpy_shape, &tensor_list[3]));
         // Tensor t_temp;
         TensorShape t_shape;
-        t_shape.AddDim(mem_cpy * 2);
+        t_shape.AddDim(static_cast<int64_t>(mem_cpy) * 2);
         OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, t_shape,
                                                        &tensor_list[4]));
       }
 
       // Tensor nlist_temp;
       TensorShape nlist_shape;
-      nlist_shape.AddDim(nloc * 2);
+      nlist_shape.AddDim(static_cast<int64_t>(nloc) * 2);
       OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, nlist_shape,
                                                      &tensor_list[5]));
 
@@ -167,7 +167,7 @@ class NeighborStatOp : public OpKernel {
           rcut, max_cpy_trial, max_nnei_trial);
 
       TensorShape min_nbor_dist_shape;
-      min_nbor_dist_shape.AddDim(nloc * mem_nnei);
+      min_nbor_dist_shape.AddDim(static_cast<int64_t>(nloc) * mem_nnei);
       Tensor* min_nbor_dist_tensor = NULL;
       OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
                                                        min_nbor_dist_shape,
@@ -253,7 +253,7 @@ class NeighborStatOp : public OpKernel {
       }
       // allocate output tensor for deepmd-kit
       TensorShape min_nbor_dist_shape;
-      min_nbor_dist_shape.AddDim(nloc * MAX_NNEI);
+      min_nbor_dist_shape.AddDim(static_cast<int64_t>(nloc) * MAX_NNEI);
       Tensor* min_nbor_dist_tensor = NULL;
       OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
                                                        min_nbor_dist_shape,
diff --git a/source/op/pair_tab.cc b/source/op/pair_tab.cc
index e412aa6c2a..5c16e0faa4 100644
--- a/source/op/pair_tab.cc
+++ b/source/op/pair_tab.cc
@@ -103,10 +103,10 @@ class PairTabOp : public OpKernel {
     energy_shape.AddDim(nloc);
     TensorShape force_shape;
     force_shape.AddDim(nframes);
-    force_shape.AddDim(3 * nall);
+    force_shape.AddDim(3 * static_cast<int64_t>(nall));
     TensorShape virial_shape;
     virial_shape.AddDim(nframes);
-    virial_shape.AddDim(9 * nall);
+    virial_shape.AddDim(9 * static_cast<int64_t>(nall));
     Tensor* energy_tensor = NULL;
     Tensor* force_tensor = NULL;
     Tensor* virial_tensor = NULL;
diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc
index 22654b5f3a..a99804cb9e 100644
--- a/source/op/prod_env_mat_multi_device.cc
+++ b/source/op/prod_env_mat_multi_device.cc
@@ -497,7 +497,7 @@ class ProdEnvMatAOp : public OpKernel {
         if (nei_mode == 1) {
           // Tensor FPTYPE_temp;
           TensorShape FPTYPE_shape;
-          FPTYPE_shape.AddDim(nall * 3);
+          FPTYPE_shape.AddDim(static_cast<int64_t>(nall) * 3);
           OP_REQUIRES_OK(context,
                          context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                                 FPTYPE_shape, &tensor_list[0]));
@@ -510,20 +510,20 @@ class ProdEnvMatAOp : public OpKernel {
                                                 double_shape, &tensor_list[1]));
           // Tensor cpy_temp;
           TensorShape cpy_shape;
-          cpy_shape.AddDim(mem_cpy * 3);
+          cpy_shape.AddDim(static_cast<int64_t>(mem_cpy) * 3);
           OP_REQUIRES_OK(context,
                          context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                                 cpy_shape, &tensor_list[3]));
           // Tensor t_temp;
           TensorShape t_shape;
-          t_shape.AddDim(mem_cpy * 2);
+          t_shape.AddDim(static_cast<int64_t>(mem_cpy) * 2);
           OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, t_shape,
                                                          &tensor_list[4]));
         }
 
         // Tensor nlist_temp;
         TensorShape nlist_shape;
-        nlist_shape.AddDim(nloc * 2);
+        nlist_shape.AddDim(static_cast<int64_t>(nloc) * 2);
         OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, nlist_shape,
                                                        &tensor_list[5]));
 
@@ -794,7 +794,7 @@ class ProdEnvMatROp : public OpKernel {
         if (nei_mode == 1) {
           // Tensor FPTYPE_temp;
           TensorShape FPTYPE_shape;
-          FPTYPE_shape.AddDim(nall * 3);
+          FPTYPE_shape.AddDim(static_cast<int64_t>(nall) * 3);
           OP_REQUIRES_OK(context,
                          context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                                 FPTYPE_shape, &tensor_list[0]));
@@ -807,20 +807,20 @@ class ProdEnvMatROp : public OpKernel {
                                                 double_shape, &tensor_list[1]));
           // Tensor cpy_temp;
           TensorShape cpy_shape;
-          cpy_shape.AddDim(mem_cpy * 3);
+          cpy_shape.AddDim(static_cast<int64_t>(mem_cpy) * 3);
           OP_REQUIRES_OK(context,
                          context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                                 cpy_shape, &tensor_list[3]));
           // Tensor t_temp;
           TensorShape t_shape;
-          t_shape.AddDim(mem_cpy * 2);
+          t_shape.AddDim(static_cast<int64_t>(mem_cpy) * 2);
           OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, t_shape,
                                                          &tensor_list[4]));
         }
 
         // Tensor nlist_temp;
         TensorShape nlist_shape;
-        nlist_shape.AddDim(nloc * 2);
+        nlist_shape.AddDim(static_cast<int64_t>(nloc) * 2);
         OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, nlist_shape,
                                                        &tensor_list[5]));
 
@@ -1066,10 +1066,10 @@ class ProdEnvMatAMixOp : public OpKernel {
     nlist_shape.AddDim(int_64(nloc) * nnei);
     TensorShape ntype_shape;
     ntype_shape.AddDim(nsamples);
-    ntype_shape.AddDim(nloc * nnei);
+    ntype_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
     TensorShape nmask_shape;
     nmask_shape.AddDim(nsamples);
-    nmask_shape.AddDim(nloc * nnei);
+    nmask_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
     // define output tensor
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
@@ -1137,7 +1137,7 @@ class ProdEnvMatAMixOp : public OpKernel {
         if (nei_mode == 1) {
           // Tensor FPTYPE_temp;
           TensorShape FPTYPE_shape;
-          FPTYPE_shape.AddDim(nall * 3);
+          FPTYPE_shape.AddDim(static_cast<int64_t>(nall) * 3);
           OP_REQUIRES_OK(context,
                          context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                                 FPTYPE_shape, &tensor_list[0]));
@@ -1150,20 +1150,20 @@ class ProdEnvMatAMixOp : public OpKernel {
                                                 double_shape, &tensor_list[1]));
           // Tensor cpy_temp;
           TensorShape cpy_shape;
-          cpy_shape.AddDim(mem_cpy * 3);
+          cpy_shape.AddDim(static_cast<int64_t>(mem_cpy) * 3);
           OP_REQUIRES_OK(context,
                          context->allocate_temp(DataTypeToEnum<FPTYPE>::value,
                                                 cpy_shape, &tensor_list[3]));
           // Tensor t_temp;
           TensorShape t_shape;
-          t_shape.AddDim(mem_cpy * 2);
+          t_shape.AddDim(static_cast<int64_t>(mem_cpy) * 2);
           OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, t_shape,
                                                          &tensor_list[4]));
         }
 
         // Tensor nlist_temp;
         TensorShape nlist_shape;
-        nlist_shape.AddDim(nloc * 2);
+        nlist_shape.AddDim(static_cast<int64_t>(nloc) * 2);
         OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, nlist_shape,
                                                        &tensor_list[5]));
 
@@ -1296,7 +1296,7 @@ static int _norm_copy_coord_cpu(std::vector<FPTYPE>& coord_cpy,
   normalize_coord_cpu(&tmp_coord[0], nall, region);
   int tt;
   for (tt = 0; tt < max_cpy_trial; ++tt) {
-    coord_cpy.resize(mem_cpy * 3);
+    coord_cpy.resize(static_cast<size_t>(mem_cpy) * 3);
     type_cpy.resize(mem_cpy);
     idx_mapping.resize(mem_cpy);
     int ret =
@@ -1512,7 +1512,7 @@ static int _norm_copy_coord_gpu(OpKernelContext* context,
       mem_cpy *= 2;
       // Tensor cpy_temp;
       TensorShape cpy_shape;
-      cpy_shape.AddDim(mem_cpy * 3);
+      cpy_shape.AddDim(static_cast<int64_t>(mem_cpy) * 3);
       status = context->allocate_temp(DataTypeToEnum<FPTYPE>::value, cpy_shape,
                                       tensor_list + 3);
       if (!status.ok()) {
@@ -1520,7 +1520,7 @@ static int _norm_copy_coord_gpu(OpKernelContext* context,
       }
       // Tensor t_temp;
       TensorShape t_shape;
-      t_shape.AddDim(mem_cpy * 2);
+      t_shape.AddDim(static_cast<int64_t>(mem_cpy) * 2);
       status = context->allocate_temp(DT_INT32, t_shape, tensor_list + 4);
       if (!status.ok()) {
         return false;
diff --git a/source/op/prod_env_mat_multi_device_nvnmd.cc b/source/op/prod_env_mat_multi_device_nvnmd.cc
index 1cbfb968f1..d9f9275b86 100644
--- a/source/op/prod_env_mat_multi_device_nvnmd.cc
+++ b/source/op/prod_env_mat_multi_device_nvnmd.cc
@@ -156,7 +156,7 @@ static int _norm_copy_coord_cpu(std::vector<FPTYPE>& coord_cpy,
   normalize_coord_cpu(&tmp_coord[0], nall, region);
   int tt;
   for (tt = 0; tt < max_cpy_trial; ++tt) {
-    coord_cpy.resize(mem_cpy * 3);
+    coord_cpy.resize(static_cast<size_t>(mem_cpy) * 3);
     type_cpy.resize(mem_cpy);
     idx_mapping.resize(mem_cpy);
     int ret =
@@ -675,10 +675,10 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel {
     nlist_shape.AddDim(int_64(nloc) * nnei);
     TensorShape ntype_shape;
     ntype_shape.AddDim(nsamples);
-    ntype_shape.AddDim(nloc * nnei);
+    ntype_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
     TensorShape nmask_shape;
     nmask_shape.AddDim(nsamples);
-    nmask_shape.AddDim(nloc * nnei);
+    nmask_shape.AddDim(static_cast<int64_t>(nloc) * nnei);
     // define output tensor
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
@@ -707,7 +707,7 @@ class ProdEnvMatAMixNvnmdQuantizeOp : public OpKernel {
 
     Tensor fake_type_tensor;  // all zeros
     TensorShape fake_type_shape;
-    fake_type_shape.AddDim(nsamples * nall);
+    fake_type_shape.AddDim(static_cast<int64_t>(nsamples) * nall);
     OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, fake_type_shape,
                                                    &fake_type_tensor));
 
diff --git a/source/op/prod_force.cc b/source/op/prod_force.cc
index d8ced591b9..57d1cd1331 100644
--- a/source/op/prod_force.cc
+++ b/source/op/prod_force.cc
@@ -81,7 +81,7 @@ class ProdForceOp : public OpKernel {
     // Create an output tensor
     TensorShape force_shape;
     force_shape.AddDim(nframes);
-    force_shape.AddDim(3 * nall);
+    force_shape.AddDim(3 * static_cast<int64_t>(nall));
     // std::cout << "forcesahpe " << force_shape.dim_size(0) << " " <<
     // force_shape.dim_size(1) << std::endl;
     Tensor* force_tensor = NULL;
diff --git a/source/op/prod_force_grad.cc b/source/op/prod_force_grad.cc
index 2d14022279..c1cf63917e 100644
--- a/source/op/prod_force_grad.cc
+++ b/source/op/prod_force_grad.cc
@@ -92,7 +92,7 @@ class ProdForceGradOp : public OpKernel {
     // Create an output tensor
     TensorShape grad_net_shape;
     grad_net_shape.AddDim(nframes);
-    grad_net_shape.AddDim(nloc * ndescrpt);
+    grad_net_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
diff --git a/source/op/prod_force_multi_device.cc b/source/op/prod_force_multi_device.cc
index 3eaf005f9a..20cc96dd31 100644
--- a/source/op/prod_force_multi_device.cc
+++ b/source/op/prod_force_multi_device.cc
@@ -103,7 +103,7 @@ class ProdForceSeAOp : public OpKernel {
     // Create an output tensor
     TensorShape force_shape;
     force_shape.AddDim(nframes);
-    force_shape.AddDim(3 * nall);
+    force_shape.AddDim(3 * static_cast<int64_t>(nall));
     Tensor* force_tensor = NULL;
     int context_output_index = 0;
     OP_REQUIRES_OK(context,
@@ -200,7 +200,7 @@ class ProdForceSeROp : public OpKernel {
     // Create an output tensor
     TensorShape force_shape;
     force_shape.AddDim(nframes);
-    force_shape.AddDim(3 * nall);
+    force_shape.AddDim(3 * static_cast<int64_t>(nall));
     Tensor* force_tensor = NULL;
     int context_output_index = 0;
     OP_REQUIRES_OK(context,
diff --git a/source/op/prod_force_se_a_grad.cc b/source/op/prod_force_se_a_grad.cc
index 21dd4fe00a..5aaf030512 100644
--- a/source/op/prod_force_se_a_grad.cc
+++ b/source/op/prod_force_se_a_grad.cc
@@ -85,7 +85,7 @@ class ProdForceSeAGradOp : public OpKernel {
     // Create an output tensor
     TensorShape grad_net_shape;
     grad_net_shape.AddDim(nframes);
-    grad_net_shape.AddDim(nloc * ndescrpt);
+    grad_net_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
diff --git a/source/op/prod_force_se_a_mask.cc b/source/op/prod_force_se_a_mask.cc
index 32fcf54a79..aa4268434d 100644
--- a/source/op/prod_force_se_a_mask.cc
+++ b/source/op/prod_force_se_a_mask.cc
@@ -63,7 +63,7 @@ class ProdForceSeAMaskOp : public OpKernel {
     // Create an output tensor
     TensorShape force_shape;
     force_shape.AddDim(nframes);
-    force_shape.AddDim(3 * nall);
+    force_shape.AddDim(3 * static_cast<int64_t>(nall));
     // std::cout << "forcesahpe " << force_shape.dim_size(0) << " " <<
     // force_shape.dim_size(1) << std::endl;
     Tensor *force_tensor = NULL;
diff --git a/source/op/prod_force_se_a_mask_grad.cc b/source/op/prod_force_se_a_mask_grad.cc
index 6f841b1c7d..dabe405545 100644
--- a/source/op/prod_force_se_a_mask_grad.cc
+++ b/source/op/prod_force_se_a_mask_grad.cc
@@ -77,7 +77,7 @@ class ProdForceSeAMaskGradOp : public OpKernel {
     // Create an output tensor
     TensorShape grad_net_shape;
     grad_net_shape.AddDim(nframes);
-    grad_net_shape.AddDim(nloc * ndescrpt);
+    grad_net_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor *grad_net_tensor = NULL;
diff --git a/source/op/prod_force_se_r_grad.cc b/source/op/prod_force_se_r_grad.cc
index f0b4b18323..0b5338c241 100644
--- a/source/op/prod_force_se_r_grad.cc
+++ b/source/op/prod_force_se_r_grad.cc
@@ -77,7 +77,7 @@ class ProdForceSeRGradOp : public OpKernel {
     // Create an output tensor
     TensorShape grad_net_shape;
     grad_net_shape.AddDim(nframes);
-    grad_net_shape.AddDim(nloc * ndescrpt);
+    grad_net_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
diff --git a/source/op/prod_virial.cc b/source/op/prod_virial.cc
index 2719c6c670..10532d74db 100644
--- a/source/op/prod_virial.cc
+++ b/source/op/prod_virial.cc
@@ -96,7 +96,7 @@ class ProdVirialOp : public OpKernel {
                    context->allocate_output(0, virial_shape, &virial_tensor));
     TensorShape atom_virial_shape;
     atom_virial_shape.AddDim(nframes);
-    atom_virial_shape.AddDim(9 * nall);
+    atom_virial_shape.AddDim(9 * static_cast<int64_t>(nall));
     Tensor* atom_virial_tensor = NULL;
     OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape,
                                                      &atom_virial_tensor));
diff --git a/source/op/prod_virial_grad.cc b/source/op/prod_virial_grad.cc
index b06e273453..02feba4eee 100644
--- a/source/op/prod_virial_grad.cc
+++ b/source/op/prod_virial_grad.cc
@@ -101,7 +101,7 @@ class ProdVirialGradOp : public OpKernel {
     // Create an output tensor
     TensorShape grad_net_shape;
     grad_net_shape.AddDim(nframes);
-    grad_net_shape.AddDim(nloc * ndescrpt);
+    grad_net_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
diff --git a/source/op/prod_virial_multi_device.cc b/source/op/prod_virial_multi_device.cc
index 23b312b797..a544b010c5 100644
--- a/source/op/prod_virial_multi_device.cc
+++ b/source/op/prod_virial_multi_device.cc
@@ -93,7 +93,7 @@ class ProdVirialSeAOp : public OpKernel {
     virial_shape.AddDim(9);
     TensorShape atom_virial_shape;
     atom_virial_shape.AddDim(nframes);
-    atom_virial_shape.AddDim(9 * nall);
+    atom_virial_shape.AddDim(9 * static_cast<int64_t>(nall));
     int context_output_index = 0;
     Tensor* virial_tensor = NULL;
     OP_REQUIRES_OK(
@@ -192,7 +192,7 @@ class ProdVirialSeROp : public OpKernel {
     virial_shape.AddDim(9);
     TensorShape atom_virial_shape;
     atom_virial_shape.AddDim(nframes);
-    atom_virial_shape.AddDim(9 * nall);
+    atom_virial_shape.AddDim(9 * static_cast<int64_t>(nall));
     int context_output_index = 0;
     Tensor* virial_tensor = NULL;
     OP_REQUIRES_OK(
diff --git a/source/op/prod_virial_se_a_grad.cc b/source/op/prod_virial_se_a_grad.cc
index a22401d654..d6c55b6969 100644
--- a/source/op/prod_virial_se_a_grad.cc
+++ b/source/op/prod_virial_se_a_grad.cc
@@ -94,7 +94,7 @@ class ProdVirialSeAGradOp : public OpKernel {
     // Create an output tensor
     TensorShape grad_net_shape;
     grad_net_shape.AddDim(nframes);
-    grad_net_shape.AddDim(nloc * ndescrpt);
+    grad_net_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
diff --git a/source/op/prod_virial_se_r_grad.cc b/source/op/prod_virial_se_r_grad.cc
index b874c828df..40c2828ca7 100644
--- a/source/op/prod_virial_se_r_grad.cc
+++ b/source/op/prod_virial_se_r_grad.cc
@@ -86,7 +86,7 @@ class ProdVirialSeRGradOp : public OpKernel {
     // Create an output tensor
     TensorShape grad_net_shape;
     grad_net_shape.AddDim(nframes);
-    grad_net_shape.AddDim(nloc * ndescrpt);
+    grad_net_shape.AddDim(static_cast<int64_t>(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
diff --git a/source/op/soft_min.cc b/source/op/soft_min.cc
index 4062ddc4cb..85aade5e7b 100644
--- a/source/op/soft_min.cc
+++ b/source/op/soft_min.cc
@@ -94,7 +94,7 @@ class SoftMinSwitchOp : public OpKernel {
     sw_value_shape.AddDim(nloc);
     TensorShape sw_deriv_shape;
     sw_deriv_shape.AddDim(nframes);
-    sw_deriv_shape.AddDim(3 * nnei * nloc);
+    sw_deriv_shape.AddDim(3 * static_cast<int64_t>(nnei) * nloc);
     Tensor* sw_value_tensor = NULL;
     Tensor* sw_deriv_tensor = NULL;
     tmp_idx = 0;
diff --git a/source/op/soft_min_force.cc b/source/op/soft_min_force.cc
index a2970f4c3a..0801170597 100644
--- a/source/op/soft_min_force.cc
+++ b/source/op/soft_min_force.cc
@@ -73,7 +73,7 @@ class SoftMinForceOp : public OpKernel {
     // Create an output tensor
     TensorShape force_shape;
     force_shape.AddDim(nframes);
-    force_shape.AddDim(3 * nall);
+    force_shape.AddDim(3 * static_cast<int64_t>(nall));
     Tensor* force_tensor = NULL;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, force_shape, &force_tensor));
diff --git a/source/op/soft_min_virial.cc b/source/op/soft_min_virial.cc
index 91a94e01c3..26daa78604 100644
--- a/source/op/soft_min_virial.cc
+++ b/source/op/soft_min_virial.cc
@@ -89,7 +89,7 @@ class SoftMinVirialOp : public OpKernel {
                    context->allocate_output(0, virial_shape, &virial_tensor));
     TensorShape atom_virial_shape;
     atom_virial_shape.AddDim(nframes);
-    atom_virial_shape.AddDim(9 * nall);
+    atom_virial_shape.AddDim(9 * static_cast<int64_t>(nall));
     Tensor* atom_virial_tensor = NULL;
     OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape,
                                                      &atom_virial_tensor));

From 9be1ad29f2054a621495ecd05557094bc1a1c5f0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 17 Jan 2024 11:26:41 +0800
Subject: [PATCH 86/97] [pre-commit.ci] pre-commit autoupdate (#3147)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!--pre-commit.ci start-->
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.11 →
v0.1.13](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.11...v0.1.13)
<!--pre-commit.ci end-->

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5b6beb1dba..d4e89f1129 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,7 +30,7 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.11
+    rev: v0.1.13
     hooks:
     - id: ruff
       args: ["--fix"]

From 850575a6ac1235359ad7c2ef989df08feda6406f Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 17 Jan 2024 00:00:38 -0500
Subject: [PATCH 87/97] cc: refactor DeepTensor for multiple-backend framework
 (#3151)

See #3119

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/api_cc/include/DeepTensor.h   | 263 +++++---
 source/api_cc/include/DeepTensorTF.h | 395 ++++++++++++
 source/api_cc/src/DeepTensor.cc      | 666 ++------------------
 source/api_cc/src/DeepTensorTF.cc    | 900 +++++++++++++++++++++++++++
 4 files changed, 1512 insertions(+), 712 deletions(-)
 create mode 100644 source/api_cc/include/DeepTensorTF.h
 create mode 100644 source/api_cc/src/DeepTensorTF.cc

diff --git a/source/api_cc/include/DeepTensor.h b/source/api_cc/include/DeepTensor.h
index af535cc9de..6150eca970 100644
--- a/source/api_cc/include/DeepTensor.h
+++ b/source/api_cc/include/DeepTensor.h
@@ -1,10 +1,184 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #pragma once
 
+#include <memory>
+
 #include "common.h"
 #include "neighbor_list.h"
 
 namespace deepmd {
+/**
+ * @brief Abstract interface that Deep Tensor backends implement.
+ **/
+class DeepTensorBase {
+ public:
+  /**
+   * @brief Deep Tensor constructor without initialization.
+   **/
+  DeepTensorBase(){};
+  virtual ~DeepTensorBase(){};
+  /**
+   * @brief Deep Tensor constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope Name scopes of operations.
+   **/
+  DeepTensorBase(const std::string& model,
+                 const int& gpu_rank = 0,
+                 const std::string& name_scope = "");
+  /**
+   * @brief Initialize the Deep Tensor.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope Name scopes of operations.
+   **/
+  virtual void init(const std::string& model,
+                    const int& gpu_rank = 0,
+                    const std::string& name_scope = "") = 0;
+
+  /**
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @{
+   **/
+  virtual void computew(std::vector<double>& value,
+                        const std::vector<double>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<double>& box) = 0;
+  virtual void computew(std::vector<float>& value,
+                        const std::vector<float>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<float>& box) = 0;
+  /** @} */
+  /**
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   * @{
+   **/
+  virtual void computew(std::vector<double>& value,
+                        const std::vector<double>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<double>& box,
+                        const int nghost,
+                        const InputNlist& inlist) = 0;
+  virtual void computew(std::vector<float>& value,
+                        const std::vector<float>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<float>& box,
+                        const int nghost,
+                        const InputNlist& inlist) = 0;
+  /** @} */
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @{
+   **/
+  virtual void computew(std::vector<double>& global_tensor,
+                        std::vector<double>& force,
+                        std::vector<double>& virial,
+                        std::vector<double>& atom_tensor,
+                        std::vector<double>& atom_virial,
+                        const std::vector<double>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<double>& box) = 0;
+  virtual void computew(std::vector<float>& global_tensor,
+                        std::vector<float>& force,
+                        std::vector<float>& virial,
+                        std::vector<float>& atom_tensor,
+                        std::vector<float>& atom_virial,
+                        const std::vector<float>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<float>& box) = 0;
+  /** @} */
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   * @{
+   **/
+  virtual void computew(std::vector<double>& global_tensor,
+                        std::vector<double>& force,
+                        std::vector<double>& virial,
+                        std::vector<double>& atom_tensor,
+                        std::vector<double>& atom_virial,
+                        const std::vector<double>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<double>& box,
+                        const int nghost,
+                        const InputNlist& inlist) = 0;
+  virtual void computew(std::vector<float>& global_tensor,
+                        std::vector<float>& force,
+                        std::vector<float>& virial,
+                        std::vector<float>& atom_tensor,
+                        std::vector<float>& atom_virial,
+                        const std::vector<float>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<float>& box,
+                        const int nghost,
+                        const InputNlist& inlist) = 0;
+  /** @} */
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  virtual double cutoff() const = 0;
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  virtual int numb_types() const = 0;
+  /**
+   * @brief Get the output dimension.
+   * @return The output dimension.
+   **/
+  virtual int output_dim() const = 0;
+  /**
+   * @brief Get the list of sel types.
+   * @return The list of sel types.
+   */
+  virtual const std::vector<int>& sel_types() const = 0;
+  /**
+   * @brief Get the type map (element name of the atom types) of this model.
+   * @param[out] type_map The type map of this model.
+   **/
+  virtual void get_type_map(std::string& type_map) = 0;
+};
+
 /**
  * @brief Deep Tensor.
  **/
@@ -169,34 +343,22 @@ class DeepTensor {
    * @brief Get the cutoff radius.
    * @return The cutoff radius.
    **/
-  double cutoff() const {
-    assert(inited);
-    return rcut;
-  };
+  double cutoff() const;
   /**
    * @brief Get the number of types.
    * @return The number of types.
    **/
-  int numb_types() const {
-    assert(inited);
-    return ntypes;
-  };
+  int numb_types() const;
   /**
    * @brief Get the output dimension.
    * @return The output dimension.
    **/
-  int output_dim() const {
-    assert(inited);
-    return odim;
-  };
+  int output_dim() const;
   /**
    * @brief Get the list of sel types.
    * @return The list of sel types.
    */
-  const std::vector<int>& sel_types() const {
-    assert(inited);
-    return sel_type;
-  };
+  const std::vector<int>& sel_types() const;
   /**
    * @brief Get the type map (element name of the atom types) of this model.
    * @param[out] type_map The type map of this model.
@@ -204,74 +366,7 @@ class DeepTensor {
   void get_type_map(std::string& type_map);
 
  private:
-  tensorflow::Session* session;
-  std::string name_scope;
-  int num_intra_nthreads, num_inter_nthreads;
-  tensorflow::GraphDef* graph_def;
   bool inited;
-  double rcut;
-  int dtype;
-  double cell_size;
-  int ntypes;
-  std::string model_type;
-  std::string model_version;
-  int odim;
-  std::vector<int> sel_type;
-  template <class VT>
-  VT get_scalar(const std::string& name) const;
-  template <class VT>
-  void get_vector(std::vector<VT>& vec, const std::string& name) const;
-  template <typename MODELTYPE, typename VALUETYPE>
-  void run_model(std::vector<VALUETYPE>& d_tensor_,
-                 tensorflow::Session* session,
-                 const std::vector<std::pair<std::string, tensorflow::Tensor>>&
-                     input_tensors,
-                 const AtomMap& atommap,
-                 const std::vector<int>& sel_fwd,
-                 const int nghost = 0);
-  template <typename MODELTYPE, typename VALUETYPE>
-  void run_model(std::vector<VALUETYPE>& dglobal_tensor_,
-                 std::vector<VALUETYPE>& dforce_,
-                 std::vector<VALUETYPE>& dvirial_,
-                 std::vector<VALUETYPE>& datom_tensor_,
-                 std::vector<VALUETYPE>& datom_virial_,
-                 tensorflow::Session* session,
-                 const std::vector<std::pair<std::string, tensorflow::Tensor>>&
-                     input_tensors,
-                 const AtomMap& atommap,
-                 const std::vector<int>& sel_fwd,
-                 const int nghost = 0);
-  template <typename VALUETYPE>
-  void compute_inner(std::vector<VALUETYPE>& value,
-                     const std::vector<VALUETYPE>& coord,
-                     const std::vector<int>& atype,
-                     const std::vector<VALUETYPE>& box);
-  template <typename VALUETYPE>
-  void compute_inner(std::vector<VALUETYPE>& value,
-                     const std::vector<VALUETYPE>& coord,
-                     const std::vector<int>& atype,
-                     const std::vector<VALUETYPE>& box,
-                     const int nghost,
-                     const InputNlist& inlist);
-  template <typename VALUETYPE>
-  void compute_inner(std::vector<VALUETYPE>& global_tensor,
-                     std::vector<VALUETYPE>& force,
-                     std::vector<VALUETYPE>& virial,
-                     std::vector<VALUETYPE>& atom_tensor,
-                     std::vector<VALUETYPE>& atom_virial,
-                     const std::vector<VALUETYPE>& coord,
-                     const std::vector<int>& atype,
-                     const std::vector<VALUETYPE>& box);
-  template <typename VALUETYPE>
-  void compute_inner(std::vector<VALUETYPE>& global_tensor,
-                     std::vector<VALUETYPE>& force,
-                     std::vector<VALUETYPE>& virial,
-                     std::vector<VALUETYPE>& atom_tensor,
-                     std::vector<VALUETYPE>& atom_virial,
-                     const std::vector<VALUETYPE>& coord,
-                     const std::vector<int>& atype,
-                     const std::vector<VALUETYPE>& box,
-                     const int nghost,
-                     const InputNlist& inlist);
+  std::shared_ptr<deepmd::DeepTensorBase> dt;
 };
 }  // namespace deepmd
diff --git a/source/api_cc/include/DeepTensorTF.h b/source/api_cc/include/DeepTensorTF.h
new file mode 100644
index 0000000000..2ba7697076
--- /dev/null
+++ b/source/api_cc/include/DeepTensorTF.h
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#pragma once
+
+#include "DeepTensor.h"
+#include "common.h"
+#include "neighbor_list.h"
+
+namespace deepmd {
+/**
+ * @brief TensorFlow implementation of Deep Tensor.
+ **/
+class DeepTensorTF : public DeepTensorBase {
+ public:
+  /**
+   * @brief Deep Tensor constructor without initialization.
+   **/
+  DeepTensorTF();
+  ~DeepTensorTF();
+  /**
+   * @brief Deep Tensor constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope Name scopes of operations.
+   **/
+  DeepTensorTF(const std::string& model,
+               const int& gpu_rank = 0,
+               const std::string& name_scope = "");
+  /**
+   * @brief Initialize the Deep Tensor.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope Name scopes of operations.
+   **/
+  void init(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& name_scope = "");
+  /**
+   * @brief Print the DP summary to the screen.
+   * @param[in] pre The prefix to each line.
+   **/
+  void print_summary(const std::string& pre) const;
+
+  /**
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE>& value,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box);
+  /**
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE>& value,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& inlist);
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE>& global_tensor,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box);
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE>& global_tensor,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& inlist);
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE>& global_tensor,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_tensor,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box);
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE>& global_tensor,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_tensor,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& inlist);
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
+  /**
+   * @brief Get the output dimension.
+   * @return The output dimension.
+   **/
+  int output_dim() const {
+    assert(inited);
+    return odim;
+  };
+  /**
+   * @brief Get the list of sel types.
+   * @return The list of sel types.
+   */
+  const std::vector<int>& sel_types() const {
+    assert(inited);
+    return sel_type;
+  };
+  /**
+   * @brief Get the type map (element name of the atom types) of this model.
+   * @param[out] type_map The type map of this model.
+   **/
+  void get_type_map(std::string& type_map);
+
+  /**
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @{
+   **/
+  void computew(std::vector<double>& value,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box);
+  void computew(std::vector<float>& value,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box);
+  /** @} */
+  /**
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   * @{
+   **/
+  void computew(std::vector<double>& value,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const int nghost,
+                const InputNlist& inlist);
+  void computew(std::vector<float>& value,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const int nghost,
+                const InputNlist& inlist);
+  /** @} */
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @{
+   **/
+  void computew(std::vector<double>& global_tensor,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_tensor,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box);
+  void computew(std::vector<float>& global_tensor,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_tensor,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box);
+  /** @} */
+  /**
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   * @{
+   **/
+  void computew(std::vector<double>& global_tensor,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_tensor,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const int nghost,
+                const InputNlist& inlist);
+  void computew(std::vector<float>& global_tensor,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_tensor,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const int nghost,
+                const InputNlist& inlist);
+  /** @} */
+
+ private:
+  tensorflow::Session* session;
+  std::string name_scope;
+  int num_intra_nthreads, num_inter_nthreads;
+  tensorflow::GraphDef* graph_def;
+  bool inited;
+  double rcut;
+  int dtype;
+  double cell_size;
+  int ntypes;
+  std::string model_type;
+  std::string model_version;
+  int odim;
+  std::vector<int> sel_type;
+  template <class VT>
+  VT get_scalar(const std::string& name) const;
+  template <class VT>
+  void get_vector(std::vector<VT>& vec, const std::string& name) const;
+  template <typename MODELTYPE, typename VALUETYPE>
+  void run_model(std::vector<VALUETYPE>& d_tensor_,
+                 tensorflow::Session* session,
+                 const std::vector<std::pair<std::string, tensorflow::Tensor>>&
+                     input_tensors,
+                 const AtomMap& atommap,
+                 const std::vector<int>& sel_fwd,
+                 const int nghost = 0);
+  template <typename MODELTYPE, typename VALUETYPE>
+  void run_model(std::vector<VALUETYPE>& dglobal_tensor_,
+                 std::vector<VALUETYPE>& dforce_,
+                 std::vector<VALUETYPE>& dvirial_,
+                 std::vector<VALUETYPE>& datom_tensor_,
+                 std::vector<VALUETYPE>& datom_virial_,
+                 tensorflow::Session* session,
+                 const std::vector<std::pair<std::string, tensorflow::Tensor>>&
+                     input_tensors,
+                 const AtomMap& atommap,
+                 const std::vector<int>& sel_fwd,
+                 const int nghost = 0);
+  template <typename VALUETYPE>
+  void compute_inner(std::vector<VALUETYPE>& value,
+                     const std::vector<VALUETYPE>& coord,
+                     const std::vector<int>& atype,
+                     const std::vector<VALUETYPE>& box);
+  template <typename VALUETYPE>
+  void compute_inner(std::vector<VALUETYPE>& value,
+                     const std::vector<VALUETYPE>& coord,
+                     const std::vector<int>& atype,
+                     const std::vector<VALUETYPE>& box,
+                     const int nghost,
+                     const InputNlist& inlist);
+  template <typename VALUETYPE>
+  void compute_inner(std::vector<VALUETYPE>& global_tensor,
+                     std::vector<VALUETYPE>& force,
+                     std::vector<VALUETYPE>& virial,
+                     std::vector<VALUETYPE>& atom_tensor,
+                     std::vector<VALUETYPE>& atom_virial,
+                     const std::vector<VALUETYPE>& coord,
+                     const std::vector<int>& atype,
+                     const std::vector<VALUETYPE>& box);
+  template <typename VALUETYPE>
+  void compute_inner(std::vector<VALUETYPE>& global_tensor,
+                     std::vector<VALUETYPE>& force,
+                     std::vector<VALUETYPE>& virial,
+                     std::vector<VALUETYPE>& atom_tensor,
+                     std::vector<VALUETYPE>& atom_virial,
+                     const std::vector<VALUETYPE>& coord,
+                     const std::vector<int>& atype,
+                     const std::vector<VALUETYPE>& box,
+                     const int nghost,
+                     const InputNlist& inlist);
+};
+}  // namespace deepmd
diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc
index 11a131a604..9d4e71c3d3 100644
--- a/source/api_cc/src/DeepTensor.cc
+++ b/source/api_cc/src/DeepTensor.cc
@@ -1,25 +1,23 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #include "DeepTensor.h"
 
+#include <memory>
+
+#include "DeepTensorTF.h"
+#include "common.h"
+
 using namespace deepmd;
-using namespace tensorflow;
 
-DeepTensor::DeepTensor() : inited(false), graph_def(new GraphDef()) {}
+DeepTensor::DeepTensor() : inited(false) {}
 
 DeepTensor::DeepTensor(const std::string &model,
                        const int &gpu_rank,
                        const std::string &name_scope_)
-    : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
-  try {
-    init(model, gpu_rank, name_scope_);
-  } catch (...) {
-    // Clean up and rethrow, as the destructor will not be called
-    delete graph_def;
-    throw;
-  }
+    : inited(false) {
+  init(model, gpu_rank, name_scope_);
 }
 
-DeepTensor::~DeepTensor() { delete graph_def; }
+DeepTensor::~DeepTensor() {}
 
 void DeepTensor::init(const std::string &model,
                       const int &gpu_rank,
@@ -30,53 +28,18 @@ void DeepTensor::init(const std::string &model,
               << std::endl;
     return;
   }
-  name_scope = name_scope_;
-  SessionOptions options;
-  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
-  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
-  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
-  deepmd::load_op_library();
-  int gpu_num = -1;
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  DPGetDeviceCount(gpu_num);  // check current device environment
-  if (gpu_num > 0) {
-    options.config.set_allow_soft_placement(true);
-    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
-        0.9);
-    options.config.mutable_gpu_options()->set_allow_growth(true);
-    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
-    std::string str = "/gpu:";
-    str += std::to_string(gpu_rank % gpu_num);
-    graph::SetDefaultDevice(str, graph_def);
-  }
-#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  deepmd::check_status(NewSession(options, &session));
-  deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def));
-  deepmd::check_status(session->Create(*graph_def));
-  try {
-    model_version = get_scalar<STRINGTYPE>("model_attr/model_version");
-  } catch (deepmd::tf_exception &e) {
-    // no model version defined in old models
-    model_version = "0.0";
-  }
-  if (!model_compatable(model_version)) {
-    throw deepmd::deepmd_exception(
-        "incompatable model: version " + model_version +
-        " in graph, but version " + global_model_version +
-        " supported "
-        "See https://deepmd.rtfd.io/compatability/ for details.");
-  }
-  dtype = session_get_dtype(session, "descrpt_attr/rcut");
-  if (dtype == tensorflow::DT_DOUBLE) {
-    rcut = get_scalar<double>("descrpt_attr/rcut");
+  // TODO: To implement detect_backend
+  DPBackend backend = deepmd::DPBackend::TensorFlow;
+  if (deepmd::DPBackend::TensorFlow == backend) {
+    // TODO: throw errors if TF backend is not built, without mentioning TF
+    dt = std::make_shared<deepmd::DeepTensorTF>(model, gpu_rank, name_scope_);
+  } else if (deepmd::DPBackend::PyTorch == backend) {
+    throw deepmd::deepmd_exception("PyTorch backend is not supported yet");
+  } else if (deepmd::DPBackend::Paddle == backend) {
+    throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet");
   } else {
-    rcut = get_scalar<float>("descrpt_attr/rcut");
+    throw deepmd::deepmd_exception("Unknown file type");
   }
-  cell_size = rcut;
-  ntypes = get_scalar<int>("descrpt_attr/ntypes");
-  odim = get_scalar<int>("model_attr/output_dim");
-  get_vector<int>(sel_type, "model_attr/sel_type");
-  model_type = get_scalar<STRINGTYPE>("model_attr/model_type");
   inited = true;
 }
 
@@ -84,267 +47,12 @@ void DeepTensor::print_summary(const std::string &pre) const {
   deepmd::print_summary(pre);
 }
 
-template <class VT>
-VT DeepTensor::get_scalar(const std::string &name) const {
-  return session_get_scalar<VT>(session, name, name_scope);
-}
-
-template <class VT>
-void DeepTensor::get_vector(std::vector<VT> &vec,
-                            const std::string &name) const {
-  session_get_vector<VT>(vec, session, name, name_scope);
-}
-
-template <typename MODELTYPE, typename VALUETYPE>
-void DeepTensor::run_model(
-    std::vector<VALUETYPE> &d_tensor_,
-    Session *session,
-    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost) {
-  unsigned nloc = atommap.get_type().size();
-  unsigned nall = nloc + nghost;
-  if (nloc == 0) {
-    // return empty
-    d_tensor_.clear();
-    return;
-  }
-
-  std::vector<Tensor> output_tensors;
-  deepmd::check_status(
-      session->Run(input_tensors, {name_prefix(name_scope) + "o_" + model_type},
-                   {}, &output_tensors));
-
-  Tensor output_t = output_tensors[0];
-  // Yixiao: newer model may output rank 2 tensor [nframes x (natoms x noutdim)]
-  // assert (output_t.dims() == 1), "dim of output tensor should be 1";
-  auto ot = output_t.flat<MODELTYPE>();
-  // this is an Eigen Tensor
-  int o_size = ot.size();
-
-  std::vector<VALUETYPE> d_tensor(o_size);
-  for (unsigned ii = 0; ii < o_size; ++ii) {
-    d_tensor[ii] = ot(ii);
-  }
-  // now we map the type-sorted sel-atom tensor back to original order
-  // first we have to get the type-sorted select map
-  std::vector<int> sel_srt = sel_fwd;
-  select_map<int>(sel_srt, sel_fwd, atommap.get_fwd_map(), 1);
-  // remove those -1 that correspond to discarded atoms
-  std::remove(sel_srt.begin(), sel_srt.end(), -1);
-  // now map the tensor back
-  d_tensor_.resize(o_size);
-  select_map<VALUETYPE>(d_tensor_, d_tensor, sel_srt, odim);
-}
-
-template void DeepTensor::run_model<double, double>(
-    std::vector<double> &d_tensor_,
-    Session *session,
-    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-template void DeepTensor::run_model<float, double>(
-    std::vector<double> &d_tensor_,
-    Session *session,
-    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-template void DeepTensor::run_model<double, float>(
-    std::vector<float> &d_tensor_,
-    Session *session,
-    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-template void DeepTensor::run_model<float, float>(
-    std::vector<float> &d_tensor_,
-    Session *session,
-    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-
-template <typename MODELTYPE, typename VALUETYPE>
-void DeepTensor::run_model(
-    std::vector<VALUETYPE> &dglobal_tensor_,
-    std::vector<VALUETYPE> &dforce_,
-    std::vector<VALUETYPE> &dvirial_,
-    std::vector<VALUETYPE> &datom_tensor_,
-    std::vector<VALUETYPE> &datom_virial_,
-    tensorflow::Session *session,
-    const std::vector<std::pair<std::string, tensorflow::Tensor>>
-        &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost) {
-  unsigned nloc = atommap.get_type().size();
-  unsigned nall = nloc + nghost;
-  unsigned nsel = nloc - std::count(sel_fwd.begin(), sel_fwd.end(), -1);
-  if (nloc == 0) {
-    // return empty
-    dglobal_tensor_.clear();
-    dforce_.clear();
-    dvirial_.clear();
-    return;
-  }
-
-  std::vector<Tensor> output_tensors;
-  deepmd::check_status(
-      session->Run(input_tensors,
-                   {name_prefix(name_scope) + "o_global_" + model_type,
-                    name_prefix(name_scope) + "o_force",
-                    name_prefix(name_scope) + "o_virial",
-                    name_prefix(name_scope) + "o_" + model_type,
-                    name_prefix(name_scope) + "o_atom_virial"},
-                   {}, &output_tensors));
-
-  Tensor output_gt = output_tensors[0];
-  Tensor output_f = output_tensors[1];
-  Tensor output_v = output_tensors[2];
-  Tensor output_at = output_tensors[3];
-  Tensor output_av = output_tensors[4];
-  // this is the new model, output has to be rank 2 tensor
-  assert(output_gt.dims() == 2 && "dim of output tensor should be 2");
-  assert(output_f.dims() == 2 && "dim of output tensor should be 2");
-  assert(output_v.dims() == 2 && "dim of output tensor should be 2");
-  assert(output_at.dims() == 2 && "dim of output tensor should be 2");
-  assert(output_av.dims() == 2 && "dim of output tensor should be 2");
-  // also check the tensor shapes
-  assert(output_gt.dim_size(0) == 1 && "nframes should match");
-  assert(output_gt.dim_size(1) == odim &&
-         "dof of global tensor should be odim");
-  assert(output_f.dim_size(0) == 1 && "nframes should match");
-  assert(output_f.dim_size(1) == odim * nall * 3 &&
-         "dof of force should be odim * nall * 3");
-  assert(output_v.dim_size(0) == 1 && "nframes should match");
-  assert(output_v.dim_size(1) == odim * 9 &&
-         "dof of virial should be odim * 9");
-  assert(output_at.dim_size(0) == 1 && "nframes should match");
-  assert(output_at.dim_size(1) == nsel * odim &&
-         "dof of atomic tensor should be nsel * odim");
-  assert(output_av.dim_size(0) == 1 && "nframes should match");
-  assert(output_av.dim_size(1) == odim * nall * 9 &&
-         "dof of atomic virial should be odim * nall * 9");
-
-  auto ogt = output_gt.flat<ENERGYTYPE>();
-  auto of = output_f.flat<MODELTYPE>();
-  auto ov = output_v.flat<MODELTYPE>();
-  auto oat = output_at.flat<MODELTYPE>();
-  auto oav = output_av.flat<MODELTYPE>();
-
-  // global tensor
-  dglobal_tensor_.resize(odim);
-  for (unsigned ii = 0; ii < odim; ++ii) {
-    dglobal_tensor_[ii] = ogt(ii);
-  }
-
-  // component-wise force
-  std::vector<VALUETYPE> dforce(3 * nall * odim);
-  for (unsigned ii = 0; ii < odim * nall * 3; ++ii) {
-    dforce[ii] = of(ii);
-  }
-  dforce_ = dforce;
-  for (unsigned dd = 0; dd < odim; ++dd) {
-    atommap.backward<VALUETYPE>(dforce_.begin() + (dd * nall * 3),
-                                dforce.begin() + (dd * nall * 3), 3);
-  }
-
-  // component-wise virial
-  dvirial_.resize(static_cast<size_t>(odim) * 9);
-  for (unsigned ii = 0; ii < odim * 9; ++ii) {
-    dvirial_[ii] = ov(ii);
-  }
-
-  // atomic tensor
-  std::vector<VALUETYPE> datom_tensor(nsel * odim);
-  for (unsigned ii = 0; ii < nsel * odim; ++ii) {
-    datom_tensor[ii] = oat(ii);
-  }
-  std::vector<int> sel_srt = sel_fwd;
-  select_map<int>(sel_srt, sel_fwd, atommap.get_fwd_map(), 1);
-  std::remove(sel_srt.begin(), sel_srt.end(), -1);
-  datom_tensor_.resize(static_cast<size_t>(nsel) * odim);
-  select_map<VALUETYPE>(datom_tensor_, datom_tensor, sel_srt, odim);
-
-  // component-wise atomic virial
-  std::vector<VALUETYPE> datom_virial(9 * nall * odim);
-  for (unsigned ii = 0; ii < odim * nall * 9; ++ii) {
-    datom_virial[ii] = oav(ii);
-  }
-  datom_virial_ = datom_virial;
-  for (unsigned dd = 0; dd < odim; ++dd) {
-    atommap.backward<VALUETYPE>(datom_virial_.begin() + (dd * nall * 9),
-                                datom_virial.begin() + (dd * nall * 9), 9);
-  }
-}
-
-template void DeepTensor::run_model<double, double>(
-    std::vector<double> &dglobal_tensor_,
-    std::vector<double> &dforce_,
-    std::vector<double> &dvirial_,
-    std::vector<double> &datom_tensor_,
-    std::vector<double> &datom_virial_,
-    tensorflow::Session *session,
-    const std::vector<std::pair<std::string, tensorflow::Tensor>>
-        &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-template void DeepTensor::run_model<float, double>(
-    std::vector<double> &dglobal_tensor_,
-    std::vector<double> &dforce_,
-    std::vector<double> &dvirial_,
-    std::vector<double> &datom_tensor_,
-    std::vector<double> &datom_virial_,
-    tensorflow::Session *session,
-    const std::vector<std::pair<std::string, tensorflow::Tensor>>
-        &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-
-template void DeepTensor::run_model<double, float>(
-    std::vector<float> &dglobal_tensor_,
-    std::vector<float> &dforce_,
-    std::vector<float> &dvirial_,
-    std::vector<float> &datom_tensor_,
-    std::vector<float> &datom_virial_,
-    tensorflow::Session *session,
-    const std::vector<std::pair<std::string, tensorflow::Tensor>>
-        &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-
-template void DeepTensor::run_model<float, float>(
-    std::vector<float> &dglobal_tensor_,
-    std::vector<float> &dforce_,
-    std::vector<float> &dvirial_,
-    std::vector<float> &datom_tensor_,
-    std::vector<float> &datom_virial_,
-    tensorflow::Session *session,
-    const std::vector<std::pair<std::string, tensorflow::Tensor>>
-        &input_tensors,
-    const AtomMap &atommap,
-    const std::vector<int> &sel_fwd,
-    const int nghost);
-
 template <typename VALUETYPE>
 void DeepTensor::compute(std::vector<VALUETYPE> &dtensor_,
                          const std::vector<VALUETYPE> &dcoord_,
                          const std::vector<int> &datype_,
                          const std::vector<VALUETYPE> &dbox) {
-  int nall = datype_.size();
-  std::vector<VALUETYPE> dcoord, aparam, aparam_;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_, 0,
-                          ntypes, 1, 0, nall);
-  compute_inner(dtensor_, dcoord, datype, dbox);
+  dt->computew(dtensor_, dcoord_, datype_, dbox);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dtensor_,
@@ -364,20 +72,7 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dtensor_,
                          const std::vector<VALUETYPE> &dbox,
                          const int nghost,
                          const InputNlist &lmp_list) {
-  int nall = datype_.size();
-  std::vector<VALUETYPE> dcoord, dforce, datom_virial, aparam, aparam_;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_,
-                          nghost, ntypes, 1, 0, nall);
-  // internal nlist
-  NeighborListData nlist_data;
-  nlist_data.copy_from_nlist(lmp_list);
-  nlist_data.shuffle_exclude_empty(fwd_map);
-  InputNlist nlist;
-  nlist_data.make_inlist(nlist);
-  compute_inner(dtensor_, dcoord, datype, dbox, nghost_real, nlist);
+  dt->computew(dtensor_, dcoord_, datype_, dbox, nghost, lmp_list);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dtensor_,
@@ -401,9 +96,9 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const std::vector<VALUETYPE> &dcoord_,
                          const std::vector<int> &datype_,
                          const std::vector<VALUETYPE> &dbox) {
-  std::vector<VALUETYPE> tmp_at_, tmp_av_;
-  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_,
-          datype_, dbox);
+  std::vector<VALUETYPE> datom_tensor_, datom_virial_;
+  dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
+               dcoord_, datype_, dbox);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
@@ -429,9 +124,9 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const std::vector<VALUETYPE> &dbox,
                          const int nghost,
                          const InputNlist &lmp_list) {
-  std::vector<VALUETYPE> tmp_at_, tmp_av_;
-  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_,
-          datype_, dbox, nghost, lmp_list);
+  std::vector<VALUETYPE> datom_tensor_, datom_virial_;
+  dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
+               dcoord_, datype_, dbox, nghost, lmp_list);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
@@ -461,34 +156,8 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const std::vector<VALUETYPE> &dcoord_,
                          const std::vector<int> &datype_,
                          const std::vector<VALUETYPE> &dbox) {
-  int nall = datype_.size();
-  std::vector<VALUETYPE> dcoord, dforce, datom_virial, aparam, aparam_;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_, 0,
-                          ntypes, 1, 0, nall);
-  assert(nghost_real == 0);
-  // resize to nall_real
-  dcoord.resize(bkw_map.size() * 3);
-  datype.resize(bkw_map.size());
-  // fwd map
-  select_map<VALUETYPE>(dcoord, dcoord_, fwd_map, 3);
-  select_map<int>(datype, datype_, fwd_map, 1);
-  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial,
-                dcoord, datype, dbox);
-  // bkw map
-  dforce_.resize(odim * fwd_map.size() * 3);
-  for (int kk = 0; kk < odim; ++kk) {
-    select_map<VALUETYPE>(dforce_.begin() + kk * fwd_map.size() * 3,
-                          dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
-  }
-  datom_virial_.resize(odim * fwd_map.size() * 9);
-  for (int kk = 0; kk < odim; ++kk) {
-    select_map<VALUETYPE>(datom_virial_.begin() + kk * fwd_map.size() * 9,
-                          datom_virial.begin() + kk * bkw_map.size() * 9,
-                          bkw_map, 9);
-  }
+  dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
+               dcoord_, datype_, dbox);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
@@ -520,33 +189,8 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const std::vector<VALUETYPE> &dbox,
                          const int nghost,
                          const InputNlist &lmp_list) {
-  int nall = datype_.size();
-  std::vector<VALUETYPE> dcoord, dforce, datom_virial, aparam, aparam_;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_,
-                          nghost, ntypes, 1, 0, nall);
-  // internal nlist
-  NeighborListData nlist_data;
-  nlist_data.copy_from_nlist(lmp_list);
-  nlist_data.shuffle_exclude_empty(fwd_map);
-  InputNlist nlist;
-  nlist_data.make_inlist(nlist);
-  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial,
-                dcoord, datype, dbox, nghost_real, nlist);
-  // bkw map
-  dforce_.resize(odim * fwd_map.size() * 3);
-  for (int kk = 0; kk < odim; ++kk) {
-    select_map<VALUETYPE>(dforce_.begin() + kk * fwd_map.size() * 3,
-                          dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
-  }
-  datom_virial_.resize(odim * fwd_map.size() * 9);
-  for (int kk = 0; kk < odim; ++kk) {
-    select_map<VALUETYPE>(datom_virial_.begin() + kk * fwd_map.size() * 9,
-                          datom_virial.begin() + kk * bkw_map.size() * 9,
-                          bkw_map, 9);
-  }
+  dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
+               dcoord_, datype_, dbox, nghost, lmp_list);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
@@ -571,250 +215,16 @@ template void DeepTensor::compute<float>(std::vector<float> &dglobal_tensor_,
                                          const int nghost,
                                          const InputNlist &lmp_list);
 
-template <typename VALUETYPE>
-void DeepTensor::compute_inner(std::vector<VALUETYPE> &dtensor_,
-                               const std::vector<VALUETYPE> &dcoord_,
-                               const std::vector<int> &datype_,
-                               const std::vector<VALUETYPE> &dbox) {
-  int nall = dcoord_.size() / 3;
-  int nloc = nall;
-  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
-  assert(nloc == atommap.get_type().size());
-
-  std::vector<int> sel_fwd, sel_bkw;
-  int nghost_sel;
-  // this gives the raw selection map, will pass to run model
-  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, 0, sel_type);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
-        name_scope);
-    assert(ret == nloc);
-    run_model<double>(dtensor_, session, input_tensors, atommap, sel_fwd);
-  } else {
-    int ret = session_input_tensors<float>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
-        name_scope);
-    assert(ret == nloc);
-    run_model<float>(dtensor_, session, input_tensors, atommap, sel_fwd);
-  }
-}
-
-template void DeepTensor::compute_inner<double>(
-    std::vector<double> &dtensor_,
-    const std::vector<double> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<double> &dbox);
-
-template void DeepTensor::compute_inner<float>(
-    std::vector<float> &dtensor_,
-    const std::vector<float> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<float> &dbox);
-
-template <typename VALUETYPE>
-void DeepTensor::compute_inner(std::vector<VALUETYPE> &dtensor_,
-                               const std::vector<VALUETYPE> &dcoord_,
-                               const std::vector<int> &datype_,
-                               const std::vector<VALUETYPE> &dbox,
-                               const int nghost,
-                               const InputNlist &nlist_) {
-  int nall = dcoord_.size() / 3;
-  int nloc = nall - nghost;
-  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
-  assert(nloc == atommap.get_type().size());
-
-  std::vector<int> sel_fwd, sel_bkw;
-  int nghost_sel;
-  // this gives the raw selection map, will pass to run model
-  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost,
-                 sel_type);
-  sel_fwd.resize(nloc);
-
-  NeighborListData nlist_data;
-  nlist_data.copy_from_nlist(nlist_);
-  nlist_data.shuffle(atommap);
-  InputNlist nlist;
-  nlist_data.make_inlist(nlist);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
-        name_scope);
-    assert(nloc == ret);
-    run_model<double>(dtensor_, session, input_tensors, atommap, sel_fwd,
-                      nghost);
-  } else {
-    int ret = session_input_tensors<float>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
-        name_scope);
-    assert(nloc == ret);
-    run_model<float>(dtensor_, session, input_tensors, atommap, sel_fwd,
-                     nghost);
-  }
-}
-
-template void DeepTensor::compute_inner<double>(
-    std::vector<double> &dtensor_,
-    const std::vector<double> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<double> &dbox,
-    const int nghost,
-    const InputNlist &nlist_);
-
-template void DeepTensor::compute_inner<float>(
-    std::vector<float> &dtensor_,
-    const std::vector<float> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<float> &dbox,
-    const int nghost,
-    const InputNlist &nlist_);
-
-template <typename VALUETYPE>
-void DeepTensor::compute_inner(std::vector<VALUETYPE> &dglobal_tensor_,
-                               std::vector<VALUETYPE> &dforce_,
-                               std::vector<VALUETYPE> &dvirial_,
-                               std::vector<VALUETYPE> &datom_tensor_,
-                               std::vector<VALUETYPE> &datom_virial_,
-                               const std::vector<VALUETYPE> &dcoord_,
-                               const std::vector<int> &datype_,
-                               const std::vector<VALUETYPE> &dbox) {
-  int nall = dcoord_.size() / 3;
-  int nloc = nall;
-  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
-  assert(nloc == atommap.get_type().size());
-
-  std::vector<int> sel_fwd, sel_bkw;
-  int nghost_sel;
-  // this gives the raw selection map, will pass to run model
-  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, 0, sel_type);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
-        name_scope);
-    assert(ret == nloc);
-    run_model<double>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
-                      datom_virial_, session, input_tensors, atommap, sel_fwd);
-  } else {
-    int ret = session_input_tensors<float>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
-        name_scope);
-    assert(ret == nloc);
-    run_model<float>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
-                     datom_virial_, session, input_tensors, atommap, sel_fwd);
-  }
+void DeepTensor::get_type_map(std::string &type_map) {
+  dt->get_type_map(type_map);
 }
 
-template void DeepTensor::compute_inner<double>(
-    std::vector<double> &dglobal_tensor_,
-    std::vector<double> &dforce_,
-    std::vector<double> &dvirial_,
-    std::vector<double> &datom_tensor_,
-    std::vector<double> &datom_virial_,
-    const std::vector<double> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<double> &dbox);
+double DeepTensor::cutoff() const { return dt->cutoff(); }
 
-template void DeepTensor::compute_inner<float>(
-    std::vector<float> &dglobal_tensor_,
-    std::vector<float> &dforce_,
-    std::vector<float> &dvirial_,
-    std::vector<float> &datom_tensor_,
-    std::vector<float> &datom_virial_,
-    const std::vector<float> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<float> &dbox);
+int DeepTensor::output_dim() const { return dt->output_dim(); }
 
-template <typename VALUETYPE>
-void DeepTensor::compute_inner(std::vector<VALUETYPE> &dglobal_tensor_,
-                               std::vector<VALUETYPE> &dforce_,
-                               std::vector<VALUETYPE> &dvirial_,
-                               std::vector<VALUETYPE> &datom_tensor_,
-                               std::vector<VALUETYPE> &datom_virial_,
-                               const std::vector<VALUETYPE> &dcoord_,
-                               const std::vector<int> &datype_,
-                               const std::vector<VALUETYPE> &dbox,
-                               const int nghost,
-                               const InputNlist &nlist_) {
-  int nall = dcoord_.size() / 3;
-  int nloc = nall - nghost;
-  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
-  assert(nloc == atommap.get_type().size());
-
-  std::vector<int> sel_fwd, sel_bkw;
-  int nghost_sel;
-  // this gives the raw selection map, will pass to run model
-  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost,
-                 sel_type);
-  sel_fwd.resize(nloc);
-
-  NeighborListData nlist_data;
-  nlist_data.copy_from_nlist(nlist_);
-  nlist_data.shuffle(atommap);
-  InputNlist nlist;
-  nlist_data.make_inlist(nlist);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
-        name_scope);
-    assert(nloc == ret);
-    run_model<double>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
-                      datom_virial_, session, input_tensors, atommap, sel_fwd,
-                      nghost);
-  } else {
-    int ret = session_input_tensors<float>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
-        name_scope);
-    assert(nloc == ret);
-    run_model<float>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
-                     datom_virial_, session, input_tensors, atommap, sel_fwd,
-                     nghost);
-  }
+const std::vector<int> &DeepTensor::sel_types() const {
+  return dt->sel_types();
 }
 
-template void DeepTensor::compute_inner<double>(
-    std::vector<double> &dglobal_tensor_,
-    std::vector<double> &dforce_,
-    std::vector<double> &dvirial_,
-    std::vector<double> &datom_tensor_,
-    std::vector<double> &datom_virial_,
-    const std::vector<double> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<double> &dbox,
-    const int nghost,
-    const InputNlist &nlist_);
-
-template void DeepTensor::compute_inner<float>(
-    std::vector<float> &dglobal_tensor_,
-    std::vector<float> &dforce_,
-    std::vector<float> &dvirial_,
-    std::vector<float> &datom_tensor_,
-    std::vector<float> &datom_virial_,
-    const std::vector<float> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<float> &dbox,
-    const int nghost,
-    const InputNlist &nlist_);
-
-void DeepTensor::get_type_map(std::string &type_map) {
-  type_map = get_scalar<STRINGTYPE>("model_attr/tmap");
-}
+int DeepTensor::numb_types() const { return dt->numb_types(); }
diff --git a/source/api_cc/src/DeepTensorTF.cc b/source/api_cc/src/DeepTensorTF.cc
new file mode 100644
index 0000000000..75399e9f39
--- /dev/null
+++ b/source/api_cc/src/DeepTensorTF.cc
@@ -0,0 +1,900 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include "DeepTensorTF.h"
+
+using namespace deepmd;
+using namespace tensorflow;
+
+DeepTensorTF::DeepTensorTF() : inited(false), graph_def(new GraphDef()) {}
+
+// Unlike the default constructor above, this also loads `model` via init().
+DeepTensorTF::DeepTensorTF(const std::string &model,
+                           const int &gpu_rank,
+                           const std::string &name_scope_)
+    : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
+  try {
+    init(model, gpu_rank, name_scope_);
+  } catch (...) {  // destructor will not be called: clean up, then rethrow
+    delete graph_def;
+    throw;
+  }
+}
+
+DeepTensorTF::~DeepTensorTF() { delete graph_def; }  // frees the GraphDef
+
+// Load the frozen graph `model` into a new session and cache its attributes
+// (model version, rcut, ntypes, output dim, selected types, model type).
+// A second call only prints a warning; an incompatible version throws.
+void DeepTensorTF::init(const std::string &model, const int &gpu_rank,
+                        const std::string &name_scope_) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
+  }
+  name_scope = name_scope_;
+  SessionOptions options;
+  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
+  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
+  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
+  deepmd::load_op_library();
+  int gpu_num = -1;
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  DPGetDeviceCount(gpu_num);  // check current device environment
+  if (gpu_num > 0) {
+    options.config.set_allow_soft_placement(true);
+    auto *gpu_options = options.config.mutable_gpu_options();
+    gpu_options->set_per_process_gpu_memory_fraction(0.9);
+    gpu_options->set_allow_growth(true);
+    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
+    // device index wraps gpu_rank onto the gpu_num devices reported above
+    const std::string device = "/gpu:" + std::to_string(gpu_rank % gpu_num);
+    graph::SetDefaultDevice(device, graph_def);
+  }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  deepmd::check_status(NewSession(options, &session));
+  deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def));
+  deepmd::check_status(session->Create(*graph_def));
+  try {
+    model_version = get_scalar<STRINGTYPE>("model_attr/model_version");
+  } catch (deepmd::tf_exception &e) {
+    // no model version defined in old models
+    model_version = "0.0";
+  }
+  if (!model_compatable(model_version)) {
+    throw deepmd::deepmd_exception(
+        "incompatable model: version " + model_version +
+        " in graph, but version " + global_model_version +
+        " supported "
+        "See https://deepmd.rtfd.io/compatability/ for details.");
+  }
+  dtype = session_get_dtype(session, "descrpt_attr/rcut");
+  rcut = (dtype == tensorflow::DT_DOUBLE)
+             ? get_scalar<double>("descrpt_attr/rcut")
+             : get_scalar<float>("descrpt_attr/rcut");
+  cell_size = rcut;
+  ntypes = get_scalar<int>("descrpt_attr/ntypes");
+  odim = get_scalar<int>("model_attr/output_dim");
+  get_vector<int>(sel_type, "model_attr/sel_type");
+  model_type = get_scalar<STRINGTYPE>("model_attr/model_type");
+  inited = true;
+}
+
+template <class VT>  // forwards to session_get_scalar with our session/scope
+VT DeepTensorTF::get_scalar(const std::string &name) const {
+  return session_get_scalar<VT>(session, name, name_scope);
+}
+
+template <class VT>  // forwards to session_get_vector with our session/scope
+void DeepTensorTF::get_vector(std::vector<VT> &vec,
+                              const std::string &name) const {
+  session_get_vector<VT>(vec, session, name, name_scope);
+}
+
+// Run the graph and fetch the per-atom tensor node "o_" + model_type.
+// d_tensor_ receives the flattened output, moved from the type-sorted order
+// back to the original atom order through sel_fwd (-1 = discarded atom) and
+// atommap's forward map. Returns an empty d_tensor_ when there are no local
+// atoms. nghost is not used by this overload.
+template <typename MODELTYPE, typename VALUETYPE>
+void DeepTensorTF::run_model(
+    std::vector<VALUETYPE> &d_tensor_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost) {
+  if (atommap.get_type().size() == 0) {
+    // return empty
+    d_tensor_.clear();
+    return;
+  }
+
+  std::vector<Tensor> output_tensors;
+  deepmd::check_status(
+      session->Run(input_tensors, {name_prefix(name_scope) + "o_" + model_type},
+                   {}, &output_tensors));
+
+  // Yixiao: newer model may output rank 2 tensor [nframes x (natoms x noutdim)]
+  // flat() gives an Eigen view that is independent of the tensor's rank
+  auto ot = output_tensors[0].flat<MODELTYPE>();
+  const int o_size = ot.size();
+  std::vector<VALUETYPE> d_tensor(o_size);
+  for (int ii = 0; ii < o_size; ++ii) {
+    d_tensor[ii] = ot(ii);
+  }
+  // now we map the type-sorted sel-atom tensor back to original order
+  // first we have to get the type-sorted select map
+  std::vector<int> sel_srt = sel_fwd;
+  select_map<int>(sel_srt, sel_fwd, atommap.get_fwd_map(), 1);
+  // erase the -1 of discarded atoms (std::remove alone only shifts elements)
+  sel_srt.erase(std::remove(sel_srt.begin(), sel_srt.end(), -1), sel_srt.end());
+  // now map the tensor back
+  d_tensor_.resize(o_size);
+  select_map<VALUETYPE>(d_tensor_, d_tensor, sel_srt, odim);
+}
+
+template void DeepTensorTF::run_model<double, double>(
+    std::vector<double> &d_tensor_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+template void DeepTensorTF::run_model<float, double>(
+    std::vector<double> &d_tensor_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+template void DeepTensorTF::run_model<double, float>(
+    std::vector<float> &d_tensor_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+template void DeepTensorTF::run_model<float, float>(
+    std::vector<float> &d_tensor_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+
+// Run the graph and fetch the global tensor, its force and virial, the
+// per-atom tensor and the atomic virial. Expected flat sizes: global odim,
+// force odim*nall*3, virial odim*9, atomic tensor nsel*odim, atomic virial
+// odim*nall*9, with nall = nloc + nghost and nsel = nloc minus the number of
+// -1 entries in sel_fwd. Every output is cleared if nloc is 0.
+template <typename MODELTYPE, typename VALUETYPE>
+void DeepTensorTF::run_model(
+    std::vector<VALUETYPE> &dglobal_tensor_,
+    std::vector<VALUETYPE> &dforce_,
+    std::vector<VALUETYPE> &dvirial_,
+    std::vector<VALUETYPE> &datom_tensor_,
+    std::vector<VALUETYPE> &datom_virial_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost) {
+  unsigned nloc = atommap.get_type().size();
+  unsigned nall = nloc + nghost;
+  unsigned nsel = nloc - std::count(sel_fwd.begin(), sel_fwd.end(), -1);
+  if (nloc == 0) {
+    // return empty: clear all five outputs so no stale per-atom data survives
+    dglobal_tensor_.clear();
+    dforce_.clear();
+    dvirial_.clear();
+    datom_tensor_.clear();
+    datom_virial_.clear();
+    return;
+  }
+
+  std::vector<Tensor> output_tensors;
+  deepmd::check_status(
+      session->Run(input_tensors,
+                   {name_prefix(name_scope) + "o_global_" + model_type,
+                    name_prefix(name_scope) + "o_force",
+                    name_prefix(name_scope) + "o_virial",
+                    name_prefix(name_scope) + "o_" + model_type,
+                    name_prefix(name_scope) + "o_atom_virial"},
+                   {}, &output_tensors));
+
+  Tensor output_gt = output_tensors[0];
+  Tensor output_f = output_tensors[1];
+  Tensor output_v = output_tensors[2];
+  Tensor output_at = output_tensors[3];
+  Tensor output_av = output_tensors[4];
+  // this is the new model, output has to be rank 2 tensor
+  assert(output_gt.dims() == 2 && "dim of output tensor should be 2");
+  assert(output_f.dims() == 2 && "dim of output tensor should be 2");
+  assert(output_v.dims() == 2 && "dim of output tensor should be 2");
+  assert(output_at.dims() == 2 && "dim of output tensor should be 2");
+  assert(output_av.dims() == 2 && "dim of output tensor should be 2");
+  // also check the tensor shapes
+  assert(output_gt.dim_size(0) == 1 && "nframes should match");
+  assert(output_gt.dim_size(1) == odim &&
+         "dof of global tensor should be odim");
+  assert(output_f.dim_size(0) == 1 && "nframes should match");
+  assert(output_f.dim_size(1) == odim * nall * 3 &&
+         "dof of force should be odim * nall * 3");
+  assert(output_v.dim_size(0) == 1 && "nframes should match");
+  assert(output_v.dim_size(1) == odim * 9 &&
+         "dof of virial should be odim * 9");
+  assert(output_at.dim_size(0) == 1 && "nframes should match");
+  assert(output_at.dim_size(1) == nsel * odim &&
+         "dof of atomic tensor should be nsel * odim");
+  assert(output_av.dim_size(0) == 1 && "nframes should match");
+  assert(output_av.dim_size(1) == odim * nall * 9 &&
+         "dof of atomic virial should be odim * nall * 9");
+
+  auto ogt = output_gt.flat<ENERGYTYPE>();
+  auto of = output_f.flat<MODELTYPE>();
+  auto ov = output_v.flat<MODELTYPE>();
+  auto oat = output_at.flat<MODELTYPE>();
+  auto oav = output_av.flat<MODELTYPE>();
+
+  // global tensor
+  dglobal_tensor_.resize(odim);
+  for (unsigned ii = 0; ii < odim; ++ii) {
+    dglobal_tensor_[ii] = ogt(ii);
+  }
+
+  // component-wise force
+  std::vector<VALUETYPE> dforce(3 * static_cast<size_t>(nall) * odim);
+  for (unsigned ii = 0; ii < odim * nall * 3; ++ii) {
+    dforce[ii] = of(ii);
+  }
+  dforce_ = dforce;
+  for (unsigned dd = 0; dd < odim; ++dd) {
+    atommap.backward<VALUETYPE>(dforce_.begin() + (dd * nall * 3),
+                                dforce.begin() + (dd * nall * 3), 3);
+  }
+
+  // component-wise virial
+  dvirial_.resize(static_cast<size_t>(odim) * 9);
+  for (unsigned ii = 0; ii < odim * 9; ++ii) {
+    dvirial_[ii] = ov(ii);
+  }
+
+  // atomic tensor; sel_srt is trimmed with erase-remove before mapping back
+  std::vector<VALUETYPE> datom_tensor(static_cast<size_t>(nsel) * odim);
+  for (unsigned ii = 0; ii < nsel * odim; ++ii) {
+    datom_tensor[ii] = oat(ii);
+  }
+  std::vector<int> sel_srt = sel_fwd;
+  select_map<int>(sel_srt, sel_fwd, atommap.get_fwd_map(), 1);
+  sel_srt.erase(std::remove(sel_srt.begin(), sel_srt.end(), -1), sel_srt.end());
+  datom_tensor_.resize(static_cast<size_t>(nsel) * odim);
+  select_map<VALUETYPE>(datom_tensor_, datom_tensor, sel_srt, odim);
+
+  // component-wise atomic virial
+  std::vector<VALUETYPE> datom_virial(9 * static_cast<size_t>(nall) * odim);
+  for (unsigned ii = 0; ii < odim * nall * 9; ++ii) {
+    datom_virial[ii] = oav(ii);
+  }
+  datom_virial_ = datom_virial;
+  for (unsigned dd = 0; dd < odim; ++dd) {
+    atommap.backward<VALUETYPE>(datom_virial_.begin() + (dd * nall * 9),
+                                datom_virial.begin() + (dd * nall * 9), 9);
+  }
+}
+
+template void DeepTensorTF::run_model<double, double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    std::vector<double> &datom_tensor_,
+    std::vector<double> &datom_virial_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+template void DeepTensorTF::run_model<float, double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    std::vector<double> &datom_tensor_,
+    std::vector<double> &datom_virial_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+template void DeepTensorTF::run_model<double, float>(
+    std::vector<float> &dglobal_tensor_,
+    std::vector<float> &dforce_,
+    std::vector<float> &dvirial_,
+    std::vector<float> &datom_tensor_,
+    std::vector<float> &datom_virial_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+template void DeepTensorTF::run_model<float, float>(
+    std::vector<float> &dglobal_tensor_,
+    std::vector<float> &dforce_,
+    std::vector<float> &dvirial_,
+    std::vector<float> &datom_tensor_,
+    std::vector<float> &datom_virial_,
+    Session *session,
+    const std::vector<std::pair<std::string, Tensor>> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector<int> &sel_fwd,
+    const int nghost);
+
+// Per-atom tensor for a system given without a neighbor list. The inputs are
+// reduced by select_real_atoms_coord and then forwarded to compute_inner.
+template <typename VALUETYPE>
+void DeepTensorTF::compute(std::vector<VALUETYPE> &dtensor_,
+                           const std::vector<VALUETYPE> &dcoord_,
+                           const std::vector<int> &datype_,
+                           const std::vector<VALUETYPE> &dbox) {
+  int natoms = datype_.size(), n_ghost_real, n_all_real, n_loc_real;
+  std::vector<int> real_type, fwd_idx, bkw_idx;
+  std::vector<VALUETYPE> real_coord, real_aparam, no_aparam;
+  select_real_atoms_coord(real_coord, real_type, real_aparam, n_ghost_real,
+                          fwd_idx, bkw_idx, n_all_real, n_loc_real, dcoord_,
+                          datype_, no_aparam, 0, ntypes, 1, 0, natoms);
+  compute_inner(dtensor_, real_coord, real_type, dbox);
+}
+
+template void DeepTensorTF::compute<double>(std::vector<double> &dtensor_,
+                                            const std::vector<double> &dcoord_,
+                                            const std::vector<int> &datype_,
+                                            const std::vector<double> &dbox);
+template void DeepTensorTF::compute<float>(std::vector<float> &dtensor_,
+                                           const std::vector<float> &dcoord_,
+                                           const std::vector<int> &datype_,
+                                           const std::vector<float> &dbox);
+
+// Per-atom tensor using the caller-supplied neighbor list `lmp_list`. Inputs
+// and list are reduced (select_real_atoms_coord, shuffle_exclude_empty) first.
+template <typename VALUETYPE>
+void DeepTensorTF::compute(std::vector<VALUETYPE> &dtensor_,
+                           const std::vector<VALUETYPE> &dcoord_,
+                           const std::vector<int> &datype_,
+                           const std::vector<VALUETYPE> &dbox,
+                           const int nghost,
+                           const InputNlist &lmp_list) {
+  int natoms = datype_.size(), n_ghost_real, n_all_real, n_loc_real;
+  std::vector<int> real_type, fwd_idx, bkw_idx;
+  std::vector<VALUETYPE> real_coord, real_aparam, no_aparam;
+  select_real_atoms_coord(real_coord, real_type, real_aparam, n_ghost_real,
+                          fwd_idx, bkw_idx, n_all_real, n_loc_real, dcoord_,
+                          datype_, no_aparam, nghost, ntypes, 1, 0, natoms);
+  // internal nlist
+  NeighborListData list_copy;
+  list_copy.copy_from_nlist(lmp_list);
+  list_copy.shuffle_exclude_empty(fwd_idx);
+  InputNlist inlist;
+  list_copy.make_inlist(inlist);
+  compute_inner(dtensor_, real_coord, real_type, dbox, n_ghost_real, inlist);
+}
+
+template void DeepTensorTF::compute<double>(std::vector<double> &dtensor_,
+                                            const std::vector<double> &dcoord_,
+                                            const std::vector<int> &datype_,
+                                            const std::vector<double> &dbox,
+                                            const int nghost,
+                                            const InputNlist &lmp_list);
+template void DeepTensorTF::compute<float>(std::vector<float> &dtensor_,
+                                           const std::vector<float> &dcoord_,
+                                           const std::vector<int> &datype_,
+                                           const std::vector<float> &dbox,
+                                           const int nghost,
+                                           const InputNlist &lmp_list);
+
+template <typename VALUETYPE>
+void DeepTensorTF::compute(std::vector<VALUETYPE> &dglobal_tensor_,
+                           std::vector<VALUETYPE> &dforce_,
+                           std::vector<VALUETYPE> &dvirial_,
+                           const std::vector<VALUETYPE> &dcoord_,
+                           const std::vector<int> &datype_,
+                           const std::vector<VALUETYPE> &dbox) {
+  std::vector<VALUETYPE> atom_tensor, atom_virial;  // per-atom outputs, unused
+  compute(dglobal_tensor_, dforce_, dvirial_, atom_tensor, atom_virial,
+          dcoord_, datype_, dbox);
+}
+
+template void DeepTensorTF::compute<double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox);
+
+template void DeepTensorTF::compute<float>(std::vector<float> &dglobal_tensor_,
+                                           std::vector<float> &dforce_,
+                                           std::vector<float> &dvirial_,
+                                           const std::vector<float> &dcoord_,
+                                           const std::vector<int> &datype_,
+                                           const std::vector<float> &dbox);
+
+template <typename VALUETYPE>
+void DeepTensorTF::compute(std::vector<VALUETYPE> &dglobal_tensor_,
+                           std::vector<VALUETYPE> &dforce_,
+                           std::vector<VALUETYPE> &dvirial_,
+                           const std::vector<VALUETYPE> &dcoord_,
+                           const std::vector<int> &datype_,
+                           const std::vector<VALUETYPE> &dbox,
+                           const int nghost,
+                           const InputNlist &lmp_list) {
+  std::vector<VALUETYPE> atom_tensor, atom_virial;  // per-atom outputs, unused
+  compute(dglobal_tensor_, dforce_, dvirial_, atom_tensor, atom_virial,
+          dcoord_, datype_, dbox, nghost, lmp_list);
+}
+
+template void DeepTensorTF::compute<double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox,
+    const int nghost,
+    const InputNlist &lmp_list);
+
+template void DeepTensorTF::compute<float>(std::vector<float> &dglobal_tensor_,
+                                           std::vector<float> &dforce_,
+                                           std::vector<float> &dvirial_,
+                                           const std::vector<float> &dcoord_,
+                                           const std::vector<int> &datype_,
+                                           const std::vector<float> &dbox,
+                                           const int nghost,
+                                           const InputNlist &lmp_list);
+
+template <typename VALUETYPE>  // all five outputs, no caller nlist
+void DeepTensorTF::compute(std::vector<VALUETYPE> &dglobal_tensor_,
+                           std::vector<VALUETYPE> &dforce_,
+                           std::vector<VALUETYPE> &dvirial_,
+                           std::vector<VALUETYPE> &datom_tensor_,
+                           std::vector<VALUETYPE> &datom_virial_,
+                           const std::vector<VALUETYPE> &dcoord_,
+                           const std::vector<int> &datype_,
+                           const std::vector<VALUETYPE> &dbox) {
+  int nall = datype_.size();
+  std::vector<VALUETYPE> dcoord, dforce, datom_virial, aparam, aparam_;
+  std::vector<int> datype, fwd_map, bkw_map;
+  int nghost_real, nall_real, nloc_real;
+  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
+                          nall_real, nloc_real, dcoord_, datype_, aparam_, 0,
+                          ntypes, 1, 0, nall);
+  assert(nghost_real == 0);  // a ghost count of 0 was passed in above
+  // resize to nall_real (presumably == bkw_map.size(); TODO confirm)
+  dcoord.resize(bkw_map.size() * 3);
+  datype.resize(bkw_map.size());
+  // fwd map: select_map fills dcoord/datype from the inputs via fwd_map
+  select_map<VALUETYPE>(dcoord, dcoord_, fwd_map, 3);
+  select_map<int>(datype, datype_, fwd_map, 1);
+  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial,
+                dcoord, datype, dbox);
+  // bkw map: for each component kk < odim, map dforce into dforce_ via bkw_map
+  dforce_.resize(odim * fwd_map.size() * 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map<VALUETYPE>(dforce_.begin() + kk * fwd_map.size() * 3,
+                          dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
+  }
+  datom_virial_.resize(odim * fwd_map.size() * 9);  // same mapping, stride 9
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map<VALUETYPE>(datom_virial_.begin() + kk * fwd_map.size() * 9,
+                          datom_virial.begin() + kk * bkw_map.size() * 9,
+                          bkw_map, 9);
+  }
+}
+
+template void DeepTensorTF::compute<double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    std::vector<double> &datom_tensor_,
+    std::vector<double> &datom_virial_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox);
+
+template void DeepTensorTF::compute<float>(std::vector<float> &dglobal_tensor_,
+                                           std::vector<float> &dforce_,
+                                           std::vector<float> &dvirial_,
+                                           std::vector<float> &datom_tensor_,
+                                           std::vector<float> &datom_virial_,
+                                           const std::vector<float> &dcoord_,
+                                           const std::vector<int> &datype_,
+                                           const std::vector<float> &dbox);
+
+template <typename VALUETYPE>  // all five outputs, caller-built nlist
+void DeepTensorTF::compute(std::vector<VALUETYPE> &dglobal_tensor_,
+                           std::vector<VALUETYPE> &dforce_,
+                           std::vector<VALUETYPE> &dvirial_,
+                           std::vector<VALUETYPE> &datom_tensor_,
+                           std::vector<VALUETYPE> &datom_virial_,
+                           const std::vector<VALUETYPE> &dcoord_,
+                           const std::vector<int> &datype_,
+                           const std::vector<VALUETYPE> &dbox,
+                           const int nghost,
+                           const InputNlist &lmp_list) {
+  int nall = datype_.size();
+  std::vector<VALUETYPE> dcoord, dforce, datom_virial, aparam, aparam_;
+  std::vector<int> datype, fwd_map, bkw_map;
+  int nghost_real, nall_real, nloc_real;
+  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
+                          nall_real, nloc_real, dcoord_, datype_, aparam_,
+                          nghost, ntypes, 1, 0, nall);
+  // work on a private copy of lmp_list, reshuffled with fwd_map
+  NeighborListData nlist_data;
+  nlist_data.copy_from_nlist(lmp_list);
+  nlist_data.shuffle_exclude_empty(fwd_map);
+  InputNlist nlist;
+  nlist_data.make_inlist(nlist);
+  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial,
+                dcoord, datype, dbox, nghost_real, nlist);
+  // bkw map: for each component kk < odim, map dforce into dforce_ via bkw_map
+  dforce_.resize(odim * fwd_map.size() * 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map<VALUETYPE>(dforce_.begin() + kk * fwd_map.size() * 3,
+                          dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
+  }
+  datom_virial_.resize(odim * fwd_map.size() * 9);  // same mapping, stride 9
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map<VALUETYPE>(datom_virial_.begin() + kk * fwd_map.size() * 9,
+                          datom_virial.begin() + kk * bkw_map.size() * 9,
+                          bkw_map, 9);
+  }
+}
+
+template void DeepTensorTF::compute<double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    std::vector<double> &datom_tensor_,
+    std::vector<double> &datom_virial_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox,
+    const int nghost,
+    const InputNlist &lmp_list);
+
+template void DeepTensorTF::compute<float>(std::vector<float> &dglobal_tensor_,
+                                           std::vector<float> &dforce_,
+                                           std::vector<float> &dvirial_,
+                                           std::vector<float> &datom_tensor_,
+                                           std::vector<float> &datom_virial_,
+                                           const std::vector<float> &dcoord_,
+                                           const std::vector<int> &datype_,
+                                           const std::vector<float> &dbox,
+                                           const int nghost,
+                                           const InputNlist &lmp_list);
+
+template <typename VALUETYPE>  // tensor-only evaluation, no caller nlist
+void DeepTensorTF::compute_inner(std::vector<VALUETYPE> &dtensor_,
+                                 const std::vector<VALUETYPE> &dcoord_,
+                                 const std::vector<int> &datype_,
+                                 const std::vector<VALUETYPE> &dbox) {
+  int nall = dcoord_.size() / 3;  // three coordinates per atom
+  int nloc = nall;  // no ghost atoms in this overload
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
+
+  std::vector<int> sel_fwd, sel_bkw;
+  int nghost_sel;
+  // raw selection map from select_by_type; sel_fwd is handed to run_model
+  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, 0, sel_type);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  if (dtype == tensorflow::DT_DOUBLE) {  // <double> tensors, else <float>
+    int ret = session_input_tensors<double>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model<double>(dtensor_, session, input_tensors, atommap, sel_fwd);
+  } else {
+    int ret = session_input_tensors<float>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model<float>(dtensor_, session, input_tensors, atommap, sel_fwd);
+  }
+}
+
+template void DeepTensorTF::compute_inner<double>(
+    std::vector<double> &dtensor_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox);
+
+template void DeepTensorTF::compute_inner<float>(
+    std::vector<float> &dtensor_,
+    const std::vector<float> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<float> &dbox);
+
+template <typename VALUETYPE>  // tensor-only evaluation, caller-built nlist
+void DeepTensorTF::compute_inner(std::vector<VALUETYPE> &dtensor_,
+                                 const std::vector<VALUETYPE> &dcoord_,
+                                 const std::vector<int> &datype_,
+                                 const std::vector<VALUETYPE> &dbox,
+                                 const int nghost,
+                                 const InputNlist &nlist_) {
+  int nall = dcoord_.size() / 3;  // three coordinates per atom
+  int nloc = nall - nghost;  // local atoms = all atoms minus ghosts
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
+
+  std::vector<int> sel_fwd, sel_bkw;
+  int nghost_sel;
+  // raw selection map from select_by_type; sel_fwd is handed to run_model
+  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost,
+                 sel_type);
+  sel_fwd.resize(nloc);  // cut or pad sel_fwd to nloc entries
+
+  NeighborListData nlist_data;
+  nlist_data.copy_from_nlist(nlist_);  // nlist_ is const, so shuffle a copy
+  nlist_data.shuffle(atommap);
+  InputNlist nlist;
+  nlist_data.make_inlist(nlist);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  if (dtype == tensorflow::DT_DOUBLE) {  // <double> tensors, else <float>
+    int ret = session_input_tensors<double>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model<double>(dtensor_, session, input_tensors, atommap, sel_fwd,
+                      nghost);
+  } else {
+    int ret = session_input_tensors<float>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model<float>(dtensor_, session, input_tensors, atommap, sel_fwd,
+                     nghost);
+  }
+}
+
+template void DeepTensorTF::compute_inner<double>(
+    std::vector<double> &dtensor_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
+
+template void DeepTensorTF::compute_inner<float>(
+    std::vector<float> &dtensor_,
+    const std::vector<float> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<float> &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
+
+template <typename VALUETYPE>  // all five outputs, no caller nlist
+void DeepTensorTF::compute_inner(std::vector<VALUETYPE> &dglobal_tensor_,
+                                 std::vector<VALUETYPE> &dforce_,
+                                 std::vector<VALUETYPE> &dvirial_,
+                                 std::vector<VALUETYPE> &datom_tensor_,
+                                 std::vector<VALUETYPE> &datom_virial_,
+                                 const std::vector<VALUETYPE> &dcoord_,
+                                 const std::vector<int> &datype_,
+                                 const std::vector<VALUETYPE> &dbox) {
+  int nall = dcoord_.size() / 3;  // three coordinates per atom
+  int nloc = nall;  // no ghost atoms in this overload
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
+
+  std::vector<int> sel_fwd, sel_bkw;
+  int nghost_sel;
+  // raw selection map from select_by_type; sel_fwd is handed to run_model
+  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, 0, sel_type);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  if (dtype == tensorflow::DT_DOUBLE) {  // <double> tensors, else <float>
+    int ret = session_input_tensors<double>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model<double>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                      datom_virial_, session, input_tensors, atommap, sel_fwd);
+  } else {
+    int ret = session_input_tensors<float>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model<float>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                     datom_virial_, session, input_tensors, atommap, sel_fwd);
+  }
+}
+
+template void DeepTensorTF::compute_inner<double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    std::vector<double> &datom_tensor_,
+    std::vector<double> &datom_virial_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox);
+
+template void DeepTensorTF::compute_inner<float>(
+    std::vector<float> &dglobal_tensor_,
+    std::vector<float> &dforce_,
+    std::vector<float> &dvirial_,
+    std::vector<float> &datom_tensor_,
+    std::vector<float> &datom_virial_,
+    const std::vector<float> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<float> &dbox);
+
+template <typename VALUETYPE>  // all five outputs, caller-built nlist
+void DeepTensorTF::compute_inner(std::vector<VALUETYPE> &dglobal_tensor_,
+                                 std::vector<VALUETYPE> &dforce_,
+                                 std::vector<VALUETYPE> &dvirial_,
+                                 std::vector<VALUETYPE> &datom_tensor_,
+                                 std::vector<VALUETYPE> &datom_virial_,
+                                 const std::vector<VALUETYPE> &dcoord_,
+                                 const std::vector<int> &datype_,
+                                 const std::vector<VALUETYPE> &dbox,
+                                 const int nghost,
+                                 const InputNlist &nlist_) {
+  int nall = dcoord_.size() / 3;  // three coordinates per atom
+  int nloc = nall - nghost;  // local atoms = all atoms minus ghosts
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
+
+  std::vector<int> sel_fwd, sel_bkw;
+  int nghost_sel;
+  // raw selection map from select_by_type; sel_fwd is handed to run_model
+  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost,
+                 sel_type);
+  sel_fwd.resize(nloc);  // cut or pad sel_fwd to nloc entries
+
+  NeighborListData nlist_data;
+  nlist_data.copy_from_nlist(nlist_);  // nlist_ is const, so shuffle a copy
+  nlist_data.shuffle(atommap);
+  InputNlist nlist;
+  nlist_data.make_inlist(nlist);
+
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+
+  if (dtype == tensorflow::DT_DOUBLE) {  // <double> tensors, else <float>
+    int ret = session_input_tensors<double>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model<double>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                      datom_virial_, session, input_tensors, atommap, sel_fwd,
+                      nghost);
+  } else {
+    int ret = session_input_tensors<float>(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model<float>(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                     datom_virial_, session, input_tensors, atommap, sel_fwd,
+                     nghost);
+  }
+}
+
+template void DeepTensorTF::compute_inner<double>(
+    std::vector<double> &dglobal_tensor_,
+    std::vector<double> &dforce_,
+    std::vector<double> &dvirial_,
+    std::vector<double> &datom_tensor_,
+    std::vector<double> &datom_virial_,
+    const std::vector<double> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<double> &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
+
+template void DeepTensorTF::compute_inner<float>(
+    std::vector<float> &dglobal_tensor_,
+    std::vector<float> &dforce_,
+    std::vector<float> &dvirial_,
+    std::vector<float> &datom_tensor_,
+    std::vector<float> &datom_virial_,
+    const std::vector<float> &dcoord_,
+    const std::vector<int> &datype_,
+    const std::vector<float> &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
+
+void DeepTensorTF::get_type_map(std::string &type_map) {
+  type_map = get_scalar<STRINGTYPE>("model_attr/tmap");  // overwrites type_map
+}
+
+void DeepTensorTF::computew(std::vector<double> &value,
+                            const std::vector<double> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<double> &box) {
+  compute(value, coord, atype, box);  // forwards to compute<double>
+}
+void DeepTensorTF::computew(std::vector<float> &value,
+                            const std::vector<float> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<float> &box) {
+  compute(value, coord, atype, box);  // forwards to compute<float>
+}
+
+void DeepTensorTF::computew(std::vector<double> &value,
+                            const std::vector<double> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<double> &box,
+                            const int nghost,
+                            const InputNlist &inlist) {
+  compute(value, coord, atype, box, nghost, inlist);  // compute<double>
+}
+void DeepTensorTF::computew(std::vector<float> &value,
+                            const std::vector<float> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<float> &box,
+                            const int nghost,
+                            const InputNlist &inlist) {
+  compute(value, coord, atype, box, nghost, inlist);  // compute<float>
+}
+
+void DeepTensorTF::computew(std::vector<double> &global_tensor,
+                            std::vector<double> &force,
+                            std::vector<double> &virial,
+                            std::vector<double> &atom_tensor,
+                            std::vector<double> &atom_virial,
+                            const std::vector<double> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<double> &box) {
+  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
+          box);  // forwards to compute<double>
+}
+void DeepTensorTF::computew(std::vector<float> &global_tensor,
+                            std::vector<float> &force,
+                            std::vector<float> &virial,
+                            std::vector<float> &atom_tensor,
+                            std::vector<float> &atom_virial,
+                            const std::vector<float> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<float> &box) {
+  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
+          box);  // forwards to compute<float>
+}
+
+void DeepTensorTF::computew(std::vector<double> &global_tensor,
+                            std::vector<double> &force,
+                            std::vector<double> &virial,
+                            std::vector<double> &atom_tensor,
+                            std::vector<double> &atom_virial,
+                            const std::vector<double> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<double> &box,
+                            const int nghost,
+                            const InputNlist &inlist) {
+  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
+          box, nghost, inlist);  // forwards to compute<double>
+}
+void DeepTensorTF::computew(std::vector<float> &global_tensor,
+                            std::vector<float> &force,
+                            std::vector<float> &virial,
+                            std::vector<float> &atom_tensor,
+                            std::vector<float> &atom_virial,
+                            const std::vector<float> &coord,
+                            const std::vector<int> &atype,
+                            const std::vector<float> &box,
+                            const int nghost,
+                            const InputNlist &inlist) {
+  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
+          box, nghost, inlist);  // forwards to compute<float>
+}

From 1efc7f85a51f9b99c996966db911ca30a5b0bf59 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 17 Jan 2024 00:02:58 -0500
Subject: [PATCH 88/97] cc: refactor DataModifier for multiple-backend
 framework (#3148)

See #3119

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 source/api_cc/include/DataModifier.h   | 124 ++++++---
 source/api_cc/include/DataModifierTF.h | 132 +++++++++
 source/api_cc/src/DataModifier.cc      | 311 ++-------------------
 source/api_cc/src/DataModifierTF.cc    | 363 +++++++++++++++++++++++++
 4 files changed, 611 insertions(+), 319 deletions(-)
 create mode 100644 source/api_cc/include/DataModifierTF.h
 create mode 100644 source/api_cc/src/DataModifierTF.cc

diff --git a/source/api_cc/include/DataModifier.h b/source/api_cc/include/DataModifier.h
index 502d7fcf4e..1e611a3930 100644
--- a/source/api_cc/include/DataModifier.h
+++ b/source/api_cc/include/DataModifier.h
@@ -1,9 +1,92 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #pragma once
 
-#include "DeepPot.h"
+#include <memory>
+
+#include "common.h"
 
 namespace deepmd {
+/**
+ * @brief Dipole charge modifier. (Base class)
+ **/
+class DipoleChargeModifierBase {
+ public:
+  /**
+   * @brief Dipole charge modifier without initialization.
+   **/
+  DipoleChargeModifierBase(){};
+  /**
+   * @brief Dipole charge modifier constructed from a frozen model file.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope The name scope.
+   **/
+  DipoleChargeModifierBase(const std::string& model,
+                           const int& gpu_rank = 0,
+                           const std::string& name_scope = "");
+  virtual ~DipoleChargeModifierBase(){};
+  /**
+   * @brief Initialize the dipole charge modifier (pure virtual).
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope The name scope.
+   **/
+  virtual void init(const std::string& model,
+                    const int& gpu_rank = 0,
+                    const std::string& name_scope = "") = 0;
+  /**
+   * @brief Evaluate the force and virial correction by using this dipole charge
+   *modifier.
+   * @param[out] dfcorr_ The force correction on each atom.
+   * @param[out] dvcorr_ The virial correction.
+   * @param[in] dcoord_ The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] datype_ The atom types. The list should contain natoms ints.
+   * @param[in] dbox The cell of the region. The array should be of size 9.
+   * @param[in] pairs The pairs of atoms. The list should contain npairs pairs
+   *of ints.
+   * @param[in] delef_ The electric field on each atom. The array should be of
+   *size natoms x 3.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The neighbor list.
+   @{
+   **/
+  virtual void computew(std::vector<double>& dfcorr_,
+                        std::vector<double>& dvcorr_,
+                        const std::vector<double>& dcoord_,
+                        const std::vector<int>& datype_,
+                        const std::vector<double>& dbox,
+                        const std::vector<std::pair<int, int>>& pairs,
+                        const std::vector<double>& delef_,
+                        const int nghost,
+                        const InputNlist& lmp_list) = 0;
+  virtual void computew(std::vector<float>& dfcorr_,
+                        std::vector<float>& dvcorr_,
+                        const std::vector<float>& dcoord_,
+                        const std::vector<int>& datype_,
+                        const std::vector<float>& dbox,
+                        const std::vector<std::pair<int, int>>& pairs,
+                        const std::vector<float>& delef_,
+                        const int nghost,
+                        const InputNlist& lmp_list) = 0;
+  /** @} */
+  /**
+   * @brief Get cutoff radius.
+   * @return double cutoff radius.
+   */
+  virtual double cutoff() const = 0;
+  /**
+   * @brief Get the number of atom types.
+   * @return int number of atom types.
+   */
+  virtual int numb_types() const = 0;
+  /**
+   * @brief Get the list of sel types.
+   * @return The list of sel types.
+   */
+  virtual std::vector<int> sel_types() const = 0;
+};
+
 /**
  * @brief Dipole charge modifier.
  **/
@@ -38,7 +121,6 @@ class DipoleChargeModifier {
    **/
   void print_summary(const std::string& pre) const;
 
- public:
   /**
    * @brief Evaluate the force and virial correction by using this dipole charge
    *modifier.
@@ -69,50 +151,20 @@ class DipoleChargeModifier {
    * @brief Get cutoff radius.
    * @return double cutoff radius.
    */
-  double cutoff() const {
-    assert(inited);
-    return rcut;
-  };
+  double cutoff() const;
   /**
    * @brief Get the number of atom types.
    * @return int number of atom types.
    */
-  int numb_types() const {
-    assert(inited);
-    return ntypes;
-  };
+  int numb_types() const;
   /**
    * @brief Get the list of sel types.
    * @return The list of sel types.
    */
-  std::vector<int> sel_types() const {
-    assert(inited);
-    return sel_type;
-  };
+  std::vector<int> sel_types() const;
 
  private:
-  tensorflow::Session* session;
-  std::string name_scope, name_prefix;
-  int num_intra_nthreads, num_inter_nthreads;
-  tensorflow::GraphDef* graph_def;
   bool inited;
-  double rcut;
-  int dtype;
-  double cell_size;
-  int ntypes;
-  std::string model_type;
-  std::vector<int> sel_type;
-  template <class VT>
-  VT get_scalar(const std::string& name) const;
-  template <class VT>
-  void get_vector(std::vector<VT>& vec, const std::string& name) const;
-  template <typename MODELTYPE, typename VALUETYPE>
-  void run_model(std::vector<VALUETYPE>& dforce,
-                 std::vector<VALUETYPE>& dvirial,
-                 tensorflow::Session* session,
-                 const std::vector<std::pair<std::string, tensorflow::Tensor>>&
-                     input_tensors,
-                 const AtomMap& atommap,
-                 const int nghost);
+  std::shared_ptr<deepmd::DipoleChargeModifierBase> dcm;
 };
 }  // namespace deepmd
diff --git a/source/api_cc/include/DataModifierTF.h b/source/api_cc/include/DataModifierTF.h
new file mode 100644
index 0000000000..5c44322c0c
--- /dev/null
+++ b/source/api_cc/include/DataModifierTF.h
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#pragma once
+
+#include "DataModifier.h"
+#include "common.h"
+
+namespace deepmd {
+/**
+ * @brief Dipole charge modifier.
+ **/
+class DipoleChargeModifierTF : public DipoleChargeModifierBase {
+ public:
+  /**
+   * @brief Dipole charge modifier without initialization.
+   **/
+  DipoleChargeModifierTF();
+  /**
+   * @brief Dipole charge modifier initialized from a frozen model file.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope The name scope.
+   **/
+  DipoleChargeModifierTF(const std::string& model,
+                         const int& gpu_rank = 0,
+                         const std::string& name_scope = "");
+  ~DipoleChargeModifierTF();
+  /**
+   * @brief Initialize the dipole charge modifier.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope The name scope.
+   **/
+  void init(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& name_scope = "");
+
+ public:
+  /**
+   * @brief Evaluate the force and virial correction by using this dipole charge
+   *modifier.
+   * @param[out] dfcorr_ The force correction on each atom.
+   * @param[out] dvcorr_ The virial correction.
+   * @param[in] dcoord_ The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] datype_ The atom types. The list should contain natoms ints.
+   * @param[in] dbox The cell of the region. The array should be of size 9.
+   * @param[in] pairs The pairs of atoms. The list should contain npairs pairs
+   *of ints.
+   * @param[in] delef_ The electric field on each atom. The array should be of
+   *size natoms x 3.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The neighbor list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE>& dfcorr_,
+               std::vector<VALUETYPE>& dvcorr_,
+               const std::vector<VALUETYPE>& dcoord_,
+               const std::vector<int>& datype_,
+               const std::vector<VALUETYPE>& dbox,
+               const std::vector<std::pair<int, int>>& pairs,
+               const std::vector<VALUETYPE>& delef_,
+               const int nghost,
+               const InputNlist& lmp_list);
+  /**
+   * @brief Get cutoff radius.
+   * @return double cutoff radius.
+   */
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
+  /**
+   * @brief Get the number of atom types.
+   * @return int number of atom types.
+   */
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
+  /**
+   * @brief Get the list of sel types.
+   * @return The list of sel types.
+   */
+  std::vector<int> sel_types() const {
+    assert(inited);
+    return sel_type;
+  };
+  void computew(std::vector<double>& dfcorr_,
+                std::vector<double>& dvcorr_,
+                const std::vector<double>& dcoord_,
+                const std::vector<int>& datype_,
+                const std::vector<double>& dbox,
+                const std::vector<std::pair<int, int>>& pairs,
+                const std::vector<double>& delef_,
+                const int nghost,
+                const InputNlist& lmp_list);
+  void computew(std::vector<float>& dfcorr_,
+                std::vector<float>& dvcorr_,
+                const std::vector<float>& dcoord_,
+                const std::vector<int>& datype_,
+                const std::vector<float>& dbox,
+                const std::vector<std::pair<int, int>>& pairs,
+                const std::vector<float>& delef_,
+                const int nghost,
+                const InputNlist& lmp_list);
+
+ private:
+  tensorflow::Session* session;
+  std::string name_scope, name_prefix;
+  int num_intra_nthreads, num_inter_nthreads;
+  tensorflow::GraphDef* graph_def;
+  bool inited;
+  double rcut;
+  int dtype;
+  double cell_size;
+  int ntypes;
+  std::string model_type;
+  std::vector<int> sel_type;
+  template <class VT>
+  VT get_scalar(const std::string& name) const;
+  template <class VT>
+  void get_vector(std::vector<VT>& vec, const std::string& name) const;
+  template <typename MODELTYPE, typename VALUETYPE>
+  void run_model(std::vector<VALUETYPE>& dforce,
+                 std::vector<VALUETYPE>& dvirial,
+                 tensorflow::Session* session,
+                 const std::vector<std::pair<std::string, tensorflow::Tensor>>&
+                     input_tensors,
+                 const AtomMap& atommap,
+                 const int nghost);
+};
+}  // namespace deepmd
diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc
index c44bbceaa2..954c969c13 100644
--- a/source/api_cc/src/DataModifier.cc
+++ b/source/api_cc/src/DataModifier.cc
@@ -1,26 +1,21 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #include "DataModifier.h"
 
+#include "DataModifierTF.h"
+#include "common.h"
+
 using namespace deepmd;
-using namespace tensorflow;
 
-DipoleChargeModifier::DipoleChargeModifier()
-    : inited(false), graph_def(new GraphDef()) {}
+DipoleChargeModifier::DipoleChargeModifier() : inited(false) {}
 
 DipoleChargeModifier::DipoleChargeModifier(const std::string& model,
                                            const int& gpu_rank,
                                            const std::string& name_scope_)
-    : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
-  try {
-    init(model, gpu_rank, name_scope_);
-  } catch (...) {
-    // Clean up and rethrow, as the destructor will not be called
-    delete graph_def;
-    throw;
-  }
+    : inited(false) {
+  init(model, gpu_rank, name_scope_);
 }
 
-DipoleChargeModifier::~DipoleChargeModifier() { delete graph_def; };
+DipoleChargeModifier::~DipoleChargeModifier(){};
 
 void DipoleChargeModifier::init(const std::string& model,
                                 const int& gpu_rank,
@@ -31,140 +26,22 @@ void DipoleChargeModifier::init(const std::string& model,
               << std::endl;
     return;
   }
-  name_scope = name_scope_;
-  SessionOptions options;
-  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
-  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
-  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
-  deepmd::load_op_library();
-  int gpu_num = -1;
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  DPGetDeviceCount(gpu_num);  // check current device environment
-  if (gpu_num > 0) {
-    options.config.set_allow_soft_placement(true);
-    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
-        0.9);
-    options.config.mutable_gpu_options()->set_allow_growth(true);
-    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
-    std::string str = "/gpu:";
-    str += std::to_string(gpu_rank % gpu_num);
-    graph::SetDefaultDevice(str, graph_def);
-  }
-#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  deepmd::check_status(NewSession(options, &session));
-  deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def));
-  deepmd::check_status(session->Create(*graph_def));
-  // int nnodes = graph_def.node_size();
-  // for (int ii = 0; ii < nnodes; ++ii){
-  //   cout << ii << " \t " << graph_def.node(ii).name() << endl;
-  // }
-  dtype = session_get_dtype(session, "descrpt_attr/rcut");
-  if (dtype == tensorflow::DT_DOUBLE) {
-    rcut = get_scalar<double>("descrpt_attr/rcut");
+  // TODO: To implement detect_backend
+  DPBackend backend = deepmd::DPBackend::TensorFlow;
+  if (deepmd::DPBackend::TensorFlow == backend) {
+    // TODO: throw errors if TF backend is not built, without mentioning TF
+    dcm = std::make_shared<deepmd::DipoleChargeModifierTF>(model, gpu_rank,
+                                                           name_scope_);
+  } else if (deepmd::DPBackend::PyTorch == backend) {
+    throw deepmd::deepmd_exception("PyTorch backend is not supported yet");
+  } else if (deepmd::DPBackend::Paddle == backend) {
+    throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet");
   } else {
-    rcut = get_scalar<float>("descrpt_attr/rcut");
+    throw deepmd::deepmd_exception("Unknown file type");
   }
-  cell_size = rcut;
-  ntypes = get_scalar<int>("descrpt_attr/ntypes");
-  model_type = get_scalar<STRINGTYPE>("model_attr/model_type");
-  get_vector<int>(sel_type, "model_attr/sel_type");
-  sort(sel_type.begin(), sel_type.end());
   inited = true;
 }
 
-template <class VT>
-VT DipoleChargeModifier::get_scalar(const std::string& name) const {
-  return session_get_scalar<VT>(session, name, name_scope);
-}
-
-template <class VT>
-void DipoleChargeModifier::get_vector(std::vector<VT>& vec,
-                                      const std::string& name) const {
-  session_get_vector<VT>(vec, session, name, name_scope);
-}
-
-template <typename MODELTYPE, typename VALUETYPE>
-void DipoleChargeModifier::run_model(
-    std::vector<VALUETYPE>& dforce,
-    std::vector<VALUETYPE>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nghost) {
-  unsigned nloc = atommap.get_type().size();
-  unsigned nall = nloc + nghost;
-  if (nloc == 0) {
-    dforce.clear();
-    dvirial.clear();
-    return;
-  }
-
-  std::vector<Tensor> output_tensors;
-  deepmd::check_status(session->Run(input_tensors,
-                                    {"o_dm_force", "o_dm_virial", "o_dm_av"},
-                                    {}, &output_tensors));
-  int cc = 0;
-  Tensor output_f = output_tensors[cc++];
-  Tensor output_v = output_tensors[cc++];
-  Tensor output_av = output_tensors[cc++];
-  assert(output_f.dims() == 2 && "dim of output tensor should be 2");
-  assert(output_v.dims() == 2 && "dim of output tensor should be 2");
-  assert(output_av.dims() == 2 && "dim of output tensor should be 2");
-  int nframes = output_f.dim_size(0);
-  int natoms = output_f.dim_size(1) / 3;
-  assert(output_f.dim_size(0) == 1 && "nframes should match");
-  assert(natoms == nall && "natoms should be nall");
-  assert(output_v.dim_size(0) == nframes && "nframes should match");
-  assert(output_v.dim_size(1) == 9 && "dof of virial should be 9");
-  assert(output_av.dim_size(0) == nframes && "nframes should match");
-  assert(output_av.dim_size(1) == natoms * 9 &&
-         "dof of atom virial should be 9 * natoms");
-
-  auto of = output_f.flat<MODELTYPE>();
-  auto ov = output_v.flat<MODELTYPE>();
-
-  dforce.resize(static_cast<size_t>(nall) * 3);
-  dvirial.resize(9);
-  for (int ii = 0; ii < nall * 3; ++ii) {
-    dforce[ii] = of(ii);
-  }
-  for (int ii = 0; ii < 9; ++ii) {
-    dvirial[ii] = ov(ii);
-  }
-}
-
-template void DipoleChargeModifier::run_model<double, double>(
-    std::vector<double>& dforce,
-    std::vector<double>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nghost);
-
-template void DipoleChargeModifier::run_model<float, double>(
-    std::vector<double>& dforce,
-    std::vector<double>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nghost);
-
-template void DipoleChargeModifier::run_model<double, float>(
-    std::vector<float>& dforce,
-    std::vector<float>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nghost);
-
-template void DipoleChargeModifier::run_model<float, float>(
-    std::vector<float>& dforce,
-    std::vector<float>& dvirial,
-    Session* session,
-    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
-    const AtomMap& atommap,
-    const int nghost);
-
 template <typename VALUETYPE>
 void DipoleChargeModifier::compute(
     std::vector<VALUETYPE>& dfcorr_,
@@ -176,148 +53,8 @@ void DipoleChargeModifier::compute(
     const std::vector<VALUETYPE>& delef_,
     const int nghost,
     const InputNlist& lmp_list) {
-  // firstly do selection
-  int nall = datype_.size();
-  int nloc = nall - nghost;
-  int nghost_real;
-  std::vector<int> real_fwd_map, real_bkw_map;
-  select_real_atoms(real_fwd_map, real_bkw_map, nghost_real, dcoord_, datype_,
-                    nghost, ntypes);
-  int nall_real = real_bkw_map.size();
-  int nloc_real = nall_real - nghost_real;
-  if (nloc_real == 0) {
-    dfcorr_.resize(static_cast<size_t>(nall) * 3);
-    dvcorr_.resize(9);
-    fill(dfcorr_.begin(), dfcorr_.end(), (VALUETYPE)0.0);
-    fill(dvcorr_.begin(), dvcorr_.end(), (VALUETYPE)0.0);
-    return;
-  }
-  // resize to nall_real
-  std::vector<VALUETYPE> dcoord_real;
-  std::vector<VALUETYPE> delef_real;
-  std::vector<int> datype_real;
-  dcoord_real.resize(static_cast<size_t>(nall_real) * 3);
-  delef_real.resize(static_cast<size_t>(nall_real) * 3);
-  datype_real.resize(nall_real);
-  // fwd map
-  select_map<VALUETYPE>(dcoord_real, dcoord_, real_fwd_map, 3);
-  select_map<VALUETYPE>(delef_real, delef_, real_fwd_map, 3);
-  select_map<int>(datype_real, datype_, real_fwd_map, 1);
-  // internal nlist
-  NeighborListData nlist_data;
-  nlist_data.copy_from_nlist(lmp_list);
-  nlist_data.shuffle_exclude_empty(real_fwd_map);
-  // sort atoms
-  AtomMap atommap(datype_real.begin(), datype_real.begin() + nloc_real);
-  assert(nloc_real == atommap.get_type().size());
-  const std::vector<int>& sort_fwd_map(atommap.get_fwd_map());
-  const std::vector<int>& sort_bkw_map(atommap.get_bkw_map());
-  // shuffle nlist
-  nlist_data.shuffle(atommap);
-  InputNlist nlist;
-  nlist_data.make_inlist(nlist);
-  // make input tensors
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-  int ret;
-  if (dtype == tensorflow::DT_DOUBLE) {
-    ret = session_input_tensors<double>(
-        input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
-        nghost_real, 0, name_scope);
-  } else {
-    ret = session_input_tensors<float>(
-        input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist,
-        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
-        nghost_real, 0, name_scope);
-  }
-  assert(nloc_real == ret);
-  // make bond idx map
-  std::vector<int> bd_idx(nall, -1);
-  for (int ii = 0; ii < pairs.size(); ++ii) {
-    bd_idx[pairs[ii].first] = pairs[ii].second;
-  }
-  // make extf by bond idx map
-  std::vector<int> dtype_sort_loc = atommap.get_type();
-  std::vector<VALUETYPE> dextf;
-  for (int ii = 0; ii < dtype_sort_loc.size(); ++ii) {
-    if (binary_search(sel_type.begin(), sel_type.end(), dtype_sort_loc[ii])) {
-      // selected atom
-      int first_idx = real_bkw_map[sort_bkw_map[ii]];
-      int second_idx = bd_idx[first_idx];
-      assert(second_idx >= 0);
-      dextf.push_back(delef_[second_idx * 3 + 0]);
-      dextf.push_back(delef_[second_idx * 3 + 1]);
-      dextf.push_back(delef_[second_idx * 3 + 2]);
-    }
-  }
-  // dextf should be loc and virtual
-  assert(dextf.size() == (nloc - nloc_real) * 3);
-  // make tensor for extf
-  int nframes = 1;
-  TensorShape extf_shape;
-  extf_shape.AddDim(nframes);
-  extf_shape.AddDim(dextf.size());
-  Tensor extf_tensor((tensorflow::DataType)dtype, extf_shape);
-  if (dtype == tensorflow::DT_DOUBLE) {
-    auto extf = extf_tensor.matrix<double>();
-    for (int ii = 0; ii < nframes; ++ii) {
-      for (int jj = 0; jj < extf.size(); ++jj) {
-        extf(ii, jj) = dextf[jj];
-      }
-    }
-  } else {
-    auto extf = extf_tensor.matrix<float>();
-    for (int ii = 0; ii < nframes; ++ii) {
-      for (int jj = 0; jj < extf.size(); ++jj) {
-        extf(ii, jj) = dextf[jj];
-      }
-    }
-  }
-  // append extf to input tensor
-  input_tensors.push_back({"t_ef", extf_tensor});
-  // run model
-  std::vector<VALUETYPE> dfcorr, dvcorr;
-  if (dtype == tensorflow::DT_DOUBLE) {
-    run_model<double>(dfcorr, dvcorr, session, input_tensors, atommap,
-                      nghost_real);
-  } else {
-    run_model<float>(dfcorr, dvcorr, session, input_tensors, atommap,
-                     nghost_real);
-  }
-  assert(dfcorr.size() == nall_real * 3);
-  // back map force
-  std::vector<VALUETYPE> dfcorr_1 = dfcorr;
-  atommap.backward<VALUETYPE>(dfcorr_1.begin(), dfcorr.begin(), 3);
-  assert(dfcorr_1.size() == nall_real * 3);
-  // resize to all and clear
-  std::vector<VALUETYPE> dfcorr_2(nall * 3);
-  fill(dfcorr_2.begin(), dfcorr_2.end(), (VALUETYPE)0.0);
-  // back map to original position
-  for (int ii = 0; ii < nall_real; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      dfcorr_2[real_bkw_map[ii] * 3 + dd] += dfcorr_1[ii * 3 + dd];
-    }
-  }
-  // self correction of bonded force
-  for (int ii = 0; ii < pairs.size(); ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      dfcorr_2[pairs[ii].first * 3 + dd] += delef_[pairs[ii].second * 3 + dd];
-    }
-  }
-  // add ele contrinution
-  dfcorr_ = dfcorr_2;
-  // for (int ii = 0; ii < nloc; ++ii){
-  //   for (int dd = 0; dd < 3; ++dd){
-  //     dfcorr_[ii*3+dd] += delef_[ii*3+dd];
-  //   }
-  // }
-  for (int ii = 0; ii < nloc_real; ++ii) {
-    int oii = real_bkw_map[ii];
-    for (int dd = 0; dd < 3; ++dd) {
-      dfcorr_[oii * 3 + dd] += delef_[oii * 3 + dd];
-    }
-  }
-  dvcorr_ = dvcorr;
+  dcm->computew(dfcorr_, dvcorr_, dcoord_, datype_, dbox, pairs, delef_, nghost,
+                lmp_list);
 }
 
 template void DipoleChargeModifier::compute<double>(
@@ -345,3 +82,11 @@ template void DipoleChargeModifier::compute<float>(
 void DipoleChargeModifier::print_summary(const std::string& pre) const {
   deepmd::print_summary(pre);
 }
+
+double DipoleChargeModifier::cutoff() const { return dcm->cutoff(); }
+
+int DipoleChargeModifier::numb_types() const { return dcm->numb_types(); }
+
+std::vector<int> DipoleChargeModifier::sel_types() const {
+  return dcm->sel_types();
+}
diff --git a/source/api_cc/src/DataModifierTF.cc b/source/api_cc/src/DataModifierTF.cc
new file mode 100644
index 0000000000..219139cf89
--- /dev/null
+++ b/source/api_cc/src/DataModifierTF.cc
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include "DataModifierTF.h"
+
+#include "common.h"
+
+using namespace deepmd;
+using namespace tensorflow;
+
+DipoleChargeModifierTF::DipoleChargeModifierTF()
+    : inited(false), graph_def(new GraphDef()) {}
+
+DipoleChargeModifierTF::DipoleChargeModifierTF(const std::string& model,
+                                               const int& gpu_rank,
+                                               const std::string& name_scope_)
+    : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
+  try {
+    init(model, gpu_rank, name_scope_);
+  } catch (...) {
+    // Clean up and rethrow, as the destructor will not be called
+    delete graph_def;
+    throw;
+  }
+}
+
+DipoleChargeModifierTF::~DipoleChargeModifierTF() { delete graph_def; };
+
+void DipoleChargeModifierTF::init(const std::string& model,
+                                  const int& gpu_rank,
+                                  const std::string& name_scope_) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
+  }
+  name_scope = name_scope_;
+  SessionOptions options;
+  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
+  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
+  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
+  deepmd::load_op_library();
+  int gpu_num = -1;
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  DPGetDeviceCount(gpu_num);  // check current device environment
+  if (gpu_num > 0) {
+    options.config.set_allow_soft_placement(true);
+    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
+        0.9);
+    options.config.mutable_gpu_options()->set_allow_growth(true);
+    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
+    std::string str = "/gpu:";
+    str += std::to_string(gpu_rank % gpu_num);
+    graph::SetDefaultDevice(str, graph_def);
+  }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  deepmd::check_status(NewSession(options, &session));
+  deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def));
+  deepmd::check_status(session->Create(*graph_def));
+  dtype = session_get_dtype(session, "descrpt_attr/rcut");
+  if (dtype == tensorflow::DT_DOUBLE) {
+    rcut = get_scalar<double>("descrpt_attr/rcut");
+  } else {
+    rcut = get_scalar<float>("descrpt_attr/rcut");
+  }
+  cell_size = rcut;
+  ntypes = get_scalar<int>("descrpt_attr/ntypes");
+  model_type = get_scalar<STRINGTYPE>("model_attr/model_type");
+  get_vector<int>(sel_type, "model_attr/sel_type");
+  sort(sel_type.begin(), sel_type.end());
+  inited = true;
+}
+
+template <class VT>
+VT DipoleChargeModifierTF::get_scalar(const std::string& name) const {
+  return session_get_scalar<VT>(session, name, name_scope);
+}
+
+template <class VT>
+void DipoleChargeModifierTF::get_vector(std::vector<VT>& vec,
+                                        const std::string& name) const {
+  session_get_vector<VT>(vec, session, name, name_scope);
+}
+
+template <typename MODELTYPE, typename VALUETYPE>
+void DipoleChargeModifierTF::run_model(
+    std::vector<VALUETYPE>& dforce,
+    std::vector<VALUETYPE>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost) {
+  unsigned nloc = atommap.get_type().size();
+  unsigned nall = nloc + nghost;
+  if (nloc == 0) {  // no local atoms: return empty force and virial
+    dforce.clear();
+    dvirial.clear();
+    return;
+  }
+
+  std::vector<Tensor> output_tensors;
+  deepmd::check_status(session->Run(input_tensors,
+                                    {"o_dm_force", "o_dm_virial", "o_dm_av"},
+                                    {}, &output_tensors));
+  int cc = 0;
+  Tensor output_f = output_tensors[cc++];
+  Tensor output_v = output_tensors[cc++];
+  Tensor output_av = output_tensors[cc++];
+  assert(output_f.dims() == 2 && "dim of output tensor should be 2");
+  assert(output_v.dims() == 2 && "dim of output tensor should be 2");
+  assert(output_av.dims() == 2 && "dim of output tensor should be 2");
+  int nframes = output_f.dim_size(0);
+  int natoms = output_f.dim_size(1) / 3;
+  assert(output_f.dim_size(0) == 1 && "nframes should match");
+  assert(natoms == nall && "natoms should be nall");
+  assert(output_v.dim_size(0) == nframes && "nframes should match");
+  assert(output_v.dim_size(1) == 9 && "dof of virial should be 9");
+  assert(output_av.dim_size(0) == nframes && "nframes should match");
+  assert(output_av.dim_size(1) == natoms * 9 &&
+         "dof of atom virial should be 9 * natoms");
+
+  auto of = output_f.flat<MODELTYPE>();
+  auto ov = output_v.flat<MODELTYPE>();
+
+  dforce.resize(static_cast<size_t>(nall) * 3);  // widen before multiplying
+  dvirial.resize(9);
+  for (size_t ii = 0; ii < dforce.size(); ++ii) {
+    dforce[ii] = of(ii);
+  }
+  for (int ii = 0; ii < 9; ++ii) {
+    dvirial[ii] = ov(ii);
+  }
+}
+
+template void DipoleChargeModifierTF::run_model<double, double>(
+    std::vector<double>& dforce,
+    std::vector<double>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
+
+template void DipoleChargeModifierTF::run_model<float, double>(
+    std::vector<double>& dforce,
+    std::vector<double>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
+
+template void DipoleChargeModifierTF::run_model<double, float>(
+    std::vector<float>& dforce,
+    std::vector<float>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
+
+template void DipoleChargeModifierTF::run_model<float, float>(
+    std::vector<float>& dforce,
+    std::vector<float>& dvirial,
+    Session* session,
+    const std::vector<std::pair<std::string, Tensor>>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
+
+template <typename VALUETYPE>
+void DipoleChargeModifierTF::compute(
+    std::vector<VALUETYPE>& dfcorr_,
+    std::vector<VALUETYPE>& dvcorr_,
+    const std::vector<VALUETYPE>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<VALUETYPE>& dbox,
+    const std::vector<std::pair<int, int>>& pairs,
+    const std::vector<VALUETYPE>& delef_,
+    const int nghost,
+    const InputNlist& lmp_list) {
+  // firstly do selection
+  int nall = datype_.size();
+  int nloc = nall - nghost;
+  int nghost_real;
+  std::vector<int> real_fwd_map, real_bkw_map;
+  select_real_atoms(real_fwd_map, real_bkw_map, nghost_real, dcoord_, datype_,
+                    nghost, ntypes);
+  int nall_real = real_bkw_map.size();
+  int nloc_real = nall_real - nghost_real;
+  if (nloc_real == 0) {
+    dfcorr_.resize(static_cast<size_t>(nall) * 3);
+    dvcorr_.resize(9);
+    fill(dfcorr_.begin(), dfcorr_.end(), (VALUETYPE)0.0);
+    fill(dvcorr_.begin(), dvcorr_.end(), (VALUETYPE)0.0);
+    return;
+  }
+  // resize to nall_real
+  std::vector<VALUETYPE> dcoord_real;
+  std::vector<VALUETYPE> delef_real;
+  std::vector<int> datype_real;
+  dcoord_real.resize(static_cast<size_t>(nall_real) * 3);
+  delef_real.resize(static_cast<size_t>(nall_real) * 3);
+  datype_real.resize(nall_real);
+  // fwd map
+  select_map<VALUETYPE>(dcoord_real, dcoord_, real_fwd_map, 3);
+  select_map<VALUETYPE>(delef_real, delef_, real_fwd_map, 3);
+  select_map<int>(datype_real, datype_, real_fwd_map, 1);
+  // internal nlist
+  NeighborListData nlist_data;
+  nlist_data.copy_from_nlist(lmp_list);
+  nlist_data.shuffle_exclude_empty(real_fwd_map);
+  // sort atoms
+  AtomMap atommap(datype_real.begin(), datype_real.begin() + nloc_real);
+  assert(nloc_real == atommap.get_type().size());
+  const std::vector<int>& sort_fwd_map(atommap.get_fwd_map());
+  const std::vector<int>& sort_bkw_map(atommap.get_bkw_map());
+  // shuffle nlist
+  nlist_data.shuffle(atommap);
+  InputNlist nlist;
+  nlist_data.make_inlist(nlist);
+  // make input tensors
+  std::vector<std::pair<std::string, Tensor>> input_tensors;
+  int ret;
+  if (dtype == tensorflow::DT_DOUBLE) {
+    ret = session_input_tensors<double>(
+        input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
+        nghost_real, 0, name_scope);
+  } else {
+    ret = session_input_tensors<float>(
+        input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist,
+        std::vector<VALUETYPE>(), std::vector<VALUETYPE>(), atommap,
+        nghost_real, 0, name_scope);
+  }
+  assert(nloc_real == ret);
+  // make bond idx map
+  std::vector<int> bd_idx(nall, -1);
+  for (int ii = 0; ii < pairs.size(); ++ii) {
+    bd_idx[pairs[ii].first] = pairs[ii].second;
+  }
+  // make extf by bond idx map
+  std::vector<int> dtype_sort_loc = atommap.get_type();
+  std::vector<VALUETYPE> dextf;
+  for (int ii = 0; ii < dtype_sort_loc.size(); ++ii) {
+    if (binary_search(sel_type.begin(), sel_type.end(), dtype_sort_loc[ii])) {
+      // selected atom
+      int first_idx = real_bkw_map[sort_bkw_map[ii]];
+      int second_idx = bd_idx[first_idx];
+      assert(second_idx >= 0);
+      dextf.push_back(delef_[second_idx * 3 + 0]);
+      dextf.push_back(delef_[second_idx * 3 + 1]);
+      dextf.push_back(delef_[second_idx * 3 + 2]);
+    }
+  }
+  // dextf should be loc and virtual
+  assert(dextf.size() == (nloc - nloc_real) * 3);
+  // make tensor for extf
+  int nframes = 1;
+  TensorShape extf_shape;
+  extf_shape.AddDim(nframes);
+  extf_shape.AddDim(dextf.size());
+  Tensor extf_tensor((tensorflow::DataType)dtype, extf_shape);
+  if (dtype == tensorflow::DT_DOUBLE) {
+    auto extf = extf_tensor.matrix<double>();
+    for (int ii = 0; ii < nframes; ++ii) {
+      for (int jj = 0; jj < extf.size(); ++jj) {
+        extf(ii, jj) = dextf[jj];
+      }
+    }
+  } else {
+    auto extf = extf_tensor.matrix<float>();
+    for (int ii = 0; ii < nframes; ++ii) {
+      for (int jj = 0; jj < extf.size(); ++jj) {
+        extf(ii, jj) = dextf[jj];
+      }
+    }
+  }
+  // append extf to input tensor
+  input_tensors.push_back({"t_ef", extf_tensor});
+  // run model
+  std::vector<VALUETYPE> dfcorr, dvcorr;
+  if (dtype == tensorflow::DT_DOUBLE) {
+    run_model<double>(dfcorr, dvcorr, session, input_tensors, atommap,
+                      nghost_real);
+  } else {
+    run_model<float>(dfcorr, dvcorr, session, input_tensors, atommap,
+                     nghost_real);
+  }
+  assert(dfcorr.size() == nall_real * 3);
+  // back map force
+  std::vector<VALUETYPE> dfcorr_1 = dfcorr;
+  atommap.backward<VALUETYPE>(dfcorr_1.begin(), dfcorr.begin(), 3);
+  assert(dfcorr_1.size() == nall_real * 3);
+  // resize to all and clear
+  std::vector<VALUETYPE> dfcorr_2(static_cast<size_t>(nall) * 3);
+  fill(dfcorr_2.begin(), dfcorr_2.end(), (VALUETYPE)0.0);
+  // back map to original position
+  for (int ii = 0; ii < nall_real; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      dfcorr_2[real_bkw_map[ii] * 3 + dd] += dfcorr_1[ii * 3 + dd];
+    }
+  }
+  // self correction of bonded force
+  for (int ii = 0; ii < pairs.size(); ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      dfcorr_2[pairs[ii].first * 3 + dd] += delef_[pairs[ii].second * 3 + dd];
+    }
+  }
+  // add ele contribution
+  dfcorr_ = dfcorr_2;
+  for (int ii = 0; ii < nloc_real; ++ii) {
+    int oii = real_bkw_map[ii];
+    for (int dd = 0; dd < 3; ++dd) {
+      dfcorr_[oii * 3 + dd] += delef_[oii * 3 + dd];
+    }
+  }
+  dvcorr_ = dvcorr;
+}
+
+template void DipoleChargeModifierTF::compute<double>(
+    std::vector<double>& dfcorr_,
+    std::vector<double>& dvcorr_,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<std::pair<int, int>>& pairs,
+    const std::vector<double>& delef_,
+    const int nghost,
+    const InputNlist& lmp_list);
+
+template void DipoleChargeModifierTF::compute<float>(
+    std::vector<float>& dfcorr_,
+    std::vector<float>& dvcorr_,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<std::pair<int, int>>& pairs,
+    const std::vector<float>& delef_,
+    const int nghost,
+    const InputNlist& lmp_list);
+
+void DipoleChargeModifierTF::computew(
+    std::vector<double>& dfcorr_,
+    std::vector<double>& dvcorr_,
+    const std::vector<double>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<double>& dbox,
+    const std::vector<std::pair<int, int>>& pairs,
+    const std::vector<double>& delef_,
+    const int nghost,
+    const InputNlist& lmp_list) {
+  compute(dfcorr_, dvcorr_, dcoord_, datype_, dbox, pairs, delef_, nghost,
+          lmp_list);
+}
+void DipoleChargeModifierTF::computew(
+    std::vector<float>& dfcorr_,
+    std::vector<float>& dvcorr_,
+    const std::vector<float>& dcoord_,
+    const std::vector<int>& datype_,
+    const std::vector<float>& dbox,
+    const std::vector<std::pair<int, int>>& pairs,
+    const std::vector<float>& delef_,
+    const int nghost,
+    const InputNlist& lmp_list) {
+  compute(dfcorr_, dvcorr_, dcoord_, datype_, dbox, pairs, delef_, nghost,
+          lmp_list);
+}

From 549392119cd1704cfdb02c26b8cb8b9afb695935 Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Thu, 18 Jan 2024 09:26:10 +0800
Subject: [PATCH 89/97] fix: some issue of the output def  (#3152)

- strict type hint
- allow the last dim to be variable (by setting the dim to -1)
- remove variable def, which is not very useful.
- _derv_c should be defined for each atom

---------

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
Co-authored-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd_utils/model_format/__init__.py   |   6 +-
 deepmd_utils/model_format/output_def.py | 115 ++++++++++++------------
 source/tests/test_output_def.py         |  71 +++++++++++++--
 3 files changed, 128 insertions(+), 64 deletions(-)

diff --git a/deepmd_utils/model_format/__init__.py b/deepmd_utils/model_format/__init__.py
index 72dd7b59ee..253bca3507 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd_utils/model_format/__init__.py
@@ -24,8 +24,9 @@
     FittingOutputDef,
     ModelOutputDef,
     OutputVariableDef,
-    VariableDef,
     fitting_check_output,
+    get_deriv_name,
+    get_reduce_name,
     model_check_output,
 )
 from .se_e2_a import (
@@ -52,7 +53,8 @@
     "ModelOutputDef",
     "FittingOutputDef",
     "OutputVariableDef",
-    "VariableDef",
     "model_check_output",
     "fitting_check_output",
+    "get_reduce_name",
+    "get_deriv_name",
 ]
diff --git a/deepmd_utils/model_format/output_def.py b/deepmd_utils/model_format/output_def.py
index 7feb24a145..268dc21ea6 100644
--- a/deepmd_utils/model_format/output_def.py
+++ b/deepmd_utils/model_format/output_def.py
@@ -3,23 +3,34 @@
     Dict,
     List,
     Tuple,
-    Union,
 )
 
 
+def check_shape(
+    shape: List[int],
+    def_shape: List[int],
+):
+    """Check if the shape satisfies the defined shape."""
+    assert len(shape) == len(def_shape)
+    if def_shape[-1] == -1:
+        if list(shape[:-1]) != def_shape[:-1]:
+            raise ValueError(f"{shape[:-1]} shape not matching def {def_shape[:-1]}")
+    else:
+        if list(shape) != def_shape:
+            raise ValueError(f"{shape} shape not matching def {def_shape}")
+
+
 def check_var(var, var_def):
     if var_def.atomic:
         # var.shape == [nf, nloc, *var_def.shape]
         if len(var.shape) != len(var_def.shape) + 2:
             raise ValueError(f"{var.shape[2:]} length not matching def {var_def.shape}")
-        if list(var.shape[2:]) != var_def.shape:
-            raise ValueError(f"{var.shape[2:]} not matching def {var_def.shape}")
+        check_shape(list(var.shape[2:]), var_def.shape)
     else:
         # var.shape == [nf, *var_def.shape]
         if len(var.shape) != len(var_def.shape) + 1:
             raise ValueError(f"{var.shape[1:]} length not matching def {var_def.shape}")
-        if list(var.shape[1:]) != var_def.shape:
-            raise ValueError(f"{var.shape[1:]} not matching def {var_def.shape}")
+        check_shape(list(var.shape[1:]), var_def.shape)
 
 
 def model_check_output(cls):
@@ -38,7 +49,7 @@ def __init__(
             **kwargs,
         ):
             super().__init__(*args, **kwargs)
-            self.md = cls.output_def(self)
+            self.md = self.output_def()
 
         def __call__(
             self,
@@ -77,7 +88,7 @@ def __init__(
             **kwargs,
         ):
             super().__init__(*args, **kwargs)
-            self.md = cls.output_def(self)
+            self.md = self.output_def()
 
         def __call__(
             self,
@@ -93,35 +104,7 @@ def __call__(
     return wrapper
 
 
-class VariableDef:
-    """Defines the shape and other properties of a variable.
-
-    Parameters
-    ----------
-    name
-          Name of the output variable. Notice that the xxxx_redu,
-          xxxx_derv_c, xxxx_derv_r are reserved names that should
-          not be used to define variables.
-    shape
-          The shape of the variable. e.g. energy should be [1],
-          dipole should be [3], polarizabilty should be [3,3].
-    atomic
-          If the variable is defined for each atom.
-
-    """
-
-    def __init__(
-        self,
-        name: str,
-        shape: Union[List[int], Tuple[int]],
-        atomic: bool = True,
-    ):
-        self.name = name
-        self.shape = list(shape)
-        self.atomic = atomic
-
-
-class OutputVariableDef(VariableDef):
+class OutputVariableDef:
     """Defines the shape and other properties of the one output variable.
 
     It is assume that the fitting network output variables for each
@@ -149,12 +132,14 @@ class OutputVariableDef(VariableDef):
     def __init__(
         self,
         name: str,
-        shape: Union[List[int], Tuple[int]],
+        shape: List[int],
         reduciable: bool = False,
         differentiable: bool = False,
+        atomic: bool = True,
     ):
-        # fitting output must be atomic
-        super().__init__(name, shape, atomic=True)
+        self.name = name
+        self.shape = list(shape)
+        self.atomic = atomic
         self.reduciable = reduciable
         self.differentiable = differentiable
         if not self.reduciable and self.differentiable:
@@ -176,13 +161,13 @@ class FittingOutputDef:
 
     def __init__(
         self,
-        var_defs: List[OutputVariableDef] = [],
+        var_defs: List[OutputVariableDef],
     ):
         self.var_defs = {vv.name: vv for vv in var_defs}
 
     def __getitem__(
         self,
-        key,
+        key: str,
     ) -> OutputVariableDef:
         return self.var_defs[key]
 
@@ -215,7 +200,7 @@ def __init__(
         self.def_outp = fit_defs
         self.def_redu = do_reduce(self.def_outp)
         self.def_derv_r, self.def_derv_c = do_derivative(self.def_outp)
-        self.var_defs = {}
+        self.var_defs: Dict[str, OutputVariableDef] = {}
         for ii in [
             self.def_outp.get_data(),
             self.def_redu,
@@ -224,10 +209,16 @@ def __init__(
         ]:
             self.var_defs.update(ii)
 
-    def __getitem__(self, key) -> VariableDef:
+    def __getitem__(
+        self,
+        key: str,
+    ) -> OutputVariableDef:
         return self.var_defs[key]
 
-    def get_data(self, key) -> Dict[str, VariableDef]:
+    def get_data(
+        self,
+        key: str,
+    ) -> Dict[str, OutputVariableDef]:
         return self.var_defs
 
     def keys(self):
@@ -246,33 +237,45 @@ def keys_derv_c(self):
         return self.def_derv_c.keys()
 
 
-def get_reduce_name(name):
+def get_reduce_name(name: str) -> str:
     return name + "_redu"
 
 
-def get_deriv_name(name):
+def get_deriv_name(name: str) -> Tuple[str, str]:
     return name + "_derv_r", name + "_derv_c"
 
 
 def do_reduce(
-    def_outp,
-):
-    def_redu = {}
+    def_outp: FittingOutputDef,
+) -> Dict[str, OutputVariableDef]:
+    def_redu: Dict[str, OutputVariableDef] = {}
     for kk, vv in def_outp.get_data().items():
         if vv.reduciable:
             rk = get_reduce_name(kk)
-            def_redu[rk] = VariableDef(rk, vv.shape, atomic=False)
+            def_redu[rk] = OutputVariableDef(
+                rk, vv.shape, reduciable=False, differentiable=False, atomic=False
+            )
     return def_redu
 
 
 def do_derivative(
-    def_outp,
-):
-    def_derv_r = {}
-    def_derv_c = {}
+    def_outp: FittingOutputDef,
+) -> Tuple[Dict[str, OutputVariableDef], Dict[str, OutputVariableDef]]:
+    def_derv_r: Dict[str, OutputVariableDef] = {}
+    def_derv_c: Dict[str, OutputVariableDef] = {}
     for kk, vv in def_outp.get_data().items():
         if vv.differentiable:
             rkr, rkc = get_deriv_name(kk)
-            def_derv_r[rkr] = VariableDef(rkr, [*vv.shape, 3], atomic=True)
-            def_derv_c[rkc] = VariableDef(rkc, [*vv.shape, 3, 3], atomic=False)
+            def_derv_r[rkr] = OutputVariableDef(
+                rkr,
+                vv.shape + [3],  # noqa: RUF005
+                reduciable=False,
+                differentiable=False,
+            )
+            def_derv_c[rkc] = OutputVariableDef(
+                rkc,
+                vv.shape + [3, 3],  # noqa: RUF005
+                reduciable=True,
+                differentiable=False,
+            )
     return def_derv_r, def_derv_c
diff --git a/source/tests/test_output_def.py b/source/tests/test_output_def.py
index e0c56784da..82d1b13a80 100644
--- a/source/tests/test_output_def.py
+++ b/source/tests/test_output_def.py
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import unittest
+from typing import (
+    List,
+)
 
 import numpy as np
 
@@ -11,6 +14,21 @@
     fitting_check_output,
     model_check_output,
 )
+from deepmd_utils.model_format.output_def import (
+    check_var,
+)
+
+
+class VariableDef:
+    def __init__(
+        self,
+        name: str,
+        shape: List[int],
+        atomic: bool = True,
+    ):
+        self.name = name
+        self.shape = list(shape)
+        self.atomic = atomic
 
 
 class TestDef(unittest.TestCase):
@@ -81,7 +99,7 @@ def test_model_output_def(self):
         self.assertEqual(md["foo"].atomic, True)
         self.assertEqual(md["energy_redu"].atomic, False)
         self.assertEqual(md["energy_derv_r"].atomic, True)
-        self.assertEqual(md["energy_derv_c"].atomic, False)
+        self.assertEqual(md["energy_derv_c"].atomic, True)
 
     def test_raise_no_redu_deriv(self):
         with self.assertRaises(ValueError) as context:
@@ -90,6 +108,7 @@ def test_raise_no_redu_deriv(self):
     def test_model_decorator(self):
         nf = 2
         nloc = 3
+        nall = 4
 
         @model_check_output
         class Foo(NativeOP):
@@ -103,8 +122,8 @@ def call(self):
                 return {
                     "energy": np.zeros([nf, nloc, 1]),
                     "energy_redu": np.zeros([nf, 1]),
-                    "energy_derv_r": np.zeros([nf, nloc, 1, 3]),
-                    "energy_derv_c": np.zeros([nf, 1, 3, 3]),
+                    "energy_derv_r": np.zeros([nf, nall, 1, 3]),
+                    "energy_derv_c": np.zeros([nf, nall, 1, 3, 3]),
                 }
 
         ff = Foo()
@@ -113,6 +132,7 @@ def call(self):
     def test_model_decorator_keyerror(self):
         nf = 2
         nloc = 3
+        nall = 4
 
         @model_check_output
         class Foo(NativeOP):
@@ -129,7 +149,7 @@ def call(self):
                 return {
                     "energy": np.zeros([nf, nloc, 1]),
                     "energy_redu": np.zeros([nf, 1]),
-                    "energy_derv_c": np.zeros([nf, 1, 3, 3]),
+                    "energy_derv_c": np.zeros([nf, nall, 1, 3, 3]),
                 }
 
         ff = Foo()
@@ -140,13 +160,14 @@ def call(self):
     def test_model_decorator_shapeerror(self):
         nf = 2
         nloc = 3
+        nall = 4
 
         @model_check_output
         class Foo(NativeOP):
             def __init__(
                 self,
                 shape_rd=[nf, 1],
-                shape_dr=[nf, nloc, 1, 3],
+                shape_dr=[nf, nall, 1, 3],
             ):
                 self.shape_rd, self.shape_dr = shape_rd, shape_dr
 
@@ -161,7 +182,7 @@ def call(self):
                     "energy": np.zeros([nf, nloc, 1]),
                     "energy_redu": np.zeros(self.shape_rd),
                     "energy_derv_r": np.zeros(self.shape_dr),
-                    "energy_derv_c": np.zeros([nf, 1, 3, 3]),
+                    "energy_derv_c": np.zeros([nf, nall, 1, 3, 3]),
                 }
 
         ff = Foo()
@@ -192,6 +213,7 @@ def call(self):
     def test_fitting_decorator(self):
         nf = 2
         nloc = 3
+        nall = 4
 
         @fitting_check_output
         class Foo(NativeOP):
@@ -243,3 +265,40 @@ def call(self):
             ff = Foo(shape=[nf, nloc, 2])
             ff()
             self.assertIn("not matching", context.exception)
+
+    def test_check_var(self):
+        var_def = VariableDef("foo", [2, 3], atomic=True)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5, 6]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 3, 2, 3]), var_def)
+
+        var_def = VariableDef("foo", [2, 3], atomic=False)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 2, 3]), var_def)
+
+        var_def = VariableDef("foo", [2, -1], atomic=True)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5, 6]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 3, 2, 8]), var_def)
+
+        var_def = VariableDef("foo", [2, -1], atomic=False)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 2, 8]), var_def)

From d915a912fe78e5ced652f55182fc2d496df8093c Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 17 Jan 2024 20:31:04 -0500
Subject: [PATCH 90/97] cc: merge `DeepPotBase` and `DeepTensor` member
 functions (#3145)

Remove 18 member functions from `DeepPotBase` and 4 member functions
from `DeepTensor`.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/api_cc/include/DataModifierTF.h |   4 +-
 source/api_cc/include/DeepPot.h        | 384 +++++-------------
 source/api_cc/include/DeepPotTF.h      | 268 +------------
 source/api_cc/include/DeepTensor.h     |  59 +--
 source/api_cc/include/DeepTensorTF.h   | 108 +----
 source/api_cc/src/DeepPot.cc           | 532 +++++++++++++++----------
 source/api_cc/src/DeepPotTF.cc         | 503 -----------------------
 source/api_cc/src/DeepTensor.cc        |  16 +-
 source/api_cc/src/DeepTensorTF.cc      | 150 +++----
 9 files changed, 503 insertions(+), 1521 deletions(-)

diff --git a/source/api_cc/include/DataModifierTF.h b/source/api_cc/include/DataModifierTF.h
index 5c44322c0c..2ca3729525 100644
--- a/source/api_cc/include/DataModifierTF.h
+++ b/source/api_cc/include/DataModifierTF.h
@@ -34,7 +34,7 @@ class DipoleChargeModifierTF : public DipoleChargeModifierBase {
             const int& gpu_rank = 0,
             const std::string& name_scope = "");
 
- public:
+ private:
   /**
    * @brief Evaluate the force and virial correction by using this dipole charge
    *modifier.
@@ -61,6 +61,8 @@ class DipoleChargeModifierTF : public DipoleChargeModifierBase {
                const std::vector<VALUETYPE>& delef_,
                const int nghost,
                const InputNlist& lmp_list);
+
+ public:
   /**
    * @brief Get cutoff radius.
    * @return double cutoff radius.
diff --git a/source/api_cc/include/DeepPot.h b/source/api_cc/include/DeepPot.h
index e8e64e31c0..a8aedde510 100644
--- a/source/api_cc/include/DeepPot.h
+++ b/source/api_cc/include/DeepPot.h
@@ -38,141 +38,11 @@ class DeepPotBase {
                     const int& gpu_rank = 0,
                     const std::string& file_content = "") = 0;
 
-  /**
-   * @brief Evaluate the energy, force and virial by using this DP.
-   * @param[out] ener The system energy.
-   * @param[out] force The force on each atom.
-   * @param[out] virial The virial.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *nframes x natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size nframes
-   *x 9.
-   * @param[in] fparam The frame parameter. The array can be of size :
-   * nframes x dim_fparam.
-   * dim_fparam. Then all frames are assumed to be provided with the same
-   *fparam.
-   * @param[in] aparam The atomic parameter The array can be of size :
-   * nframes x natoms x dim_aparam.
-   * natoms x dim_aparam. Then all frames are assumed to be provided with the
-   *same aparam.
-   * @{
-   **/
-  virtual void computew(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-
-  virtual void computew(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
-  virtual void computew(
-      std::vector<double>& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-  virtual void computew(
-      std::vector<double>& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
-  /** @} */
-  /**
-   * @brief Evaluate the energy, force and virial by using this DP.
-   * @param[out] ener The system energy.
-   * @param[out] force The force on each atom.
-   * @param[out] virial The virial.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *nframes x natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size nframes
-   *x 9.
-   * @param[in] nghost The number of ghost atoms.
-   * @param[in] inlist The input neighbour list.
-   * @param[in] ago Update the internal neighbour list if ago is 0.
-   * @param[in] fparam The frame parameter. The array can be of size :
-   * nframes x dim_fparam.
-   * dim_fparam. Then all frames are assumed to be provided with the same
-   *fparam.
-   * @param[in] aparam The atomic parameter The array can be of size :
-   * nframes x natoms x dim_aparam.
-   * natoms x dim_aparam. Then all frames are assumed to be provided with the
-   *same aparam.
-   * @{
-   **/
-  virtual void computew(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const int nghost,
-      const InputNlist& inlist,
-      const int& ago,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-
-  virtual void computew(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const int nghost,
-      const InputNlist& inlist,
-      const int& ago,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
-  virtual void computew(
-      std::vector<double>& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const int nghost,
-      const InputNlist& inlist,
-      const int& ago,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-  virtual void computew(
-      std::vector<double>& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const int nghost,
-      const InputNlist& inlist,
-      const int& ago,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
-  /** @} */
-
   /**
    * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
    *by using this DP.
+   * @note The double precision interface is used by i-PI, GROMACS, ABACUS, and
+   *CP2K.
    * @param[out] ener The system energy.
    * @param[out] force The force on each atom.
    * @param[out] virial The virial.
@@ -193,29 +63,6 @@ class DeepPotBase {
    *same aparam.
    * @{
    **/
-  virtual void computew(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      std::vector<double>& atom_energy,
-      std::vector<double>& atom_virial,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-
-  virtual void computew(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      std::vector<float>& atom_energy,
-      std::vector<float>& atom_virial,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
   virtual void computew(
       std::vector<double>& ener,
       std::vector<double>& force,
@@ -242,6 +89,7 @@ class DeepPotBase {
   /**
    * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
    *by using this DP.
+   * @note The double precision interface is used by LAMMPS and AMBER.
    * @param[out] ener The system energy.
    * @param[out] force The force on each atom.
    * @param[out] virial The virial.
@@ -265,35 +113,6 @@ class DeepPotBase {
    *same aparam.
    * @{
    **/
-  virtual void computew(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      std::vector<double>& atom_energy,
-      std::vector<double>& atom_virial,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const int nghost,
-      const InputNlist& inlist,
-      const int& ago,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-
-  virtual void computew(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      std::vector<float>& atom_energy,
-      std::vector<float>& atom_virial,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const int nghost,
-      const InputNlist& inlist,
-      const int& ago,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
   virtual void computew(
       std::vector<double>& ener,
       std::vector<double>& force,
@@ -323,73 +142,11 @@ class DeepPotBase {
       const std::vector<float>& fparam = std::vector<float>(),
       const std::vector<float>& aparam = std::vector<float>()) = 0;
   /** @} */
+
   /**
    * @brief Evaluate the energy, force, and virial with the mixed type
    *by using this DP.
-   * @param[out] ener The system energy.
-   * @param[out] force The force on each atom.
-   * @param[out] virial The virial.
-   * @param[in] nframes The number of frames.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *nframes x natoms x 3.
-   * @param[in] atype The atom types. The array should be of size nframes x
-   *natoms.
-   * @param[in] box The cell of the region. The array should be of size nframes
-   *x 9.
-   * @param[in] fparam The frame parameter. The array can be of size :
-   * nframes x dim_fparam.
-   * dim_fparam. Then all frames are assumed to be provided with the same
-   *fparam.
-   * @param[in] aparam The atomic parameter The array can be of size :
-   * nframes x natoms x dim_aparam.
-   * natoms x dim_aparam. Then all frames are assumed to be provided with the
-   *same aparam.
-   * @{
-   **/
-  virtual void computew_mixed_type(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const int& nframes,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-  virtual void computew_mixed_type(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const int& nframes,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
-  virtual void computew_mixed_type(
-      std::vector<double>& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const int& nframes,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-  virtual void computew_mixed_type(
-      std::vector<double>& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const int& nframes,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
-  /** @} */
-  /**
-   * @brief Evaluate the energy, force, and virial with the mixed type
-   *by using this DP.
+   * @note At this time, no external program uses this interface.
    * @param[out] ener The system energy.
    * @param[out] force The force on each atom.
    * @param[out] virial The virial.
@@ -412,30 +169,6 @@ class DeepPotBase {
    *same aparam.
    * @{
    **/
-  virtual void computew_mixed_type(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      std::vector<double>& atom_energy,
-      std::vector<double>& atom_virial,
-      const int& nframes,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>()) = 0;
-  virtual void computew_mixed_type(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      std::vector<float>& atom_energy,
-      std::vector<float>& atom_virial,
-      const int& nframes,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>()) = 0;
   virtual void computew_mixed_type(
       std::vector<double>& ener,
       std::vector<double>& force,
@@ -554,9 +287,19 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @{
    **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute(ENERGYVTYPE& ener,
+  template <typename VALUETYPE>
+  void compute(ENERGYTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  template <typename VALUETYPE>
+  void compute(std::vector<ENERGYTYPE>& ener,
                std::vector<VALUETYPE>& force,
                std::vector<VALUETYPE>& virial,
                const std::vector<VALUETYPE>& coord,
@@ -564,6 +307,7 @@ class DeepPot {
                const std::vector<VALUETYPE>& box,
                const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
                const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /** @} */
   /**
    * @brief Evaluate the energy, force and virial by using this DP.
    * @param[out] ener The system energy.
@@ -585,9 +329,10 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @{
    **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute(ENERGYVTYPE& ener,
+  template <typename VALUETYPE>
+  void compute(ENERGYTYPE& ener,
                std::vector<VALUETYPE>& force,
                std::vector<VALUETYPE>& virial,
                const std::vector<VALUETYPE>& coord,
@@ -598,6 +343,19 @@ class DeepPot {
                const int& ago,
                const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
                const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  template <typename VALUETYPE>
+  void compute(std::vector<ENERGYTYPE>& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& inlist,
+               const int& ago,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /** @} */
   /**
    * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
    *by using this DP.
@@ -619,9 +377,10 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @{
    **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute(ENERGYVTYPE& ener,
+  template <typename VALUETYPE>
+  void compute(ENERGYTYPE& ener,
                std::vector<VALUETYPE>& force,
                std::vector<VALUETYPE>& virial,
                std::vector<VALUETYPE>& atom_energy,
@@ -631,6 +390,19 @@ class DeepPot {
                const std::vector<VALUETYPE>& box,
                const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
                const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  template <typename VALUETYPE>
+  void compute(std::vector<ENERGYTYPE>& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_energy,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /** @} */
+
   /**
    * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
    *by using this DP.
@@ -655,9 +427,24 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @{
    **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute(ENERGYVTYPE& ener,
+  template <typename VALUETYPE>
+  void compute(ENERGYTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_energy,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& lmp_list,
+               const int& ago,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  template <typename VALUETYPE>
+  void compute(std::vector<ENERGYTYPE>& ener,
                std::vector<VALUETYPE>& force,
                std::vector<VALUETYPE>& virial,
                std::vector<VALUETYPE>& atom_energy,
@@ -670,6 +457,7 @@ class DeepPot {
                const int& ago,
                const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
                const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /** @} */
   /**
    * @brief Evaluate the energy, force, and virial with the mixed type
    *by using this DP.
@@ -691,10 +479,11 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @{
    **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
+  template <typename VALUETYPE>
   void compute_mixed_type(
-      ENERGYVTYPE& ener,
+      ENERGYTYPE& ener,
       std::vector<VALUETYPE>& force,
       std::vector<VALUETYPE>& virial,
       const int& nframes,
@@ -703,6 +492,18 @@ class DeepPot {
       const std::vector<VALUETYPE>& box,
       const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
       const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  template <typename VALUETYPE>
+  void compute_mixed_type(
+      std::vector<ENERGYTYPE>& ener,
+      std::vector<VALUETYPE>& force,
+      std::vector<VALUETYPE>& virial,
+      const int& nframes,
+      const std::vector<VALUETYPE>& coord,
+      const std::vector<int>& atype,
+      const std::vector<VALUETYPE>& box,
+      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /** @} */
   /**
    * @brief Evaluate the energy, force, and virial with the mixed type
    *by using this DP.
@@ -726,10 +527,11 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @{
    **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
+  template <typename VALUETYPE>
   void compute_mixed_type(
-      ENERGYVTYPE& ener,
+      ENERGYTYPE& ener,
       std::vector<VALUETYPE>& force,
       std::vector<VALUETYPE>& virial,
       std::vector<VALUETYPE>& atom_energy,
@@ -740,6 +542,20 @@ class DeepPot {
       const std::vector<VALUETYPE>& box,
       const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
       const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  template <typename VALUETYPE>
+  void compute_mixed_type(
+      std::vector<ENERGYTYPE>& ener,
+      std::vector<VALUETYPE>& force,
+      std::vector<VALUETYPE>& virial,
+      std::vector<VALUETYPE>& atom_energy,
+      std::vector<VALUETYPE>& atom_virial,
+      const int& nframes,
+      const std::vector<VALUETYPE>& coord,
+      const std::vector<int>& atype,
+      const std::vector<VALUETYPE>& box,
+      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /** @} */
   /**
    * @brief Get the cutoff radius.
    * @return The cutoff radius.
diff --git a/source/api_cc/include/DeepPotTF.h b/source/api_cc/include/DeepPotTF.h
index 59bd5f476f..0580c61da5 100644
--- a/source/api_cc/include/DeepPotTF.h
+++ b/source/api_cc/include/DeepPotTF.h
@@ -37,68 +37,7 @@ class DeepPotTF : public DeepPotBase {
             const int& gpu_rank = 0,
             const std::string& file_content = "");
 
-  /**
-   * @brief Evaluate the energy, force and virial by using this DP.
-   * @param[out] ener The system energy.
-   * @param[out] force The force on each atom.
-   * @param[out] virial The virial.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *nframes x natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size nframes
-   *x 9.
-   * @param[in] fparam The frame parameter. The array can be of size :
-   * nframes x dim_fparam.
-   * dim_fparam. Then all frames are assumed to be provided with the same
-   *fparam.
-   * @param[in] aparam The atomic parameter The array can be of size :
-   * nframes x natoms x dim_aparam.
-   * natoms x dim_aparam. Then all frames are assumed to be provided with the
-   *same aparam.
-   **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute(ENERGYVTYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const std::vector<VALUETYPE>& box,
-               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
-               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
-  /**
-   * @brief Evaluate the energy, force and virial by using this DP.
-   * @param[out] ener The system energy.
-   * @param[out] force The force on each atom.
-   * @param[out] virial The virial.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *nframes x natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size nframes
-   *x 9.
-   * @param[in] nghost The number of ghost atoms.
-   * @param[in] inlist The input neighbour list.
-   * @param[in] ago Update the internal neighbour list if ago is 0.
-   * @param[in] fparam The frame parameter. The array can be of size :
-   * nframes x dim_fparam.
-   * dim_fparam. Then all frames are assumed to be provided with the same
-   *fparam.
-   * @param[in] aparam The atomic parameter The array can be of size :
-   * nframes x natoms x dim_aparam.
-   * natoms x dim_aparam. Then all frames are assumed to be provided with the
-   *same aparam.
-   **/
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute(ENERGYVTYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const std::vector<VALUETYPE>& box,
-               const int nghost,
-               const InputNlist& inlist,
-               const int& ago,
-               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
-               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+ private:
   /**
    * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
    *by using this DP.
@@ -241,6 +180,8 @@ class DeepPotTF : public DeepPotBase {
       const std::vector<VALUETYPE>& box,
       const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
       const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+
+ public:
   /**
    * @brief Get the cutoff radius.
    * @return The cutoff radius.
@@ -298,85 +239,7 @@ class DeepPotTF : public DeepPotBase {
   };
 
   // forward to template class
-  void computew(double& ener,
-                std::vector<double>& force,
-                std::vector<double>& virial,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box,
-                const std::vector<double>& fparam = std::vector<double>(),
-                const std::vector<double>& aparam = std::vector<double>());
-
-  void computew(double& ener,
-                std::vector<float>& force,
-                std::vector<float>& virial,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box,
-                const std::vector<float>& fparam = std::vector<float>(),
-                const std::vector<float>& aparam = std::vector<float>());
-  void computew(std::vector<double>& ener,
-                std::vector<double>& force,
-                std::vector<double>& virial,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box,
-                const std::vector<double>& fparam = std::vector<double>(),
-                const std::vector<double>& aparam = std::vector<double>());
-  void computew(std::vector<double>& ener,
-                std::vector<float>& force,
-                std::vector<float>& virial,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box,
-                const std::vector<float>& fparam = std::vector<float>(),
-                const std::vector<float>& aparam = std::vector<float>());
-  void computew(double& ener,
-                std::vector<double>& force,
-                std::vector<double>& virial,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box,
-                const int nghost,
-                const InputNlist& inlist,
-                const int& ago,
-                const std::vector<double>& fparam = std::vector<double>(),
-                const std::vector<double>& aparam = std::vector<double>());
-
-  void computew(double& ener,
-                std::vector<float>& force,
-                std::vector<float>& virial,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box,
-                const int nghost,
-                const InputNlist& inlist,
-                const int& ago,
-                const std::vector<float>& fparam = std::vector<float>(),
-                const std::vector<float>& aparam = std::vector<float>());
   void computew(std::vector<double>& ener,
-                std::vector<double>& force,
-                std::vector<double>& virial,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box,
-                const int nghost,
-                const InputNlist& inlist,
-                const int& ago,
-                const std::vector<double>& fparam = std::vector<double>(),
-                const std::vector<double>& aparam = std::vector<double>());
-  void computew(std::vector<double>& ener,
-                std::vector<float>& force,
-                std::vector<float>& virial,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box,
-                const int nghost,
-                const InputNlist& inlist,
-                const int& ago,
-                const std::vector<float>& fparam = std::vector<float>(),
-                const std::vector<float>& aparam = std::vector<float>());
-  void computew(double& ener,
                 std::vector<double>& force,
                 std::vector<double>& virial,
                 std::vector<double>& atom_energy,
@@ -386,52 +249,7 @@ class DeepPotTF : public DeepPotBase {
                 const std::vector<double>& box,
                 const std::vector<double>& fparam = std::vector<double>(),
                 const std::vector<double>& aparam = std::vector<double>());
-
-  void computew(double& ener,
-                std::vector<float>& force,
-                std::vector<float>& virial,
-                std::vector<float>& atom_energy,
-                std::vector<float>& atom_virial,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box,
-                const std::vector<float>& fparam = std::vector<float>(),
-                const std::vector<float>& aparam = std::vector<float>());
   void computew(std::vector<double>& ener,
-                std::vector<double>& force,
-                std::vector<double>& virial,
-                std::vector<double>& atom_energy,
-                std::vector<double>& atom_virial,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box,
-                const std::vector<double>& fparam = std::vector<double>(),
-                const std::vector<double>& aparam = std::vector<double>());
-  void computew(std::vector<double>& ener,
-                std::vector<float>& force,
-                std::vector<float>& virial,
-                std::vector<float>& atom_energy,
-                std::vector<float>& atom_virial,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box,
-                const std::vector<float>& fparam = std::vector<float>(),
-                const std::vector<float>& aparam = std::vector<float>());
-  void computew(double& ener,
-                std::vector<double>& force,
-                std::vector<double>& virial,
-                std::vector<double>& atom_energy,
-                std::vector<double>& atom_virial,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box,
-                const int nghost,
-                const InputNlist& inlist,
-                const int& ago,
-                const std::vector<double>& fparam = std::vector<double>(),
-                const std::vector<double>& aparam = std::vector<double>());
-
-  void computew(double& ener,
                 std::vector<float>& force,
                 std::vector<float>& virial,
                 std::vector<float>& atom_energy,
@@ -439,9 +257,6 @@ class DeepPotTF : public DeepPotBase {
                 const std::vector<float>& coord,
                 const std::vector<int>& atype,
                 const std::vector<float>& box,
-                const int nghost,
-                const InputNlist& inlist,
-                const int& ago,
                 const std::vector<float>& fparam = std::vector<float>(),
                 const std::vector<float>& aparam = std::vector<float>());
   void computew(std::vector<double>& ener,
@@ -470,70 +285,6 @@ class DeepPotTF : public DeepPotBase {
                 const int& ago,
                 const std::vector<float>& fparam = std::vector<float>(),
                 const std::vector<float>& aparam = std::vector<float>());
-  void computew_mixed_type(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const int& nframes,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>());
-  void computew_mixed_type(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const int& nframes,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>());
-  void computew_mixed_type(
-      std::vector<double>& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      const int& nframes,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>());
-  void computew_mixed_type(
-      std::vector<double>& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      const int& nframes,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>());
-  void computew_mixed_type(
-      double& ener,
-      std::vector<double>& force,
-      std::vector<double>& virial,
-      std::vector<double>& atom_energy,
-      std::vector<double>& atom_virial,
-      const int& nframes,
-      const std::vector<double>& coord,
-      const std::vector<int>& atype,
-      const std::vector<double>& box,
-      const std::vector<double>& fparam = std::vector<double>(),
-      const std::vector<double>& aparam = std::vector<double>());
-  void computew_mixed_type(
-      double& ener,
-      std::vector<float>& force,
-      std::vector<float>& virial,
-      std::vector<float>& atom_energy,
-      std::vector<float>& atom_virial,
-      const int& nframes,
-      const std::vector<float>& coord,
-      const std::vector<int>& atype,
-      const std::vector<float>& box,
-      const std::vector<float>& fparam = std::vector<float>(),
-      const std::vector<float>& aparam = std::vector<float>());
   void computew_mixed_type(
       std::vector<double>& ener,
       std::vector<double>& force,
@@ -604,19 +355,6 @@ class DeepPotTF : public DeepPotBase {
                           const int& nframes,
                           const int& dparam,
                           const std::vector<VALUETYPE>& param) const;
-  template <typename VALUETYPE, typename ENERGYVTYPE>
-  void compute_inner(
-      ENERGYVTYPE& ener,
-      std::vector<VALUETYPE>& force,
-      std::vector<VALUETYPE>& virial,
-      const std::vector<VALUETYPE>& coord,
-      const std::vector<int>& atype,
-      const std::vector<VALUETYPE>& box,
-      const int nghost,
-      const int& ago,
-      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
-      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
-
   // copy neighbor list info from host
   bool init_nbor;
   std::vector<int> sec_a;
diff --git a/source/api_cc/include/DeepTensor.h b/source/api_cc/include/DeepTensor.h
index 6150eca970..5592942d87 100644
--- a/source/api_cc/include/DeepTensor.h
+++ b/source/api_cc/include/DeepTensor.h
@@ -35,49 +35,6 @@ class DeepTensorBase {
   virtual void init(const std::string& model,
                     const int& gpu_rank = 0,
                     const std::string& name_scope = "") = 0;
-
-  /**
-   * @brief Evaluate the value by using this model.
-   * @param[out] value The value to evalute, usually would be the atomic tensor.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size 9.
-   * @{
-   **/
-  virtual void computew(std::vector<double>& value,
-                        const std::vector<double>& coord,
-                        const std::vector<int>& atype,
-                        const std::vector<double>& box) = 0;
-  virtual void computew(std::vector<float>& value,
-                        const std::vector<float>& coord,
-                        const std::vector<int>& atype,
-                        const std::vector<float>& box) = 0;
-  /** @} */
-  /**
-   * @brief Evaluate the value by using this model.
-   * @param[out] value The value to evalute, usually would be the atomic tensor.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size 9.
-   * @param[in] nghost The number of ghost atoms.
-   * @param[in] inlist The input neighbour list.
-   * @{
-   **/
-  virtual void computew(std::vector<double>& value,
-                        const std::vector<double>& coord,
-                        const std::vector<int>& atype,
-                        const std::vector<double>& box,
-                        const int nghost,
-                        const InputNlist& inlist) = 0;
-  virtual void computew(std::vector<float>& value,
-                        const std::vector<float>& coord,
-                        const std::vector<int>& atype,
-                        const std::vector<float>& box,
-                        const int nghost,
-                        const InputNlist& inlist) = 0;
-  /** @} */
   /**
    * @brief Evaluate the global tensor and component-wise force and virial.
    * @param[out] global_tensor The global tensor to evalute.
@@ -93,6 +50,8 @@ class DeepTensorBase {
    *natoms x 3.
    * @param[in] atype The atom types. The list should contain natoms ints.
    * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] request_deriv Whether to request the derivative of the global
+   * tensor, including force and virial.
    * @{
    **/
   virtual void computew(std::vector<double>& global_tensor,
@@ -102,7 +61,8 @@ class DeepTensorBase {
                         std::vector<double>& atom_virial,
                         const std::vector<double>& coord,
                         const std::vector<int>& atype,
-                        const std::vector<double>& box) = 0;
+                        const std::vector<double>& box,
+                        const bool request_deriv) = 0;
   virtual void computew(std::vector<float>& global_tensor,
                         std::vector<float>& force,
                         std::vector<float>& virial,
@@ -110,7 +70,8 @@ class DeepTensorBase {
                         std::vector<float>& atom_virial,
                         const std::vector<float>& coord,
                         const std::vector<int>& atype,
-                        const std::vector<float>& box) = 0;
+                        const std::vector<float>& box,
+                        const bool request_deriv) = 0;
   /** @} */
   /**
    * @brief Evaluate the global tensor and component-wise force and virial.
@@ -129,6 +90,8 @@ class DeepTensorBase {
    * @param[in] box The cell of the region. The array should be of size 9.
    * @param[in] nghost The number of ghost atoms.
    * @param[in] inlist The input neighbour list.
+   * @param[in] request_deriv Whether to request the derivative of the global
+   * tensor, including force and virial.
    * @{
    **/
   virtual void computew(std::vector<double>& global_tensor,
@@ -140,7 +103,8 @@ class DeepTensorBase {
                         const std::vector<int>& atype,
                         const std::vector<double>& box,
                         const int nghost,
-                        const InputNlist& inlist) = 0;
+                        const InputNlist& inlist,
+                        const bool request_deriv) = 0;
   virtual void computew(std::vector<float>& global_tensor,
                         std::vector<float>& force,
                         std::vector<float>& virial,
@@ -150,7 +114,8 @@ class DeepTensorBase {
                         const std::vector<int>& atype,
                         const std::vector<float>& box,
                         const int nghost,
-                        const InputNlist& inlist) = 0;
+                        const InputNlist& inlist,
+                        const bool request_deriv) = 0;
   /** @} */
   /**
    * @brief Get the cutoff radius.
diff --git a/source/api_cc/include/DeepTensorTF.h b/source/api_cc/include/DeepTensorTF.h
index 2ba7697076..3c724dce88 100644
--- a/source/api_cc/include/DeepTensorTF.h
+++ b/source/api_cc/include/DeepTensorTF.h
@@ -34,12 +34,8 @@ class DeepTensorTF : public DeepTensorBase {
   void init(const std::string& model,
             const int& gpu_rank = 0,
             const std::string& name_scope = "");
-  /**
-   * @brief Print the DP summary to the screen.
-   * @param[in] pre The prefix to each line.
-   **/
-  void print_summary(const std::string& pre) const;
 
+ private:
   /**
    * @brief Evaluate the value by using this model.
    * @param[out] value The value to evalute, usually would be the atomic tensor.
@@ -70,48 +66,6 @@ class DeepTensorTF : public DeepTensorBase {
                const std::vector<VALUETYPE>& box,
                const int nghost,
                const InputNlist& inlist);
-  /**
-   * @brief Evaluate the global tensor and component-wise force and virial.
-   * @param[out] global_tensor The global tensor to evalute.
-   * @param[out] force The component-wise force of the global tensor, size odim
-   *x natoms x 3.
-   * @param[out] virial The component-wise virial of the global tensor, size
-   *odim x 9.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size 9.
-   **/
-  template <typename VALUETYPE>
-  void compute(std::vector<VALUETYPE>& global_tensor,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const std::vector<VALUETYPE>& box);
-  /**
-   * @brief Evaluate the global tensor and component-wise force and virial.
-   * @param[out] global_tensor The global tensor to evalute.
-   * @param[out] force The component-wise force of the global tensor, size odim
-   *x natoms x 3.
-   * @param[out] virial The component-wise virial of the global tensor, size
-   *odim x 9.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size 9.
-   * @param[in] nghost The number of ghost atoms.
-   * @param[in] inlist The input neighbour list.
-   **/
-  template <typename VALUETYPE>
-  void compute(std::vector<VALUETYPE>& global_tensor,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const std::vector<VALUETYPE>& box,
-               const int nghost,
-               const InputNlist& inlist);
   /**
    * @brief Evaluate the global tensor and component-wise force and virial.
    * @param[out] global_tensor The global tensor to evalute.
@@ -166,6 +120,8 @@ class DeepTensorTF : public DeepTensorBase {
                const std::vector<VALUETYPE>& box,
                const int nghost,
                const InputNlist& inlist);
+
+ public:
   /**
    * @brief Get the cutoff radius.
    * @return The cutoff radius.
@@ -204,48 +160,6 @@ class DeepTensorTF : public DeepTensorBase {
    **/
   void get_type_map(std::string& type_map);
 
-  /**
-   * @brief Evaluate the value by using this model.
-   * @param[out] value The value to evalute, usually would be the atomic tensor.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size 9.
-   * @{
-   **/
-  void computew(std::vector<double>& value,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box);
-  void computew(std::vector<float>& value,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box);
-  /** @} */
-  /**
-   * @brief Evaluate the value by using this model.
-   * @param[out] value The value to evalute, usually would be the atomic tensor.
-   * @param[in] coord The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] atype The atom types. The list should contain natoms ints.
-   * @param[in] box The cell of the region. The array should be of size 9.
-   * @param[in] nghost The number of ghost atoms.
-   * @param[in] inlist The input neighbour list.
-   * @{
-   **/
-  void computew(std::vector<double>& value,
-                const std::vector<double>& coord,
-                const std::vector<int>& atype,
-                const std::vector<double>& box,
-                const int nghost,
-                const InputNlist& inlist);
-  void computew(std::vector<float>& value,
-                const std::vector<float>& coord,
-                const std::vector<int>& atype,
-                const std::vector<float>& box,
-                const int nghost,
-                const InputNlist& inlist);
-  /** @} */
   /**
    * @brief Evaluate the global tensor and component-wise force and virial.
    * @param[out] global_tensor The global tensor to evalute.
@@ -261,6 +175,8 @@ class DeepTensorTF : public DeepTensorBase {
    *natoms x 3.
    * @param[in] atype The atom types. The list should contain natoms ints.
    * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] request_deriv Whether to request the derivative of the global
+   * tensor, including force and virial.
    * @{
    **/
   void computew(std::vector<double>& global_tensor,
@@ -270,7 +186,8 @@ class DeepTensorTF : public DeepTensorBase {
                 std::vector<double>& atom_virial,
                 const std::vector<double>& coord,
                 const std::vector<int>& atype,
-                const std::vector<double>& box);
+                const std::vector<double>& box,
+                const bool request_deriv);
   void computew(std::vector<float>& global_tensor,
                 std::vector<float>& force,
                 std::vector<float>& virial,
@@ -278,7 +195,8 @@ class DeepTensorTF : public DeepTensorBase {
                 std::vector<float>& atom_virial,
                 const std::vector<float>& coord,
                 const std::vector<int>& atype,
-                const std::vector<float>& box);
+                const std::vector<float>& box,
+                const bool request_deriv);
   /** @} */
   /**
    * @brief Evaluate the global tensor and component-wise force and virial.
@@ -297,6 +215,8 @@ class DeepTensorTF : public DeepTensorBase {
    * @param[in] box The cell of the region. The array should be of size 9.
    * @param[in] nghost The number of ghost atoms.
    * @param[in] inlist The input neighbour list.
+   * @param[in] request_deriv Whether to request the derivative of the global
+   * tensor, including force and virial.
    * @{
    **/
   void computew(std::vector<double>& global_tensor,
@@ -308,7 +228,8 @@ class DeepTensorTF : public DeepTensorBase {
                 const std::vector<int>& atype,
                 const std::vector<double>& box,
                 const int nghost,
-                const InputNlist& inlist);
+                const InputNlist& inlist,
+                const bool request_deriv);
   void computew(std::vector<float>& global_tensor,
                 std::vector<float>& force,
                 std::vector<float>& virial,
@@ -318,7 +239,8 @@ class DeepTensorTF : public DeepTensorBase {
                 const std::vector<int>& atype,
                 const std::vector<float>& box,
                 const int nghost,
-                const InputNlist& inlist);
+                const InputNlist& inlist,
+                const bool request_deriv);
   /** @} */
 
  private:
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index d290565c2b..feb0f283b1 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -51,8 +51,8 @@ void DeepPot::print_summary(const std::string& pre) const {
   deepmd::print_summary(pre);
 }
 
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPot::compute(ENERGYVTYPE& dener,
+template <typename VALUETYPE>
+void DeepPot::compute(ENERGYTYPE& dener,
                       std::vector<VALUETYPE>& dforce_,
                       std::vector<VALUETYPE>& dvirial,
                       const std::vector<VALUETYPE>& dcoord_,
@@ -60,52 +60,65 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const std::vector<VALUETYPE>& dbox,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam_) {
-  dp->computew(dener, dforce_, dvirial, dcoord_, datype_, dbox, fparam_,
-               aparam_);
+  std::vector<ENERGYTYPE> dener_;
+  std::vector<VALUETYPE> datom_energy_, datom_virial_;
+  dp->computew(dener_, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, fparam_, aparam_);
+  dener = dener_[0];
 }
 
-template void DeepPot::compute<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPot::compute<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
-template void DeepPot::compute<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
+template <typename VALUETYPE>
+void DeepPot::compute(std::vector<ENERGYTYPE>& dener,
+                      std::vector<VALUETYPE>& dforce_,
+                      std::vector<VALUETYPE>& dvirial,
+                      const std::vector<VALUETYPE>& dcoord_,
+                      const std::vector<int>& datype_,
+                      const std::vector<VALUETYPE>& dbox,
+                      const std::vector<VALUETYPE>& fparam_,
+                      const std::vector<VALUETYPE>& aparam_) {
+  std::vector<VALUETYPE> datom_energy_, datom_virial_;
+  dp->computew(dener, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, fparam_, aparam_);
+}
 
-template void DeepPot::compute<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
+template void DeepPot::compute<double>(ENERGYTYPE& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam);
+
+template void DeepPot::compute<float>(ENERGYTYPE& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam);
+
+template void DeepPot::compute<double>(std::vector<ENERGYTYPE>& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam);
+
+template void DeepPot::compute<float>(std::vector<ENERGYTYPE>& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam);
 
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPot::compute(ENERGYVTYPE& dener,
+template <typename VALUETYPE>
+void DeepPot::compute(ENERGYTYPE& dener,
                       std::vector<VALUETYPE>& dforce_,
                       std::vector<VALUETYPE>& dvirial,
                       const std::vector<VALUETYPE>& dcoord_,
@@ -116,64 +129,96 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const int& ago,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam__) {
-  dp->computew(dener, dforce_, dvirial, dcoord_, datype_, dbox, nghost,
-               lmp_list, ago, fparam_, aparam__);
+  std::vector<ENERGYTYPE> dener_;
+  std::vector<VALUETYPE> datom_energy_, datom_virial_;
+  dp->computew(dener_, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, nghost, lmp_list, ago, fparam_, aparam__);
+  dener = dener_[0];
 }
 
-template void DeepPot::compute<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam_);
-
-template void DeepPot::compute<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam_);
+template <typename VALUETYPE>
+void DeepPot::compute(std::vector<ENERGYTYPE>& dener,
+                      std::vector<VALUETYPE>& dforce_,
+                      std::vector<VALUETYPE>& dvirial,
+                      const std::vector<VALUETYPE>& dcoord_,
+                      const std::vector<int>& datype_,
+                      const std::vector<VALUETYPE>& dbox,
+                      const int nghost,
+                      const InputNlist& lmp_list,
+                      const int& ago,
+                      const std::vector<VALUETYPE>& fparam_,
+                      const std::vector<VALUETYPE>& aparam__) {
+  std::vector<VALUETYPE> datom_energy_, datom_virial_;
+  dp->computew(dener, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, nghost, lmp_list, ago, fparam_, aparam__);
+}
 
-template void DeepPot::compute<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam_);
+template void DeepPot::compute<double>(ENERGYTYPE& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const int nghost,
+                                       const InputNlist& lmp_list,
+                                       const int& ago,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam_);
+
+template void DeepPot::compute<float>(ENERGYTYPE& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const int nghost,
+                                      const InputNlist& lmp_list,
+                                      const int& ago,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam_);
+
+template void DeepPot::compute<double>(std::vector<ENERGYTYPE>& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const int nghost,
+                                       const InputNlist& lmp_list,
+                                       const int& ago,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam_);
+
+template void DeepPot::compute<float>(std::vector<ENERGYTYPE>& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const int nghost,
+                                      const InputNlist& lmp_list,
+                                      const int& ago,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam_);
 
-template void DeepPot::compute<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam_);
-
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPot::compute(ENERGYVTYPE& dener,
+template <typename VALUETYPE>
+void DeepPot::compute(ENERGYTYPE& dener,
+                      std::vector<VALUETYPE>& dforce_,
+                      std::vector<VALUETYPE>& dvirial,
+                      std::vector<VALUETYPE>& datom_energy_,
+                      std::vector<VALUETYPE>& datom_virial_,
+                      const std::vector<VALUETYPE>& dcoord_,
+                      const std::vector<int>& datype_,
+                      const std::vector<VALUETYPE>& dbox,
+                      const std::vector<VALUETYPE>& fparam_,
+                      const std::vector<VALUETYPE>& aparam_) {
+  std::vector<ENERGYTYPE> dener_;
+  dp->computew(dener_, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, fparam_, aparam_);
+  dener = dener_[0];
+}
+template <typename VALUETYPE>
+void DeepPot::compute(std::vector<ENERGYTYPE>& dener,
                       std::vector<VALUETYPE>& dforce_,
                       std::vector<VALUETYPE>& dvirial,
                       std::vector<VALUETYPE>& datom_energy_,
@@ -187,56 +232,71 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                datype_, dbox, fparam_, aparam_);
 }
 
-template void DeepPot::compute<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPot::compute<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
-template void DeepPot::compute<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPot::compute<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
+template void DeepPot::compute<double>(ENERGYTYPE& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       std::vector<double>& datom_energy_,
+                                       std::vector<double>& datom_virial_,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam);
+
+template void DeepPot::compute<float>(ENERGYTYPE& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      std::vector<float>& datom_energy_,
+                                      std::vector<float>& datom_virial_,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam);
+
+template void DeepPot::compute<double>(std::vector<ENERGYTYPE>& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       std::vector<double>& datom_energy_,
+                                       std::vector<double>& datom_virial_,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam);
+
+template void DeepPot::compute<float>(std::vector<ENERGYTYPE>& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      std::vector<float>& datom_energy_,
+                                      std::vector<float>& datom_virial_,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam);
 
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPot::compute(ENERGYVTYPE& dener,
+template <typename VALUETYPE>
+void DeepPot::compute(ENERGYTYPE& dener,
+                      std::vector<VALUETYPE>& dforce_,
+                      std::vector<VALUETYPE>& dvirial,
+                      std::vector<VALUETYPE>& datom_energy_,
+                      std::vector<VALUETYPE>& datom_virial_,
+                      const std::vector<VALUETYPE>& dcoord_,
+                      const std::vector<int>& datype_,
+                      const std::vector<VALUETYPE>& dbox,
+                      const int nghost,
+                      const InputNlist& lmp_list,
+                      const int& ago,
+                      const std::vector<VALUETYPE>& fparam_,
+                      const std::vector<VALUETYPE>& aparam__) {
+  std::vector<ENERGYTYPE> dener_;
+  dp->computew(dener_, dforce_, dvirial, datom_energy_, datom_virial_, dcoord_,
+               datype_, dbox, nghost, lmp_list, ago, fparam_, aparam__);
+  dener = dener_[0];
+}
+template <typename VALUETYPE>
+void DeepPot::compute(std::vector<ENERGYTYPE>& dener,
                       std::vector<VALUETYPE>& dforce_,
                       std::vector<VALUETYPE>& dvirial,
                       std::vector<VALUETYPE>& datom_energy_,
@@ -253,69 +313,65 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                datype_, dbox, nghost, lmp_list, ago, fparam_, aparam__);
 }
 
-template void DeepPot::compute<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam_);
-
-template void DeepPot::compute<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam_);
-
-template void DeepPot::compute<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    std::vector<double>& datom_energy_,
-    std::vector<double>& datom_virial_,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam_);
-
-template void DeepPot::compute<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    std::vector<float>& datom_energy_,
-    std::vector<float>& datom_virial_,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam_);
+template void DeepPot::compute<double>(ENERGYTYPE& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       std::vector<double>& datom_energy_,
+                                       std::vector<double>& datom_virial_,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const int nghost,
+                                       const InputNlist& lmp_list,
+                                       const int& ago,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam_);
+
+template void DeepPot::compute<float>(ENERGYTYPE& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      std::vector<float>& datom_energy_,
+                                      std::vector<float>& datom_virial_,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const int nghost,
+                                      const InputNlist& lmp_list,
+                                      const int& ago,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam_);
+
+template void DeepPot::compute<double>(std::vector<ENERGYTYPE>& dener,
+                                       std::vector<double>& dforce_,
+                                       std::vector<double>& dvirial,
+                                       std::vector<double>& datom_energy_,
+                                       std::vector<double>& datom_virial_,
+                                       const std::vector<double>& dcoord_,
+                                       const std::vector<int>& datype_,
+                                       const std::vector<double>& dbox,
+                                       const int nghost,
+                                       const InputNlist& lmp_list,
+                                       const int& ago,
+                                       const std::vector<double>& fparam,
+                                       const std::vector<double>& aparam_);
+
+template void DeepPot::compute<float>(std::vector<ENERGYTYPE>& dener,
+                                      std::vector<float>& dforce_,
+                                      std::vector<float>& dvirial,
+                                      std::vector<float>& datom_energy_,
+                                      std::vector<float>& datom_virial_,
+                                      const std::vector<float>& dcoord_,
+                                      const std::vector<int>& datype_,
+                                      const std::vector<float>& dbox,
+                                      const int nghost,
+                                      const InputNlist& lmp_list,
+                                      const int& ago,
+                                      const std::vector<float>& fparam,
+                                      const std::vector<float>& aparam_);
 
 // mixed type
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPot::compute_mixed_type(ENERGYVTYPE& dener,
+template <typename VALUETYPE>
+void DeepPot::compute_mixed_type(ENERGYTYPE& dener,
                                  std::vector<VALUETYPE>& dforce_,
                                  std::vector<VALUETYPE>& dvirial,
                                  const int& nframes,
@@ -324,11 +380,29 @@ void DeepPot::compute_mixed_type(ENERGYVTYPE& dener,
                                  const std::vector<VALUETYPE>& dbox,
                                  const std::vector<VALUETYPE>& fparam_,
                                  const std::vector<VALUETYPE>& aparam_) {
-  dp->computew_mixed_type(dener, dforce_, dvirial, nframes, dcoord_, datype_,
-                          dbox, fparam_, aparam_);
+  std::vector<ENERGYTYPE> dener_;
+  std::vector<VALUETYPE> datom_energy_, datom_virial_;
+  dp->computew_mixed_type(dener_, dforce_, dvirial, datom_energy_,
+                          datom_virial_, nframes, dcoord_, datype_, dbox,
+                          fparam_, aparam_);
+  dener = dener_[0];
+}
+template <typename VALUETYPE>
+void DeepPot::compute_mixed_type(std::vector<ENERGYTYPE>& dener,
+                                 std::vector<VALUETYPE>& dforce_,
+                                 std::vector<VALUETYPE>& dvirial,
+                                 const int& nframes,
+                                 const std::vector<VALUETYPE>& dcoord_,
+                                 const std::vector<int>& datype_,
+                                 const std::vector<VALUETYPE>& dbox,
+                                 const std::vector<VALUETYPE>& fparam_,
+                                 const std::vector<VALUETYPE>& aparam_) {
+  std::vector<VALUETYPE> datom_energy_, datom_virial_;
+  dp->computew_mixed_type(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                          nframes, dcoord_, datype_, dbox, fparam_, aparam_);
 }
 
-template void DeepPot::compute_mixed_type<double, ENERGYTYPE>(
+template void DeepPot::compute_mixed_type<double>(
     ENERGYTYPE& dener,
     std::vector<double>& dforce_,
     std::vector<double>& dvirial,
@@ -339,7 +413,7 @@ template void DeepPot::compute_mixed_type<double, ENERGYTYPE>(
     const std::vector<double>& fparam,
     const std::vector<double>& aparam);
 
-template void DeepPot::compute_mixed_type<float, ENERGYTYPE>(
+template void DeepPot::compute_mixed_type<float>(
     ENERGYTYPE& dener,
     std::vector<float>& dforce_,
     std::vector<float>& dvirial,
@@ -350,7 +424,7 @@ template void DeepPot::compute_mixed_type<float, ENERGYTYPE>(
     const std::vector<float>& fparam,
     const std::vector<float>& aparam);
 
-template void DeepPot::compute_mixed_type<double, std::vector<ENERGYTYPE>>(
+template void DeepPot::compute_mixed_type<double>(
     std::vector<ENERGYTYPE>& dener,
     std::vector<double>& dforce_,
     std::vector<double>& dvirial,
@@ -361,7 +435,7 @@ template void DeepPot::compute_mixed_type<double, std::vector<ENERGYTYPE>>(
     const std::vector<double>& fparam,
     const std::vector<double>& aparam);
 
-template void DeepPot::compute_mixed_type<float, std::vector<ENERGYTYPE>>(
+template void DeepPot::compute_mixed_type<float>(
     std::vector<ENERGYTYPE>& dener,
     std::vector<float>& dforce_,
     std::vector<float>& dvirial,
@@ -372,8 +446,26 @@ template void DeepPot::compute_mixed_type<float, std::vector<ENERGYTYPE>>(
     const std::vector<float>& fparam,
     const std::vector<float>& aparam);
 
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPot::compute_mixed_type(ENERGYVTYPE& dener,
+template <typename VALUETYPE>
+void DeepPot::compute_mixed_type(ENERGYTYPE& dener,
+                                 std::vector<VALUETYPE>& dforce_,
+                                 std::vector<VALUETYPE>& dvirial,
+                                 std::vector<VALUETYPE>& datom_energy_,
+                                 std::vector<VALUETYPE>& datom_virial_,
+                                 const int& nframes,
+                                 const std::vector<VALUETYPE>& dcoord_,
+                                 const std::vector<int>& datype_,
+                                 const std::vector<VALUETYPE>& dbox,
+                                 const std::vector<VALUETYPE>& fparam_,
+                                 const std::vector<VALUETYPE>& aparam_) {
+  std::vector<ENERGYTYPE> dener_;
+  dp->computew_mixed_type(dener_, dforce_, dvirial, datom_energy_,
+                          datom_virial_, nframes, dcoord_, datype_, dbox,
+                          fparam_, aparam_);
+  dener = dener_[0];
+}
+template <typename VALUETYPE>
+void DeepPot::compute_mixed_type(std::vector<ENERGYTYPE>& dener,
                                  std::vector<VALUETYPE>& dforce_,
                                  std::vector<VALUETYPE>& dvirial,
                                  std::vector<VALUETYPE>& datom_energy_,
@@ -388,7 +480,7 @@ void DeepPot::compute_mixed_type(ENERGYVTYPE& dener,
                           nframes, dcoord_, datype_, dbox, fparam_, aparam_);
 }
 
-template void DeepPot::compute_mixed_type<double, ENERGYTYPE>(
+template void DeepPot::compute_mixed_type<double>(
     ENERGYTYPE& dener,
     std::vector<double>& dforce_,
     std::vector<double>& dvirial,
@@ -401,7 +493,7 @@ template void DeepPot::compute_mixed_type<double, ENERGYTYPE>(
     const std::vector<double>& fparam,
     const std::vector<double>& aparam);
 
-template void DeepPot::compute_mixed_type<float, ENERGYTYPE>(
+template void DeepPot::compute_mixed_type<float>(
     ENERGYTYPE& dener,
     std::vector<float>& dforce_,
     std::vector<float>& dvirial,
@@ -414,7 +506,7 @@ template void DeepPot::compute_mixed_type<float, ENERGYTYPE>(
     const std::vector<float>& fparam,
     const std::vector<float>& aparam);
 
-template void DeepPot::compute_mixed_type<double, std::vector<ENERGYTYPE>>(
+template void DeepPot::compute_mixed_type<double>(
     std::vector<ENERGYTYPE>& dener,
     std::vector<double>& dforce_,
     std::vector<double>& dvirial,
@@ -427,7 +519,7 @@ template void DeepPot::compute_mixed_type<double, std::vector<ENERGYTYPE>>(
     const std::vector<double>& fparam,
     const std::vector<double>& aparam);
 
-template void DeepPot::compute_mixed_type<float, std::vector<ENERGYTYPE>>(
+template void DeepPot::compute_mixed_type<float>(
     std::vector<ENERGYTYPE>& dener,
     std::vector<float>& dforce_,
     std::vector<float>& dvirial,
diff --git a/source/api_cc/src/DeepPotTF.cc b/source/api_cc/src/DeepPotTF.cc
index ed3bede4df..ef348fe14c 100644
--- a/source/api_cc/src/DeepPotTF.cc
+++ b/source/api_cc/src/DeepPotTF.cc
@@ -569,275 +569,6 @@ template void DeepPotTF::tile_fparam_aparam<float>(
 
 // ENERGYVTYPE: std::vector<ENERGYTYPE> or ENERGYTYPE
 
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPotTF::compute(ENERGYVTYPE& dener,
-                        std::vector<VALUETYPE>& dforce_,
-                        std::vector<VALUETYPE>& dvirial,
-                        const std::vector<VALUETYPE>& dcoord_,
-                        const std::vector<int>& datype_,
-                        const std::vector<VALUETYPE>& dbox,
-                        const std::vector<VALUETYPE>& fparam_,
-                        const std::vector<VALUETYPE>& aparam_) {
-  int nall = datype_.size();
-  // if nall==0, unclear nframes, but 1 is ok
-  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
-  int nloc = nall;
-  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
-  assert(nloc == atommap.get_type().size());
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam;
-  validate_fparam_aparam(nframes, (aparam_nall ? nall : nloc), fparam_,
-                         aparam_);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam, nframes, (aparam_nall ? nall : nloc) * daparam,
-                     aparam_);
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(input_tensors, dcoord_, ntypes,
-                                            datype_, dbox, cell_size, fparam,
-                                            aparam, atommap, "", aparam_nall);
-    assert(ret == nloc);
-    run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                      nframes);
-  } else {
-    int ret = session_input_tensors<float>(input_tensors, dcoord_, ntypes,
-                                           datype_, dbox, cell_size, fparam,
-                                           aparam, atommap, "", aparam_nall);
-    assert(ret == nloc);
-    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                     nframes);
-  }
-}
-
-template void DeepPotTF::compute<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPotTF::compute<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
-template void DeepPotTF::compute<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPotTF::compute<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPotTF::compute(ENERGYVTYPE& dener,
-                        std::vector<VALUETYPE>& dforce_,
-                        std::vector<VALUETYPE>& dvirial,
-                        const std::vector<VALUETYPE>& dcoord_,
-                        const std::vector<int>& datype_,
-                        const std::vector<VALUETYPE>& dbox,
-                        const int nghost,
-                        const InputNlist& lmp_list,
-                        const int& ago,
-                        const std::vector<VALUETYPE>& fparam_,
-                        const std::vector<VALUETYPE>& aparam__) {
-  int nall = datype_.size();
-  // if nall==0, unclear nframes, but 1 is ok
-  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
-  std::vector<VALUETYPE> fparam;
-  std::vector<VALUETYPE> aparam_;
-  validate_fparam_aparam(nframes, (aparam_nall ? nall : (nall - nghost)),
-                         fparam_, aparam__);
-  tile_fparam_aparam(fparam, nframes, dfparam, fparam_);
-  tile_fparam_aparam(aparam_, nframes,
-                     (aparam_nall ? nall : (nall - nghost)) * daparam,
-                     aparam__);
-
-  // select real atoms
-  std::vector<VALUETYPE> dcoord, dforce, aparam;
-  std::vector<int> datype, fwd_map, bkw_map;
-  int nghost_real, nall_real, nloc_real;
-  select_real_atoms_coord(dcoord, datype, aparam, nghost_real, fwd_map, bkw_map,
-                          nall_real, nloc_real, dcoord_, datype_, aparam_,
-                          nghost, ntypes, nframes, daparam, nall, aparam_nall);
-
-  // internal nlist
-  if (ago == 0) {
-    nlist_data.copy_from_nlist(lmp_list);
-    nlist_data.shuffle_exclude_empty(fwd_map);
-  }
-  compute_inner(dener, dforce, dvirial, dcoord, datype, dbox, nghost_real, ago,
-                fparam, aparam);
-  // bkw map
-  dforce_.resize(static_cast<size_t>(nframes) * fwd_map.size() * 3);
-  select_map<VALUETYPE>(dforce_, dforce, bkw_map, 3, nframes, fwd_map.size(),
-                        bkw_map.size());
-}
-
-template void DeepPotTF::compute<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam_);
-
-template void DeepPotTF::compute<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam_);
-
-template void DeepPotTF::compute<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam_);
-
-template void DeepPotTF::compute<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const InputNlist& lmp_list,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam_);
-
-template <typename VALUETYPE, typename ENERGYVTYPE>
-void DeepPotTF::compute_inner(ENERGYVTYPE& dener,
-                              std::vector<VALUETYPE>& dforce_,
-                              std::vector<VALUETYPE>& dvirial,
-                              const std::vector<VALUETYPE>& dcoord_,
-                              const std::vector<int>& datype_,
-                              const std::vector<VALUETYPE>& dbox,
-                              const int nghost,
-                              const int& ago,
-                              const std::vector<VALUETYPE>& fparam,
-                              const std::vector<VALUETYPE>& aparam) {
-  int nall = datype_.size();
-  // if nall==0, unclear nframes, but 1 is ok
-  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
-  int nloc = nall - nghost;
-
-  std::vector<std::pair<std::string, Tensor>> input_tensors;
-
-  // agp == 0 means that the LAMMPS nbor list has been updated
-  if (ago == 0) {
-    atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
-    assert(nloc == atommap.get_type().size());
-    nlist_data.shuffle(atommap);
-    nlist_data.make_inlist(nlist);
-  }
-  if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors<double>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam,
-        atommap, nghost, ago, "", aparam_nall);
-    assert(nloc == ret);
-    run_model<double>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                      nframes, nghost);
-  } else {
-    int ret = session_input_tensors<float>(
-        input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam,
-        atommap, nghost, ago, "", aparam_nall);
-    assert(nloc == ret);
-    run_model<float>(dener, dforce_, dvirial, session, input_tensors, atommap,
-                     nframes, nghost);
-  }
-}
-
-template void DeepPotTF::compute_inner<double, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPotTF::compute_inner<float, ENERGYTYPE>(
-    ENERGYTYPE& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
-template void DeepPotTF::compute_inner<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<double>& dforce_,
-    std::vector<double>& dvirial,
-    const std::vector<double>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<double>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<double>& fparam,
-    const std::vector<double>& aparam);
-
-template void DeepPotTF::compute_inner<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
-    std::vector<float>& dforce_,
-    std::vector<float>& dvirial,
-    const std::vector<float>& dcoord_,
-    const std::vector<int>& datype_,
-    const std::vector<float>& dbox,
-    const int nghost,
-    const int& ago,
-    const std::vector<float>& fparam,
-    const std::vector<float>& aparam);
-
 template <typename VALUETYPE, typename ENERGYVTYPE>
 void DeepPotTF::compute(ENERGYVTYPE& dener,
                         std::vector<VALUETYPE>& dforce_,
@@ -1234,131 +965,6 @@ void DeepPotTF::get_type_map(std::string& type_map) {
 }
 
 // forward to template method
-void DeepPotTF::computew(double& ener,
-                         std::vector<double>& force,
-                         std::vector<double>& virial,
-                         const std::vector<double>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<double>& box,
-                         const std::vector<double>& fparam,
-                         const std::vector<double>& aparam) {
-  compute(ener, force, virial, coord, atype, box, fparam, aparam);
-}
-
-void DeepPotTF::computew(double& ener,
-                         std::vector<float>& force,
-                         std::vector<float>& virial,
-                         const std::vector<float>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<float>& box,
-                         const std::vector<float>& fparam,
-                         const std::vector<float>& aparam) {
-  compute(ener, force, virial, coord, atype, box, fparam, aparam);
-}
-void DeepPotTF::computew(std::vector<double>& ener,
-                         std::vector<double>& force,
-                         std::vector<double>& virial,
-                         const std::vector<double>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<double>& box,
-                         const std::vector<double>& fparam,
-                         const std::vector<double>& aparam) {
-  compute(ener, force, virial, coord, atype, box, fparam, aparam);
-}
-void DeepPotTF::computew(std::vector<double>& ener,
-                         std::vector<float>& force,
-                         std::vector<float>& virial,
-                         const std::vector<float>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<float>& box,
-                         const std::vector<float>& fparam,
-                         const std::vector<float>& aparam) {
-  compute(ener, force, virial, coord, atype, box, fparam, aparam);
-}
-void DeepPotTF::computew(double& ener,
-                         std::vector<double>& force,
-                         std::vector<double>& virial,
-                         const std::vector<double>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<double>& box,
-                         const int nghost,
-                         const InputNlist& inlist,
-                         const int& ago,
-                         const std::vector<double>& fparam,
-                         const std::vector<double>& aparam) {
-  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
-          aparam);
-}
-
-void DeepPotTF::computew(double& ener,
-                         std::vector<float>& force,
-                         std::vector<float>& virial,
-                         const std::vector<float>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<float>& box,
-                         const int nghost,
-                         const InputNlist& inlist,
-                         const int& ago,
-                         const std::vector<float>& fparam,
-                         const std::vector<float>& aparam) {
-  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
-          aparam);
-}
-void DeepPotTF::computew(std::vector<double>& ener,
-                         std::vector<double>& force,
-                         std::vector<double>& virial,
-                         const std::vector<double>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<double>& box,
-                         const int nghost,
-                         const InputNlist& inlist,
-                         const int& ago,
-                         const std::vector<double>& fparam,
-                         const std::vector<double>& aparam) {
-  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
-          aparam);
-}
-void DeepPotTF::computew(std::vector<double>& ener,
-                         std::vector<float>& force,
-                         std::vector<float>& virial,
-                         const std::vector<float>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<float>& box,
-                         const int nghost,
-                         const InputNlist& inlist,
-                         const int& ago,
-                         const std::vector<float>& fparam,
-                         const std::vector<float>& aparam) {
-  compute(ener, force, virial, coord, atype, box, nghost, inlist, ago, fparam,
-          aparam);
-}
-void DeepPotTF::computew(double& ener,
-                         std::vector<double>& force,
-                         std::vector<double>& virial,
-                         std::vector<double>& atom_energy,
-                         std::vector<double>& atom_virial,
-                         const std::vector<double>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<double>& box,
-                         const std::vector<double>& fparam,
-                         const std::vector<double>& aparam) {
-  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
-          fparam, aparam);
-}
-
-void DeepPotTF::computew(double& ener,
-                         std::vector<float>& force,
-                         std::vector<float>& virial,
-                         std::vector<float>& atom_energy,
-                         std::vector<float>& atom_virial,
-                         const std::vector<float>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<float>& box,
-                         const std::vector<float>& fparam,
-                         const std::vector<float>& aparam) {
-  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
-          fparam, aparam);
-}
 void DeepPotTF::computew(std::vector<double>& ener,
                          std::vector<double>& force,
                          std::vector<double>& virial,
@@ -1385,39 +991,6 @@ void DeepPotTF::computew(std::vector<double>& ener,
   compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
           fparam, aparam);
 }
-void DeepPotTF::computew(double& ener,
-                         std::vector<double>& force,
-                         std::vector<double>& virial,
-                         std::vector<double>& atom_energy,
-                         std::vector<double>& atom_virial,
-                         const std::vector<double>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<double>& box,
-                         const int nghost,
-                         const InputNlist& inlist,
-                         const int& ago,
-                         const std::vector<double>& fparam,
-                         const std::vector<double>& aparam) {
-  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
-          nghost, inlist, ago, fparam, aparam);
-}
-
-void DeepPotTF::computew(double& ener,
-                         std::vector<float>& force,
-                         std::vector<float>& virial,
-                         std::vector<float>& atom_energy,
-                         std::vector<float>& atom_virial,
-                         const std::vector<float>& coord,
-                         const std::vector<int>& atype,
-                         const std::vector<float>& box,
-                         const int nghost,
-                         const InputNlist& inlist,
-                         const int& ago,
-                         const std::vector<float>& fparam,
-                         const std::vector<float>& aparam) {
-  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
-          nghost, inlist, ago, fparam, aparam);
-}
 void DeepPotTF::computew(std::vector<double>& ener,
                          std::vector<double>& force,
                          std::vector<double>& virial,
@@ -1450,82 +1023,6 @@ void DeepPotTF::computew(std::vector<double>& ener,
   compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
           nghost, inlist, ago, fparam, aparam);
 }
-void DeepPotTF::computew_mixed_type(double& ener,
-                                    std::vector<double>& force,
-                                    std::vector<double>& virial,
-                                    const int& nframes,
-                                    const std::vector<double>& coord,
-                                    const std::vector<int>& atype,
-                                    const std::vector<double>& box,
-                                    const std::vector<double>& fparam,
-                                    const std::vector<double>& aparam) {
-  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
-                     aparam);
-}
-void DeepPotTF::computew_mixed_type(double& ener,
-                                    std::vector<float>& force,
-                                    std::vector<float>& virial,
-                                    const int& nframes,
-                                    const std::vector<float>& coord,
-                                    const std::vector<int>& atype,
-                                    const std::vector<float>& box,
-                                    const std::vector<float>& fparam,
-                                    const std::vector<float>& aparam) {
-  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
-                     aparam);
-}
-void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
-                                    std::vector<double>& force,
-                                    std::vector<double>& virial,
-                                    const int& nframes,
-                                    const std::vector<double>& coord,
-                                    const std::vector<int>& atype,
-                                    const std::vector<double>& box,
-                                    const std::vector<double>& fparam,
-                                    const std::vector<double>& aparam) {
-  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
-                     aparam);
-}
-void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
-                                    std::vector<float>& force,
-                                    std::vector<float>& virial,
-                                    const int& nframes,
-                                    const std::vector<float>& coord,
-                                    const std::vector<int>& atype,
-                                    const std::vector<float>& box,
-                                    const std::vector<float>& fparam,
-                                    const std::vector<float>& aparam) {
-  compute_mixed_type(ener, force, virial, nframes, coord, atype, box, fparam,
-                     aparam);
-}
-void DeepPotTF::computew_mixed_type(double& ener,
-                                    std::vector<double>& force,
-                                    std::vector<double>& virial,
-                                    std::vector<double>& atom_energy,
-                                    std::vector<double>& atom_virial,
-                                    const int& nframes,
-                                    const std::vector<double>& coord,
-                                    const std::vector<int>& atype,
-                                    const std::vector<double>& box,
-                                    const std::vector<double>& fparam,
-                                    const std::vector<double>& aparam) {
-  compute_mixed_type(ener, force, virial, atom_energy, atom_virial, nframes,
-                     coord, atype, box, fparam, aparam);
-}
-void DeepPotTF::computew_mixed_type(double& ener,
-                                    std::vector<float>& force,
-                                    std::vector<float>& virial,
-                                    std::vector<float>& atom_energy,
-                                    std::vector<float>& atom_virial,
-                                    const int& nframes,
-                                    const std::vector<float>& coord,
-                                    const std::vector<int>& atype,
-                                    const std::vector<float>& box,
-                                    const std::vector<float>& fparam,
-                                    const std::vector<float>& aparam) {
-  compute_mixed_type(ener, force, virial, atom_energy, atom_virial, nframes,
-                     coord, atype, box, fparam, aparam);
-}
 void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
                                     std::vector<double>& force,
                                     std::vector<double>& virial,
diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc
index 9d4e71c3d3..2c88ab2f4b 100644
--- a/source/api_cc/src/DeepTensor.cc
+++ b/source/api_cc/src/DeepTensor.cc
@@ -52,7 +52,9 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dtensor_,
                          const std::vector<VALUETYPE> &dcoord_,
                          const std::vector<int> &datype_,
                          const std::vector<VALUETYPE> &dbox) {
-  dt->computew(dtensor_, dcoord_, datype_, dbox);
+  std::vector<VALUETYPE> force_, virial_, datom_tensor_, datom_virial_;
+  dt->computew(dtensor_, force_, virial_, datom_tensor_, datom_virial_, dcoord_,
+               datype_, dbox, false);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dtensor_,
@@ -72,7 +74,9 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dtensor_,
                          const std::vector<VALUETYPE> &dbox,
                          const int nghost,
                          const InputNlist &lmp_list) {
-  dt->computew(dtensor_, dcoord_, datype_, dbox, nghost, lmp_list);
+  std::vector<VALUETYPE> force_, virial_, datom_tensor_, datom_virial_;
+  dt->computew(dtensor_, force_, virial_, datom_tensor_, datom_virial_, dcoord_,
+               datype_, dbox, nghost, lmp_list, false);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dtensor_,
@@ -98,7 +102,7 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const std::vector<VALUETYPE> &dbox) {
   std::vector<VALUETYPE> datom_tensor_, datom_virial_;
   dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
-               dcoord_, datype_, dbox);
+               dcoord_, datype_, dbox, true);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
@@ -126,7 +130,7 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const InputNlist &lmp_list) {
   std::vector<VALUETYPE> datom_tensor_, datom_virial_;
   dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
-               dcoord_, datype_, dbox, nghost, lmp_list);
+               dcoord_, datype_, dbox, nghost, lmp_list, true);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
@@ -157,7 +161,7 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const std::vector<int> &datype_,
                          const std::vector<VALUETYPE> &dbox) {
   dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
-               dcoord_, datype_, dbox);
+               dcoord_, datype_, dbox, true);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
@@ -190,7 +194,7 @@ void DeepTensor::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                          const int nghost,
                          const InputNlist &lmp_list) {
   dt->computew(dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_,
-               dcoord_, datype_, dbox, nghost, lmp_list);
+               dcoord_, datype_, dbox, nghost, lmp_list, true);
 }
 
 template void DeepTensor::compute<double>(std::vector<double> &dglobal_tensor_,
diff --git a/source/api_cc/src/DeepTensorTF.cc b/source/api_cc/src/DeepTensorTF.cc
index 75399e9f39..436e389ad2 100644
--- a/source/api_cc/src/DeepTensorTF.cc
+++ b/source/api_cc/src/DeepTensorTF.cc
@@ -390,66 +390,6 @@ template void DeepTensorTF::compute<float>(std::vector<float> &dtensor_,
                                            const int nghost,
                                            const InputNlist &lmp_list);
 
-template <typename VALUETYPE>
-void DeepTensorTF::compute(std::vector<VALUETYPE> &dglobal_tensor_,
-                           std::vector<VALUETYPE> &dforce_,
-                           std::vector<VALUETYPE> &dvirial_,
-                           const std::vector<VALUETYPE> &dcoord_,
-                           const std::vector<int> &datype_,
-                           const std::vector<VALUETYPE> &dbox) {
-  std::vector<VALUETYPE> tmp_at_, tmp_av_;
-  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_,
-          datype_, dbox);
-}
-
-template void DeepTensorTF::compute<double>(
-    std::vector<double> &dglobal_tensor_,
-    std::vector<double> &dforce_,
-    std::vector<double> &dvirial_,
-    const std::vector<double> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<double> &dbox);
-
-template void DeepTensorTF::compute<float>(std::vector<float> &dglobal_tensor_,
-                                           std::vector<float> &dforce_,
-                                           std::vector<float> &dvirial_,
-                                           const std::vector<float> &dcoord_,
-                                           const std::vector<int> &datype_,
-                                           const std::vector<float> &dbox);
-
-template <typename VALUETYPE>
-void DeepTensorTF::compute(std::vector<VALUETYPE> &dglobal_tensor_,
-                           std::vector<VALUETYPE> &dforce_,
-                           std::vector<VALUETYPE> &dvirial_,
-                           const std::vector<VALUETYPE> &dcoord_,
-                           const std::vector<int> &datype_,
-                           const std::vector<VALUETYPE> &dbox,
-                           const int nghost,
-                           const InputNlist &lmp_list) {
-  std::vector<VALUETYPE> tmp_at_, tmp_av_;
-  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_,
-          datype_, dbox, nghost, lmp_list);
-}
-
-template void DeepTensorTF::compute<double>(
-    std::vector<double> &dglobal_tensor_,
-    std::vector<double> &dforce_,
-    std::vector<double> &dvirial_,
-    const std::vector<double> &dcoord_,
-    const std::vector<int> &datype_,
-    const std::vector<double> &dbox,
-    const int nghost,
-    const InputNlist &lmp_list);
-
-template void DeepTensorTF::compute<float>(std::vector<float> &dglobal_tensor_,
-                                           std::vector<float> &dforce_,
-                                           std::vector<float> &dvirial_,
-                                           const std::vector<float> &dcoord_,
-                                           const std::vector<int> &datype_,
-                                           const std::vector<float> &dbox,
-                                           const int nghost,
-                                           const InputNlist &lmp_list);
-
 template <typename VALUETYPE>
 void DeepTensorTF::compute(std::vector<VALUETYPE> &dglobal_tensor_,
                            std::vector<VALUETYPE> &dforce_,
@@ -819,36 +759,6 @@ void DeepTensorTF::get_type_map(std::string &type_map) {
   type_map = get_scalar<STRINGTYPE>("model_attr/tmap");
 }
 
-void DeepTensorTF::computew(std::vector<double> &value,
-                            const std::vector<double> &coord,
-                            const std::vector<int> &atype,
-                            const std::vector<double> &box) {
-  compute(value, coord, atype, box);
-}
-void DeepTensorTF::computew(std::vector<float> &value,
-                            const std::vector<float> &coord,
-                            const std::vector<int> &atype,
-                            const std::vector<float> &box) {
-  compute(value, coord, atype, box);
-}
-
-void DeepTensorTF::computew(std::vector<double> &value,
-                            const std::vector<double> &coord,
-                            const std::vector<int> &atype,
-                            const std::vector<double> &box,
-                            const int nghost,
-                            const InputNlist &inlist) {
-  compute(value, coord, atype, box, nghost, inlist);
-}
-void DeepTensorTF::computew(std::vector<float> &value,
-                            const std::vector<float> &coord,
-                            const std::vector<int> &atype,
-                            const std::vector<float> &box,
-                            const int nghost,
-                            const InputNlist &inlist) {
-  compute(value, coord, atype, box, nghost, inlist);
-}
-
 void DeepTensorTF::computew(std::vector<double> &global_tensor,
                             std::vector<double> &force,
                             std::vector<double> &virial,
@@ -856,9 +766,18 @@ void DeepTensorTF::computew(std::vector<double> &global_tensor,
                             std::vector<double> &atom_virial,
                             const std::vector<double> &coord,
                             const std::vector<int> &atype,
-                            const std::vector<double> &box) {
-  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
-          box);
+                            const std::vector<double> &box,
+                            const bool request_deriv) {
+  if (request_deriv) {
+    compute(global_tensor, force, virial, atom_tensor, atom_virial, coord,
+            atype, box);
+  } else {
+    compute(global_tensor, coord, atype, box);
+    force.clear();
+    virial.clear();
+    atom_tensor.clear();
+    atom_virial.clear();
+  }
 }
 void DeepTensorTF::computew(std::vector<float> &global_tensor,
                             std::vector<float> &force,
@@ -867,9 +786,18 @@ void DeepTensorTF::computew(std::vector<float> &global_tensor,
                             std::vector<float> &atom_virial,
                             const std::vector<float> &coord,
                             const std::vector<int> &atype,
-                            const std::vector<float> &box) {
-  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
-          box);
+                            const std::vector<float> &box,
+                            const bool request_deriv) {
+  if (request_deriv) {
+    compute(global_tensor, force, virial, atom_tensor, atom_virial, coord,
+            atype, box);
+  } else {
+    compute(global_tensor, coord, atype, box);
+    force.clear();
+    virial.clear();
+    atom_tensor.clear();
+    atom_virial.clear();
+  }
 }
 
 void DeepTensorTF::computew(std::vector<double> &global_tensor,
@@ -881,9 +809,18 @@ void DeepTensorTF::computew(std::vector<double> &global_tensor,
                             const std::vector<int> &atype,
                             const std::vector<double> &box,
                             const int nghost,
-                            const InputNlist &inlist) {
-  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
-          box, nghost, inlist);
+                            const InputNlist &inlist,
+                            const bool request_deriv) {
+  if (request_deriv) {
+    compute(global_tensor, force, virial, atom_tensor, atom_virial, coord,
+            atype, box, nghost, inlist);
+  } else {
+    compute(global_tensor, coord, atype, box, nghost, inlist);
+    force.clear();
+    virial.clear();
+    atom_tensor.clear();
+    atom_virial.clear();
+  }
 }
 void DeepTensorTF::computew(std::vector<float> &global_tensor,
                             std::vector<float> &force,
@@ -894,7 +831,16 @@ void DeepTensorTF::computew(std::vector<float> &global_tensor,
                             const std::vector<int> &atype,
                             const std::vector<float> &box,
                             const int nghost,
-                            const InputNlist &inlist) {
-  compute(global_tensor, force, virial, atom_tensor, atom_virial, coord, atype,
-          box, nghost, inlist);
+                            const InputNlist &inlist,
+                            const bool request_deriv) {
+  if (request_deriv) {
+    compute(global_tensor, force, virial, atom_tensor, atom_virial, coord,
+            atype, box, nghost, inlist);
+  } else {
+    compute(global_tensor, coord, atype, box, nghost, inlist);
+    force.clear();
+    virial.clear();
+    atom_tensor.clear();
+    atom_virial.clear();
+  }
 }

From 8c1b467f1d4e499977107c70dc943bde45c25cab Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 20 Jan 2024 02:31:35 -0500
Subject: [PATCH 91/97] docs: update Amber interface (#3074)

AmberTools24 should be released in April 2024.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 doc/third-party/out-of-deepmd-kit.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/doc/third-party/out-of-deepmd-kit.md b/doc/third-party/out-of-deepmd-kit.md
index 3d46b53578..ca5c7437fb 100644
--- a/doc/third-party/out-of-deepmd-kit.md
+++ b/doc/third-party/out-of-deepmd-kit.md
@@ -19,9 +19,15 @@ By inferring with the DP model `frozen_model_compressed.pb`, dpdata will generat
 
 An [OpenMM](https://github.com/openmm/openmm) plugin is provided from [JingHuangLab/openmm_deepmd_plugin](https://github.com/JingHuangLab/openmm_deepmd_plugin), written by the [Huang Lab](http://www.compbiophysics.org/) at Westlake University.
 
-## AMBER interface to DeePMD-kit
-
-An [AMBER](https://ambermd.org/) interface to DeePMD-kit is written by the [York [Lab](https://theory.rutgers.edu/) from Rutgers University. It is open-source at [GitLab RutgersLBSR/AmberDPRc](https://gitlab.com/RutgersLBSR/AmberDPRc/). Details can be found in [this paper](https://doi.org/10.1021/acs.jctc.1c00201).
+## Amber interface to DeePMD-kit
+
+Starting from [AmberTools24](https://ambermd.org/), `sander` includes an interface to the DeePMD-kit, which implements the [Deep Potential Range Corrected (DPRc) correction](../model/dprc.md).
+The DPRc model and the interface were developed by the [York Lab](https://theory.rutgers.edu/) from Rutgers University.
+More details are available in
+- [Amber Reference Manuals](https://ambermd.org/Manuals.php), providing documentation for how to enable the interface and the `&dprc` namelist;
+- [GitLab RutgersLBSR/AmberDPRc](https://gitlab.com/RutgersLBSR/AmberDPRc/), providing example mdin files;
+- [DP-Amber](https://github.com/njzjz/dpamber/), a tiny tool to convert Amber trajectory to DPRc training data;
+- [The original DPRc paper](https://doi.org/10.1021/acs.jctc.1c00201).
 
 ## DP-GEN
 

From efe0962f5e48749d5dae6b0846046d8820ca1e36 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 20 Jan 2024 09:11:59 -0500
Subject: [PATCH 92/97] move `OutOfMemoryError` from `deepmd` to `deepmd_utils`
 (#3153)

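To prevent a circular import: `deepmd_utils` no longer imports from
`deepmd`, and `deepmd.utils.errors` re-exports `OutOfMemoryError`.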

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/utils/errors.py           | 12 ++++++++++--
 deepmd_utils/utils/batch_size.py |  2 +-
 deepmd_utils/utils/errors.py     |  3 +++
 3 files changed, 14 insertions(+), 3 deletions(-)
 create mode 100644 deepmd_utils/utils/errors.py

diff --git a/deepmd/utils/errors.py b/deepmd/utils/errors.py
index 5d96fa0e6a..683131e48a 100644
--- a/deepmd/utils/errors.py
+++ b/deepmd/utils/errors.py
@@ -1,4 +1,9 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd_utils.utils.errors import (
+    OutOfMemoryError,
+)
+
+
 class GraphTooLargeError(Exception):
     """The graph is too large, exceeding protobuf's hard limit of 2GB."""
 
@@ -7,5 +12,8 @@ class GraphWithoutTensorError(Exception):
     pass
 
 
-class OutOfMemoryError(Exception):
-    """This error is caused by out-of-memory (OOM)."""
+__all__ = [
+    "OutOfMemoryError",
+    "GraphTooLargeError",
+    "GraphWithoutTensorError",
+]
diff --git a/deepmd_utils/utils/batch_size.py b/deepmd_utils/utils/batch_size.py
index c85806458f..1b93a51242 100644
--- a/deepmd_utils/utils/batch_size.py
+++ b/deepmd_utils/utils/batch_size.py
@@ -12,7 +12,7 @@
 
 import numpy as np
 
-from deepmd.utils.errors import (
+from deepmd_utils.utils.errors import (
     OutOfMemoryError,
 )
 
diff --git a/deepmd_utils/utils/errors.py b/deepmd_utils/utils/errors.py
new file mode 100644
index 0000000000..11f42ede96
--- /dev/null
+++ b/deepmd_utils/utils/errors.py
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+class OutOfMemoryError(Exception):
+    """This error is caused by out-of-memory (OOM)."""

From 937f03f1434f08c6c8f486c9a6aa5745b2c4dee4 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 20 Jan 2024 09:14:57 -0500
Subject: [PATCH 93/97] cmake: fix setting `CMAKE_HIP_FLAGS` (#3155)

Fix
https://github.com/deepmodeling/deepmd-kit/discussions/2523#discussioncomment-8176469.


[`CMAKE_<LANG>_FLAGS`](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html#variable:CMAKE_%3CLANG%3E_FLAGS)
is a string, so when using `set`, we should use quotes.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 source/lib/src/gpu/CMakeLists.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt
index 5eb833e24d..3bd24cc620 100644
--- a/source/lib/src/gpu/CMakeLists.txt
+++ b/source/lib/src/gpu/CMakeLists.txt
@@ -73,9 +73,10 @@ elseif(USE_ROCM_TOOLKIT)
 
   message(STATUS "HIP major version is " ${hip_VERSION_MAJOR})
 
-  set(CMAKE_HIP_FLAGS -fno-gpu-rdc ${CMAKE_HIP_FLAGS}) # --amdgpu-target=gfx906
+  set(CMAKE_HIP_FLAGS "-fno-gpu-rdc ${CMAKE_HIP_FLAGS}"
+  )# --amdgpu-target=gfx906
   if(hip_VERSION VERSION_LESS 3.5.1)
-    set(CMAKE_HIP_FLAGS -hc ${CMAKE_HIP_FLAGS})
+    set(CMAKE_HIP_FLAGS "-hc ${CMAKE_HIP_FLAGS}")
   endif()
 
   file(GLOB SOURCE_FILES "*.cu")

From 4d824300736c54b4d6a9ee3f0ccbfcc8c56250ec Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 20 Jan 2024 09:22:10 -0500
Subject: [PATCH 94/97] docs: document CP2K interface (#3158)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 doc/third-party/out-of-deepmd-kit.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/third-party/out-of-deepmd-kit.md b/doc/third-party/out-of-deepmd-kit.md
index ca5c7437fb..71dc9adb23 100644
--- a/doc/third-party/out-of-deepmd-kit.md
+++ b/doc/third-party/out-of-deepmd-kit.md
@@ -29,6 +29,10 @@ More details are available in
 - [DP-Amber](https://github.com/njzjz/dpamber/), a tiny tool to convert Amber trajectory to DPRc training data;
 - [The original DPRc paper](https://doi.org/10.1021/acs.jctc.1c00201).
 
+## CP2K interface to DeePMD-kit
+
+[CP2K](https://github.com/cp2k/cp2k/) v2024.2 adds an interface to the DeePMD-kit for molecular dynamics. Read the [CP2K manual](https://manual.cp2k.org/trunk/methods/machine_learning/deepmd.html#deepmd-kit) for details.
+
 ## DP-GEN
 
 [DP-GEN](https://github.com/deepmodeling/dpgen) provides a workflow to generate accurate DP models by calling DeePMD-kit's command line interface (CLI) in the local or remote server. Details can be found in [this paper](https://doi.org/10.1016/j.cpc.2020.107206).

From 10970629c55f3a58989a029c786def07631a2b79 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 20 Jan 2024 09:23:48 -0500
Subject: [PATCH 95/97] resolve "Multiplication result converted to larger
 type" (#3159)

Follow up #3149.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 source/api_c/include/deepmd.hpp            | 42 +++++++++++++---------
 source/api_c/src/c_api.cc                  |  2 +-
 source/api_cc/src/AtomMap.cc               | 12 ++++---
 source/api_cc/src/DeepPot.cc               |  6 ++--
 source/ipi/src/Convert.cc                  |  4 +--
 source/lib/tests/test_env_mat_a.cc         | 22 ++++++------
 source/lib/tests/test_env_mat_a_mix.cc     | 33 +++++++++--------
 source/lib/tests/test_env_mat_a_nvnmd.cc   |  4 +--
 source/lmp/compute_deeptensor_atom.cpp     |  2 +-
 source/op/ewald_recp.cc                    |  8 +++--
 source/op/matmul_fitnet_nvnmd.cc           |  2 +-
 source/op/matmul_flt_nvnmd.cc              |  6 ++--
 source/op/pair_tab.cc                      | 12 ++++---
 source/op/prod_env_mat_multi_device.cc     |  2 +-
 source/op/prod_force.cc                    |  3 +-
 source/op/prod_force_grad.cc               |  4 ++-
 source/op/prod_force_grad_multi_device.cc  | 22 +++++++-----
 source/op/prod_force_multi_device.cc       | 18 ++++++----
 source/op/prod_force_se_a_grad.cc          |  4 ++-
 source/op/prod_force_se_a_mask.cc          |  3 +-
 source/op/prod_force_se_a_mask_grad.cc     |  4 ++-
 source/op/prod_force_se_r_grad.cc          |  4 ++-
 source/op/prod_virial.cc                   |  7 ++--
 source/op/prod_virial_grad.cc              | 10 ++++--
 source/op/prod_virial_grad_multi_device.cc | 28 +++++++++------
 source/op/prod_virial_se_a_grad.cc         | 10 ++++--
 source/op/prod_virial_se_r_grad.cc         | 10 ++++--
 source/op/soft_min.cc                      |  7 ++--
 source/op/soft_min_force.cc                |  3 +-
 source/op/soft_min_force_grad.cc           |  6 ++--
 source/op/soft_min_virial.cc               |  7 ++--
 source/op/soft_min_virial_grad.cc          | 12 ++++---
 32 files changed, 202 insertions(+), 117 deletions(-)

diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 503a4c4b4b..06a50ee3f0 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -1060,14 +1060,15 @@ class DeepPot {
                               const int &nloc,
                               const std::vector<VALUETYPE> &fparam,
                               const std::vector<VALUETYPE> &aparam) const {
-    if (fparam.size() != dfparam && fparam.size() != nframes * dfparam) {
+    if (fparam.size() != dfparam &&
+        fparam.size() != static_cast<size_t>(nframes) * dfparam) {
       throw deepmd::hpp::deepmd_exception(
           "the dim of frame parameter provided is not consistent with what the "
           "model uses");
     }
 
-    if (aparam.size() != daparam * nloc &&
-        aparam.size() != nframes * daparam * nloc) {
+    if (aparam.size() != static_cast<size_t>(daparam) * nloc &&
+        aparam.size() != static_cast<size_t>(nframes) * daparam * nloc) {
       throw deepmd::hpp::deepmd_exception(
           "the dim of atom parameter provided is not consistent with what the "
           "model uses");
@@ -1081,9 +1082,10 @@ class DeepPot {
     if (param.size() == dparam) {
       out_param.resize(static_cast<size_t>(nframes) * dparam);
       for (int ii = 0; ii < nframes; ++ii) {
-        std::copy(param.begin(), param.end(), out_param.begin() + ii * dparam);
+        std::copy(param.begin(), param.end(),
+                  out_param.begin() + static_cast<std::ptrdiff_t>(ii) * dparam);
       }
-    } else if (param.size() == nframes * dparam) {
+    } else if (param.size() == static_cast<size_t>(nframes) * dparam) {
       out_param = param;
     }
   }
@@ -1184,7 +1186,8 @@ class DeepPotModelDevi {
 
     // memory will be continous for std::vector but not std::vector<std::vector>
     std::vector<double> energy_flat(numb_models);
-    std::vector<VALUETYPE> force_flat(numb_models * natoms * 3);
+    std::vector<VALUETYPE> force_flat(static_cast<size_t>(numb_models) *
+                                      natoms * 3);
     std::vector<VALUETYPE> virial_flat(numb_models * 9);
     double *ener_ = &energy_flat[0];
     VALUETYPE *force_ = &force_flat[0];
@@ -1260,10 +1263,13 @@ class DeepPotModelDevi {
     const int *atype_ = &atype[0];
 
     std::vector<double> energy_flat(numb_models);
-    std::vector<VALUETYPE> force_flat(numb_models * natoms * 3);
+    std::vector<VALUETYPE> force_flat(static_cast<size_t>(numb_models) *
+                                      natoms * 3);
     std::vector<VALUETYPE> virial_flat(numb_models * 9);
-    std::vector<VALUETYPE> atom_energy_flat(numb_models * natoms);
-    std::vector<VALUETYPE> atom_virial_flat(numb_models * natoms * 9);
+    std::vector<VALUETYPE> atom_energy_flat(static_cast<size_t>(numb_models) *
+                                            natoms);
+    std::vector<VALUETYPE> atom_virial_flat(static_cast<size_t>(numb_models) *
+                                            natoms * 9);
     double *ener_ = &energy_flat[0];
     VALUETYPE *force_ = &force_flat[0];
     VALUETYPE *virial_ = &virial_flat[0];
@@ -1402,8 +1408,8 @@ class DeepPotModelDevi {
 
     for (unsigned ii = 0; ii < numb_models; ++ii) {
       for (unsigned jj = 0; jj < nloc; ++jj) {
-        const VALUETYPE *tmp_f = &(xx[ii][jj * stride]);
-        const VALUETYPE *tmp_avg = &(avg[jj * stride]);
+        const VALUETYPE *tmp_f = &(xx[ii][static_cast<size_t>(jj) * stride]);
+        const VALUETYPE *tmp_avg = &(avg[static_cast<size_t>(jj) * stride]);
         for (unsigned dd = 0; dd < stride; ++dd) {
           VALUETYPE vdiff = tmp_f[dd] - tmp_avg[dd];
           std[jj] += vdiff * vdiff;
@@ -1432,7 +1438,7 @@ class DeepPotModelDevi {
     assert(nloc * stride == ndof);
 
     for (unsigned ii = 0; ii < nloc; ++ii) {
-      const VALUETYPE *tmp_avg = &(avg[ii * stride]);
+      const VALUETYPE *tmp_avg = &(avg[static_cast<size_t>(ii) * stride]);
       VALUETYPE f_norm = 0.0;
       for (unsigned dd = 0; dd < stride; ++dd) {
         f_norm += tmp_avg[dd] * tmp_avg[dd];
@@ -1477,14 +1483,15 @@ class DeepPotModelDevi {
                               const int &nloc,
                               const std::vector<VALUETYPE> &fparam,
                               const std::vector<VALUETYPE> &aparam) const {
-    if (fparam.size() != dfparam && fparam.size() != nframes * dfparam) {
+    if (fparam.size() != dfparam &&
+        fparam.size() != static_cast<size_t>(nframes) * dfparam) {
       throw deepmd::hpp::deepmd_exception(
           "the dim of frame parameter provided is not consistent with what the "
           "model uses");
     }
 
-    if (aparam.size() != daparam * nloc &&
-        aparam.size() != nframes * daparam * nloc) {
+    if (aparam.size() != static_cast<size_t>(daparam) * nloc &&
+        aparam.size() != static_cast<size_t>(nframes) * daparam * nloc) {
       throw deepmd::hpp::deepmd_exception(
           "the dim of atom parameter provided is not consistent with what the "
           "model uses");
@@ -1498,9 +1505,10 @@ class DeepPotModelDevi {
     if (param.size() == dparam) {
       out_param.resize(static_cast<size_t>(nframes) * dparam);
       for (int ii = 0; ii < nframes; ++ii) {
-        std::copy(param.begin(), param.end(), out_param.begin() + ii * dparam);
+        std::copy(param.begin(), param.end(),
+                  out_param.begin() + static_cast<std::ptrdiff_t>(ii) * dparam);
       }
-    } else if (param.size() == nframes * dparam) {
+    } else if (param.size() == static_cast<size_t>(nframes) * dparam) {
       out_param = param;
     }
   }
diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc
index 935e812cf0..bc6178702f 100644
--- a/source/api_c/src/c_api.cc
+++ b/source/api_c/src/c_api.cc
@@ -1414,7 +1414,7 @@ void DP_SelectMapInt(const int* in,
                      int* out) {
   std::vector<int> in_(in, in + stride * nall1);
   std::vector<int> fwd_map_(fwd_map, fwd_map + nall1);
-  std::vector<int> out_(stride * nall2);
+  std::vector<int> out_(static_cast<size_t>(stride) * nall2);
   deepmd::select_map(out_, in_, fwd_map_, stride);
   if (out) {
     std::copy(out_.begin(), out_.end(), out);
diff --git a/source/api_cc/src/AtomMap.cc b/source/api_cc/src/AtomMap.cc
index 0b6105c5f2..b79f848277 100644
--- a/source/api_cc/src/AtomMap.cc
+++ b/source/api_cc/src/AtomMap.cc
@@ -39,8 +39,10 @@ void AtomMap::forward(typename std::vector<VALUETYPE>::iterator out,
       int gro_i = idx_map[ii];
       for (int dd = 0; dd < stride; ++dd) {
         // out[ii*stride+dd] = in[gro_i*stride+dd];
-        *(out + kk * nall * stride + ii * stride + dd) =
-            *(in + kk * nall * stride + gro_i * stride + dd);
+        *(out + static_cast<std::ptrdiff_t>(kk) * nall * stride +
+          static_cast<std::ptrdiff_t>(ii) * stride + dd) =
+            *(in + static_cast<std::ptrdiff_t>(kk) * nall * stride +
+              static_cast<std::ptrdiff_t>(gro_i) * stride + dd);
       }
     }
   }
@@ -58,8 +60,10 @@ void AtomMap::backward(typename std::vector<VALUETYPE>::iterator out,
       int gro_i = idx_map[ii];
       for (int dd = 0; dd < stride; ++dd) {
         // out[gro_i*stride+dd] = in[ii*stride+dd];
-        *(out + kk * nall * stride + gro_i * stride + dd) =
-            *(in + kk * nall * stride + ii * stride + dd);
+        *(out + static_cast<std::ptrdiff_t>(kk) * nall * stride +
+          static_cast<std::ptrdiff_t>(gro_i) * stride + dd) =
+            *(in + static_cast<std::ptrdiff_t>(kk) * nall * stride +
+              static_cast<std::ptrdiff_t>(ii) * stride + dd);
       }
     }
   }
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index feb0f283b1..083e9b091f 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -761,8 +761,8 @@ void DeepPotModelDevi::compute_std(
 
   for (unsigned ii = 0; ii < numb_models; ++ii) {
     for (unsigned jj = 0; jj < nloc; ++jj) {
-      const VALUETYPE* tmp_f = &(xx[ii][jj * stride]);
-      const VALUETYPE* tmp_avg = &(avg[jj * stride]);
+      const VALUETYPE* tmp_f = &(xx[ii][static_cast<size_t>(jj) * stride]);
+      const VALUETYPE* tmp_avg = &(avg[static_cast<size_t>(jj) * stride]);
       for (unsigned dd = 0; dd < stride; ++dd) {
         VALUETYPE vdiff = tmp_f[dd] - tmp_avg[dd];
         std[jj] += vdiff * vdiff;
@@ -833,7 +833,7 @@ void DeepPotModelDevi::compute_relative_std(std::vector<VALUETYPE>& std,
   assert(nloc * stride == ndof);
 
   for (unsigned ii = 0; ii < nloc; ++ii) {
-    const VALUETYPE* tmp_avg = &(avg[ii * stride]);
+    const VALUETYPE* tmp_avg = &(avg[static_cast<size_t>(ii) * stride]);
     VALUETYPE f_norm = 0.0;
     for (unsigned dd = 0; dd < stride; ++dd) {
       f_norm += tmp_avg[dd] * tmp_avg[dd];
diff --git a/source/ipi/src/Convert.cc b/source/ipi/src/Convert.cc
index 0a98962518..7dea877e3b 100644
--- a/source/ipi/src/Convert.cc
+++ b/source/ipi/src/Convert.cc
@@ -30,7 +30,7 @@ void Convert<VALUETYPE>::forward(std::vector<VALUETYPE>& out,
                                  const int stride) const {
   assert(in.size() == stride * idx_map.size());
   int natoms = idx_map.size();
-  out.resize(stride * natoms);
+  out.resize(static_cast<size_t>(stride) * natoms);
   for (int ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map[ii];
     for (int dd = 0; dd < stride; ++dd) {
@@ -45,7 +45,7 @@ void Convert<VALUETYPE>::backward(std::vector<VALUETYPE>& out,
                                   const int stride) const {
   int natoms = idx_map.size();
   assert(in.size() == stride * idx_map.size());
-  out.resize(stride * natoms);
+  out.resize(static_cast<size_t>(stride) * natoms);
   for (int ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map[ii];
     for (int dd = 0; dd < stride; ++dd) {
diff --git a/source/lib/tests/test_env_mat_a.cc b/source/lib/tests/test_env_mat_a.cc
index 89756c9fc5..d041d1a0a1 100644
--- a/source/lib/tests/test_env_mat_a.cc
+++ b/source/lib/tests/test_env_mat_a.cc
@@ -504,11 +504,12 @@ TEST_F(TestEnvMatA, prod_cpu) {
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   deepmd::convert_nlist(inlist, nlist_a_cpy);
 
-  std::vector<double> em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
-      rij(nloc * nnei * 3);
-  std::vector<int> nlist(nloc * nnei);
-  std::vector<double> avg(ntypes * ndescrpt, 0);
-  std::vector<double> std(ntypes * ndescrpt, 1);
+  std::vector<double> em(static_cast<size_t>(nloc) * ndescrpt),
+      em_deriv(static_cast<size_t>(nloc) * ndescrpt * 3),
+      rij(static_cast<size_t>(nloc) * nnei * 3);
+  std::vector<int> nlist(static_cast<size_t>(nloc) * nnei);
+  std::vector<double> avg(static_cast<size_t>(ntypes) * ndescrpt, 0);
+  std::vector<double> std(static_cast<size_t>(ntypes) * ndescrpt, 1);
   deepmd::prod_env_mat_a_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
                              &posi_cpy[0], &atype_cpy[0], inlist, max_nbor_size,
                              &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
@@ -538,11 +539,12 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu) {
   std::vector<int *> firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector<double> em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
-      rij(nloc * nnei * 3);
-  std::vector<int> nlist(nloc * nnei);
-  std::vector<double> avg(ntypes * ndescrpt, 0);
-  std::vector<double> std(ntypes * ndescrpt, 1);
+  std::vector<double> em(static_cast<size_t>(nloc) * ndescrpt),
+      em_deriv(static_cast<size_t>(nloc) * ndescrpt * 3),
+      rij(static_cast<size_t>(nloc) * nnei * 3);
+  std::vector<int> nlist(static_cast<size_t>(nloc) * nnei);
+  std::vector<double> avg(static_cast<size_t>(ntypes) * ndescrpt, 0);
+  std::vector<double> std(static_cast<size_t>(ntypes) * ndescrpt, 1);
   deepmd::prod_env_mat_a_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
                              &posi_cpy[0], &atype_cpy[0], inlist, max_nbor_size,
                              &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
diff --git a/source/lib/tests/test_env_mat_a_mix.cc b/source/lib/tests/test_env_mat_a_mix.cc
index 909088d1e3..d7e6cc88eb 100644
--- a/source/lib/tests/test_env_mat_a_mix.cc
+++ b/source/lib/tests/test_env_mat_a_mix.cc
@@ -532,11 +532,12 @@ TEST_F(TestEnvMatAMix, prod_cpu) {
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   deepmd::convert_nlist(inlist, nlist_a_cpy);
 
-  std::vector<double> em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
-      rij(nloc * nnei * 3);
-  std::vector<int> nlist(nloc * nnei);
-  std::vector<int> ntype(nloc * nnei);
-  bool *nmask = new bool[nloc * nnei];
+  std::vector<double> em(static_cast<size_t>(nloc) * ndescrpt),
+      em_deriv(static_cast<size_t>(nloc) * ndescrpt * 3),
+      rij(static_cast<size_t>(nloc) * nnei * 3);
+  std::vector<int> nlist(static_cast<size_t>(nloc) * nnei);
+  std::vector<int> ntype(static_cast<size_t>(nloc) * nnei);
+  bool *nmask = new bool[static_cast<size_t>(nloc) * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
   std::vector<double> avg(ntypes * ndescrpt, 0);
   std::vector<double> std(ntypes * ndescrpt, 1);
@@ -575,11 +576,12 @@ TEST_F(TestEnvMatAMix, prod_cpu_equal_cpu) {
   std::vector<int *> firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector<double> em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
-      rij(nloc * nnei * 3);
-  std::vector<int> nlist(nloc * nnei);
-  std::vector<double> avg(ntypes * ndescrpt, 0);
-  std::vector<double> std(ntypes * ndescrpt, 1);
+  std::vector<double> em(static_cast<size_t>(nloc) * ndescrpt),
+      em_deriv(static_cast<size_t>(nloc) * ndescrpt * 3),
+      rij(static_cast<size_t>(nloc) * nnei * 3);
+  std::vector<int> nlist(static_cast<size_t>(nloc) * nnei);
+  std::vector<double> avg(static_cast<size_t>(ntypes) * ndescrpt, 0);
+  std::vector<double> std(static_cast<size_t>(ntypes) * ndescrpt, 1);
   deepmd::prod_env_mat_a_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
                              &posi_cpy[0], &atype[0], inlist, max_nbor_size,
                              &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a,
@@ -652,11 +654,12 @@ TEST_F(TestEnvMatAMix, prod_gpu) {
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector<double> em(nloc * ndescrpt, 0.0),
-      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
-  std::vector<int> nlist(nloc * nnei, 0);
-  std::vector<int> ntype(nloc * nnei, 0);
-  bool *nmask = new bool[nloc * nnei];
+  std::vector<double> em(static_cast<size_t>(nloc) * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0),
+      rij(static_cast<size_t>(nloc) * nnei * 3, 0.0);
+  std::vector<int> nlist(static_cast<size_t>(nloc) * nnei, 0);
+  std::vector<int> ntype(static_cast<size_t>(nloc) * nnei, 0);
+  bool *nmask = new bool[static_cast<size_t>(nloc) * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
   std::vector<double> avg(ntypes * ndescrpt, 0);
   std::vector<double> std(ntypes * ndescrpt, 1);
diff --git a/source/lib/tests/test_env_mat_a_nvnmd.cc b/source/lib/tests/test_env_mat_a_nvnmd.cc
index cca468829e..bf55323ee0 100644
--- a/source/lib/tests/test_env_mat_a_nvnmd.cc
+++ b/source/lib/tests/test_env_mat_a_nvnmd.cc
@@ -274,7 +274,7 @@ TEST_F(TestEnvMatANvnmd, prod_cpu) {
   deepmd::convert_nlist(inlist, nlist_a_cpy);
 
   std::vector<double> em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
-      rij(nloc * nnei * 3);
+      rij(static_cast<size_t>(nloc) * nnei * 3);
   std::vector<int> nlist(nloc * nnei);
   std::vector<double> avg(ntypes * ndescrpt, 0);
   std::vector<double> std(ntypes * ndescrpt, 1);
@@ -308,7 +308,7 @@ TEST_F(TestEnvMatANvnmd, prod_cpu_equal_cpu) {
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
   std::vector<double> em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
-      rij(nloc * nnei * 3);
+      rij(static_cast<size_t>(nloc) * nnei * 3);
   std::vector<int> nlist(nloc * nnei);
   std::vector<double> avg(ntypes * ndescrpt, 0);
   std::vector<double> std(ntypes * ndescrpt, 1);
diff --git a/source/lmp/compute_deeptensor_atom.cpp b/source/lmp/compute_deeptensor_atom.cpp
index 2f4486002e..6e6e9508b7 100644
--- a/source/lmp/compute_deeptensor_atom.cpp
+++ b/source/lmp/compute_deeptensor_atom.cpp
@@ -178,6 +178,6 @@ void ComputeDeeptensorAtom::compute_peratom() {
 ------------------------------------------------------------------------- */
 
 double ComputeDeeptensorAtom::memory_usage() {
-  double bytes = nmax * size_peratom_cols * sizeof(double);
+  double bytes = static_cast<size_t>(nmax) * size_peratom_cols * sizeof(double);
   return bytes;
 }
diff --git a/source/op/ewald_recp.cc b/source/op/ewald_recp.cc
index 72f3c3d5dc..dcad204467 100644
--- a/source/op/ewald_recp.cc
+++ b/source/op/ewald_recp.cc
@@ -56,10 +56,14 @@ class EwaldRecpOp : public OpKernel {
 
     // check the sizes
     OP_REQUIRES(
-        context, (nsamples * nloc * 3 == coord_tensor.shape().dim_size(0)),
+        context,
+        (static_cast<int64_t>(nsamples) * nloc * 3 ==
+         coord_tensor.shape().dim_size(0)),
         errors::InvalidArgument("coord  number of samples should match"));
     OP_REQUIRES(
-        context, (nsamples * nloc * 1 == charge_tensor.shape().dim_size(0)),
+        context,
+        (static_cast<int64_t>(nsamples) * nloc * 1 ==
+         charge_tensor.shape().dim_size(0)),
         errors::InvalidArgument("charge number of samples should match"));
     OP_REQUIRES(
         context, (nsamples * 9 == box_tensor.shape().dim_size(0)),
diff --git a/source/op/matmul_fitnet_nvnmd.cc b/source/op/matmul_fitnet_nvnmd.cc
index 28795526de..b5dc32a642 100644
--- a/source/op/matmul_fitnet_nvnmd.cc
+++ b/source/op/matmul_fitnet_nvnmd.cc
@@ -133,7 +133,7 @@ class MatmulFitnetNvnmdOp : public OpKernel {
     expo_maxs.resize(K);
 
     if (normw == 0) {
-      find_max_expo(expo_max, (FPTYPE*)&w[0], M * K);
+      find_max_expo(expo_max, (FPTYPE*)&w[0], static_cast<int64_t>(M) * K);
       for (kk = 0; kk < K; kk++) {
         expo_maxs[kk] = expo_max;
       }
diff --git a/source/op/matmul_flt_nvnmd.cc b/source/op/matmul_flt_nvnmd.cc
index 19e7a4869e..92b6375100 100644
--- a/source/op/matmul_flt_nvnmd.cc
+++ b/source/op/matmul_flt_nvnmd.cc
@@ -130,7 +130,8 @@ class MatmulFltNvnmdOp : public OpKernel {
     for (hh = 0; hh < H; hh++) {
       // find x max exponnet
       if ((normx & 0x0f) == 0) {  // normalize x[:,:]
-        find_max_expo(expo_max1, (FPTYPE *)&x[hh * N * M], N * M);
+        find_max_expo(expo_max1, (FPTYPE *)&x[hh * N * M],
+                      static_cast<int64_t>(N) * M);
         for (ii = 0; ii < N; ii++) {
           expo_max1s[ii] = expo_max1;
         }
@@ -144,7 +145,8 @@ class MatmulFltNvnmdOp : public OpKernel {
 
       // find w max exponnet
       if ((normw & 0x0f) == 0) {  // normalize w[:,:]
-        find_max_expo(expo_max2, (FPTYPE *)&w[hh * M * K], M * K);
+        find_max_expo(expo_max2, (FPTYPE *)&w[hh * M * K],
+                      static_cast<int64_t>(M) * K);
         for (kk = 0; kk < K; kk++) {
           expo_max2s[kk] = expo_max2;
         }
diff --git a/source/op/pair_tab.cc b/source/op/pair_tab.cc
index 5c16e0faa4..0b04390d5f 100644
--- a/source/op/pair_tab.cc
+++ b/source/op/pair_tab.cc
@@ -89,10 +89,13 @@ class PairTabOp : public OpKernel {
     OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("shape of type should be nall"));
     OP_REQUIRES(
-        context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)),
+        context,
+        (3 * static_cast<int64_t>(nnei) * nloc ==
+         rij_tensor.shape().dim_size(1)),
         errors::InvalidArgument("shape of rij should be 3 * nloc * nnei"));
     OP_REQUIRES(
-        context, (nnei * nloc == nlist_tensor.shape().dim_size(1)),
+        context,
+        (static_cast<int64_t>(nnei) * nloc == nlist_tensor.shape().dim_size(1)),
         errors::InvalidArgument("shape of nlist should be nloc * nnei"));
     OP_REQUIRES(context, (nloc == scale_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("shape of scale should be nloc"));
@@ -134,10 +137,11 @@ class PairTabOp : public OpKernel {
                     "ntypes provided in table does not match deeppot"));
     int nspline = table_info(2) + 0.1;
     int tab_stride = 4 * nspline;
-    assert(ntypes * ntypes * tab_stride ==
+    assert(static_cast<int64_t>(ntypes) * ntypes * tab_stride ==
            table_data_tensor.shape().dim_size(0));
     std::vector<double> d_table_info(4);
-    std::vector<double> d_table_data(ntypes * ntypes * tab_stride);
+    std::vector<double> d_table_data(static_cast<size_t>(ntypes) * ntypes *
+                                     tab_stride);
     for (unsigned ii = 0; ii < d_table_info.size(); ++ii) {
       d_table_info[ii] = table_info(ii);
     }
diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc
index a99804cb9e..5ee43d2af3 100644
--- a/source/op/prod_env_mat_multi_device.cc
+++ b/source/op/prod_env_mat_multi_device.cc
@@ -1098,7 +1098,7 @@ class ProdEnvMatAMixOp : public OpKernel {
 
     Tensor fake_type_tensor;  // all zeros
     TensorShape fake_type_shape;
-    fake_type_shape.AddDim(nsamples * nall);
+    fake_type_shape.AddDim(static_cast<int64_t>(nsamples) * nall);
     OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, fake_type_shape,
                                                    &fake_type_tensor));
 
diff --git a/source/op/prod_force.cc b/source/op/prod_force.cc
index 57d1cd1331..20269ebef3 100644
--- a/source/op/prod_force.cc
+++ b/source/op/prod_force.cc
@@ -70,7 +70,8 @@ class ProdForceOp : public OpKernel {
                 errors::InvalidArgument("number of samples should match"));
 
     OP_REQUIRES(context,
-                (nloc * ndescrpt * 12 == in_deriv_tensor.shape().dim_size(1)),
+                (static_cast<int64_t>(nloc) * ndescrpt * 12 ==
+                 in_deriv_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
diff --git a/source/op/prod_force_grad.cc b/source/op/prod_force_grad.cc
index c1cf63917e..acfe9145fe 100644
--- a/source/op/prod_force_grad.cc
+++ b/source/op/prod_force_grad.cc
@@ -81,7 +81,9 @@ class ProdForceGradOp : public OpKernel {
     OP_REQUIRES(
         context, (nloc * 3 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * ndescrpt * 12 == in_deriv_shape.dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * ndescrpt * 12 ==
+                 in_deriv_shape.dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
diff --git a/source/op/prod_force_grad_multi_device.cc b/source/op/prod_force_grad_multi_device.cc
index bbcef6bd91..ee8a29732d 100644
--- a/source/op/prod_force_grad_multi_device.cc
+++ b/source/op/prod_force_grad_multi_device.cc
@@ -107,11 +107,14 @@ class ProdForceSeAGradOp : public OpKernel {
     assert(nframes == net_deriv_tensor.shape().dim_size(0));
     assert(nframes == in_deriv_tensor.shape().dim_size(0));
     assert(nframes == nlist_tensor.shape().dim_size(0));
-    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt == grad_net_shape.dim_size(1));
     assert(nloc * 3 == grad_shape.dim_size(1));
-    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt ==
+           net_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+           in_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * nnei ==
+           nlist_tensor.shape().dim_size(1));
     assert(nnei * 4 == ndescrpt);
     // flat the tensors
     FPTYPE* p_grad_net = grad_net_tensor->flat<FPTYPE>().data();
@@ -215,11 +218,14 @@ class ProdForceSeRGradOp : public OpKernel {
     assert(nframes == net_deriv_tensor.shape().dim_size(0));
     assert(nframes == in_deriv_tensor.shape().dim_size(0));
     assert(nframes == nlist_tensor.shape().dim_size(0));
-    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt == grad_net_shape.dim_size(1));
     assert(nloc * 3 == grad_shape.dim_size(1));
-    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt ==
+           net_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+           in_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * nnei ==
+           nlist_tensor.shape().dim_size(1));
     assert(nnei * 1 == ndescrpt);
     // flat the tensors
     FPTYPE* p_grad_net = grad_net_tensor->flat<FPTYPE>().data();
diff --git a/source/op/prod_force_multi_device.cc b/source/op/prod_force_multi_device.cc
index 20cc96dd31..d48749faa5 100644
--- a/source/op/prod_force_multi_device.cc
+++ b/source/op/prod_force_multi_device.cc
@@ -115,9 +115,12 @@ class ProdForceSeAOp : public OpKernel {
     assert(nframes == in_deriv_tensor.shape().dim_size(0));
     assert(nframes == nlist_tensor.shape().dim_size(0));
     assert(nall * 3 == force_shape.dim_size(1));
-    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt ==
+           net_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+           in_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * nnei ==
+           nlist_tensor.shape().dim_size(1));
     assert(nnei * 4 == ndescrpt);
 
     // flat the tensors
@@ -195,7 +198,8 @@ class ProdForceSeROp : public OpKernel {
     OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
                 errors::InvalidArgument("number of samples should match"));
     OP_REQUIRES(context,
-                (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)),
+                (static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+                 in_deriv_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
     // Create an output tensor
     TensorShape force_shape;
@@ -212,8 +216,10 @@ class ProdForceSeROp : public OpKernel {
     assert(nframes == in_deriv_tensor.shape().dim_size(0));
     assert(nframes == nlist_tensor.shape().dim_size(0));
     assert(nall * 3 == force_shape.dim_size(1));
-    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt ==
+           net_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+           in_deriv_tensor.shape().dim_size(1));
     assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
     assert(nnei * 1 == ndescrpt);
     // flat the tensors
diff --git a/source/op/prod_force_se_a_grad.cc b/source/op/prod_force_se_a_grad.cc
index 5aaf030512..05e26b5058 100644
--- a/source/op/prod_force_se_a_grad.cc
+++ b/source/op/prod_force_se_a_grad.cc
@@ -77,7 +77,9 @@ class ProdForceSeAGradOp : public OpKernel {
     OP_REQUIRES(
         context, (nloc * 3 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+                 in_deriv_shape.dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
diff --git a/source/op/prod_force_se_a_mask.cc b/source/op/prod_force_se_a_mask.cc
index aa4268434d..a7b08ae664 100644
--- a/source/op/prod_force_se_a_mask.cc
+++ b/source/op/prod_force_se_a_mask.cc
@@ -57,7 +57,8 @@ class ProdForceSeAMaskOp : public OpKernel {
     OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
                 errors::InvalidArgument("number of samples should match"));
     OP_REQUIRES(context,
-                (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)),
+                (static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+                 in_deriv_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
 
     // Create an output tensor
diff --git a/source/op/prod_force_se_a_mask_grad.cc b/source/op/prod_force_se_a_mask_grad.cc
index dabe405545..a01919199f 100644
--- a/source/op/prod_force_se_a_mask_grad.cc
+++ b/source/op/prod_force_se_a_mask_grad.cc
@@ -71,7 +71,9 @@ class ProdForceSeAMaskGradOp : public OpKernel {
     OP_REQUIRES(
         context, (nloc * 3 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+                 in_deriv_shape.dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
 
     // Create an output tensor
diff --git a/source/op/prod_force_se_r_grad.cc b/source/op/prod_force_se_r_grad.cc
index 0b5338c241..44741d20fb 100644
--- a/source/op/prod_force_se_r_grad.cc
+++ b/source/op/prod_force_se_r_grad.cc
@@ -71,7 +71,9 @@ class ProdForceSeRGradOp : public OpKernel {
     OP_REQUIRES(
         context, (nloc * 3 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+                 in_deriv_shape.dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
 
     // Create an output tensor
diff --git a/source/op/prod_virial.cc b/source/op/prod_virial.cc
index 10532d74db..42e7be669d 100644
--- a/source/op/prod_virial.cc
+++ b/source/op/prod_virial.cc
@@ -77,9 +77,12 @@ class ProdVirialOp : public OpKernel {
                 errors::InvalidArgument("number of samples should match"));
 
     OP_REQUIRES(context,
-                (nloc * ndescrpt * 12 == in_deriv_tensor.shape().dim_size(1)),
+                (static_cast<int64_t>(nloc) * ndescrpt * 12 ==
+                 in_deriv_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * nnei * 3 ==
+                 rij_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("dim of rij should be nnei * 3"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
diff --git a/source/op/prod_virial_grad.cc b/source/op/prod_virial_grad.cc
index 02feba4eee..a764e524a6 100644
--- a/source/op/prod_virial_grad.cc
+++ b/source/op/prod_virial_grad.cc
@@ -88,10 +88,14 @@ class ProdVirialGradOp : public OpKernel {
     OP_REQUIRES(
         context, (9 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * ndescrpt * 12 == in_deriv_shape.dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * ndescrpt * 12 ==
+                 in_deriv_shape.dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
-                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(
+        context,
+        (static_cast<int64_t>(nloc) * nnei * 3 == rij_shape.dim_size(1)),
+        errors::InvalidArgument("dim of rij should be  nnei * 3"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
     OP_REQUIRES(
diff --git a/source/op/prod_virial_grad_multi_device.cc b/source/op/prod_virial_grad_multi_device.cc
index 215c26f184..3d8d2a96b3 100644
--- a/source/op/prod_virial_grad_multi_device.cc
+++ b/source/op/prod_virial_grad_multi_device.cc
@@ -118,12 +118,16 @@ class ProdVirialSeAGradOp : public OpKernel {
     assert(nframes == in_deriv_tensor.shape().dim_size(0));
     assert(nframes == rij_tensor.shape().dim_size(0));
     assert(nframes == nlist_tensor.shape().dim_size(0));
-    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt == grad_net_shape.dim_size(1));
     assert(9 == grad_shape.dim_size(1));
-    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert(nloc * nnei * 3 == rij_tensor.shape().dim_size(1));
-    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt ==
+           net_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+           in_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * nnei * 3 ==
+           rij_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * nnei ==
+           nlist_tensor.shape().dim_size(1));
     assert(nnei * 4 == ndescrpt);
 
     // flat the tensors
@@ -246,12 +250,16 @@ class ProdVirialSeRGradOp : public OpKernel {
     assert(nframes == in_deriv_tensor.shape().dim_size(0));
     assert(nframes == rij_tensor.shape().dim_size(0));
     assert(nframes == nlist_tensor.shape().dim_size(0));
-    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt == grad_net_shape.dim_size(1));
     assert(9 == grad_shape.dim_size(1));
-    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert(nloc * nnei * 3 == rij_tensor.shape().dim_size(1));
-    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt ==
+           net_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+           in_deriv_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * nnei * 3 ==
+           rij_tensor.shape().dim_size(1));
+    assert(static_cast<int64_t>(nloc) * nnei ==
+           nlist_tensor.shape().dim_size(1));
     assert(nnei * 1 == ndescrpt);
 
     // flat the tensors
diff --git a/source/op/prod_virial_se_a_grad.cc b/source/op/prod_virial_se_a_grad.cc
index d6c55b6969..e3a9374b8f 100644
--- a/source/op/prod_virial_se_a_grad.cc
+++ b/source/op/prod_virial_se_a_grad.cc
@@ -84,10 +84,14 @@ class ProdVirialSeAGradOp : public OpKernel {
     OP_REQUIRES(
         context, (9 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+                 in_deriv_shape.dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
-                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(
+        context,
+        (static_cast<int64_t>(nloc) * nnei * 3 == rij_shape.dim_size(1)),
+        errors::InvalidArgument("dim of rij should be  nnei * 3"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
 
diff --git a/source/op/prod_virial_se_r_grad.cc b/source/op/prod_virial_se_r_grad.cc
index 40c2828ca7..8e9b2c25b0 100644
--- a/source/op/prod_virial_se_r_grad.cc
+++ b/source/op/prod_virial_se_r_grad.cc
@@ -78,10 +78,14 @@ class ProdVirialSeRGradOp : public OpKernel {
     OP_REQUIRES(
         context, (9 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * ndescrpt * 3 ==
+                 in_deriv_shape.dim_size(1)),
                 errors::InvalidArgument("number of descriptors should match"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
-                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(
+        context,
+        (static_cast<int64_t>(nloc) * nnei * 3 == rij_shape.dim_size(1)),
+        errors::InvalidArgument("dim of rij should be  nnei * 3"));
 
     // Create an output tensor
     TensorShape grad_net_shape;
diff --git a/source/op/soft_min.cc b/source/op/soft_min.cc
index 85aade5e7b..07c7404bbf 100644
--- a/source/op/soft_min.cc
+++ b/source/op/soft_min.cc
@@ -82,10 +82,13 @@ class SoftMinSwitchOp : public OpKernel {
     OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("shape of type should be nall"));
     OP_REQUIRES(
-        context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)),
+        context,
+        (3 * static_cast<int64_t>(nnei) * nloc ==
+         rij_tensor.shape().dim_size(1)),
         errors::InvalidArgument("shape of rij should be 3 * nloc * nnei"));
     OP_REQUIRES(
-        context, (nnei * nloc == nlist_tensor.shape().dim_size(1)),
+        context,
+        (static_cast<int64_t>(nnei) * nloc == nlist_tensor.shape().dim_size(1)),
         errors::InvalidArgument("shape of nlist should be nloc * nnei"));
 
     // Create an output tensor
diff --git a/source/op/soft_min_force.cc b/source/op/soft_min_force.cc
index 0801170597..14cb42b993 100644
--- a/source/op/soft_min_force.cc
+++ b/source/op/soft_min_force.cc
@@ -65,7 +65,8 @@ class SoftMinForceOp : public OpKernel {
     OP_REQUIRES(context, (nloc == du_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of du should match"));
     OP_REQUIRES(context,
-                (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)),
+                (static_cast<int64_t>(nloc) * nnei * 3 ==
+                 sw_deriv_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of switch deriv should match"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
diff --git a/source/op/soft_min_force_grad.cc b/source/op/soft_min_force_grad.cc
index 752ad4f93d..e173586d22 100644
--- a/source/op/soft_min_force_grad.cc
+++ b/source/op/soft_min_force_grad.cc
@@ -77,8 +77,10 @@ class SoftMinForceGradOp : public OpKernel {
     OP_REQUIRES(
         context, (nloc * 3 == grad_shape.dim_size(1)),
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),
-                errors::InvalidArgument("number of sw deriv should match"));
+    OP_REQUIRES(
+        context,
+        (static_cast<int64_t>(nloc) * nnei * 3 == sw_deriv_shape.dim_size(1)),
+        errors::InvalidArgument("number of sw deriv should match"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
 
diff --git a/source/op/soft_min_virial.cc b/source/op/soft_min_virial.cc
index 26daa78604..6c0e1f72f3 100644
--- a/source/op/soft_min_virial.cc
+++ b/source/op/soft_min_virial.cc
@@ -73,9 +73,12 @@ class SoftMinVirialOp : public OpKernel {
     OP_REQUIRES(context, (nloc == du_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of du should match"));
     OP_REQUIRES(context,
-                (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)),
+                (static_cast<int64_t>(nloc) * nnei * 3 ==
+                 sw_deriv_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of sw_deriv should match"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)),
+    OP_REQUIRES(context,
+                (static_cast<int64_t>(nloc) * nnei * 3 ==
+                 rij_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("dim of rij should be nnei * 3"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
diff --git a/source/op/soft_min_virial_grad.cc b/source/op/soft_min_virial_grad.cc
index bc9cb96a63..ac129b29af 100644
--- a/source/op/soft_min_virial_grad.cc
+++ b/source/op/soft_min_virial_grad.cc
@@ -84,10 +84,14 @@ class SoftMinVirialGradOp : public OpKernel {
         errors::InvalidArgument("input grad shape should be 3 x natoms"));
     OP_REQUIRES(context, (nloc == du_tensor.shape().dim_size(1)),
                 errors::InvalidArgument("number of du should match"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),
-                errors::InvalidArgument("number of descriptors should match"));
-    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
-                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(
+        context,
+        (static_cast<int64_t>(nloc) * nnei * 3 == sw_deriv_shape.dim_size(1)),
+        errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(
+        context,
+        (static_cast<int64_t>(nloc) * nnei * 3 == rij_shape.dim_size(1)),
+        errors::InvalidArgument("dim of rij should be  nnei * 3"));
     OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
                 errors::InvalidArgument("number of neighbors should match"));
 

From e5f9117f63cd1b17aaec48241224af3904782369 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 21 Jan 2024 23:05:51 -0500
Subject: [PATCH 96/97] set dpgui entry point to `deepmd_utils` (#3161)

This saves the time to load TensorFlow.

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7b733adc81..e91fd320f3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ keywords = ["deepmd"]
 deepmd = "deepmd.lmp:get_op_dir"
 
 [project.entry-points."dpgui"]
-"DeePMD-kit" = "deepmd.utils.argcheck:gen_args"
+"DeePMD-kit" = "deepmd_utils.utils.argcheck:gen_args"
 
 [project.urls]
 Homepage = "https://github.com/deepmodeling/deepmd-kit"

From 138ffef5dcac276d0f9e2ec1682f654375bcebe0 Mon Sep 17 00:00:00 2001
From: Denghui Lu <denghuilu@pku.edu.cn>
Date: Tue, 23 Jan 2024 11:01:14 +0800
Subject: [PATCH 97/97] Fix max nbor size related issues (#3157)

---
 source/op/prod_env_mat_multi_device.cc | 29 ++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc
index 5ee43d2af3..7037a00a6c 100644
--- a/source/op/prod_env_mat_multi_device.cc
+++ b/source/op/prod_env_mat_multi_device.cc
@@ -577,6 +577,15 @@ class ProdEnvMatAOp : public OpKernel {
             mesh_tensor.flat<int>().data(), mesh_tensor_size, nloc, nei_mode,
             rcut_r, max_cpy_trial, max_nnei_trial);
 
+        // max_nbor_size may have changed in _prepare_coord_nlist_gpu, so
+        // reallocate uint64_temp and refresh array_longlong if too small
+        if (uint64_temp.NumElements() < int_64(nloc) * max_nbor_size * 2) {
+          TensorShape uint64_shape;
+          uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+          OP_REQUIRES_OK(context, context->allocate_temp(
+                                      DT_UINT64, uint64_shape, &uint64_temp));
+          array_longlong = uint64_temp.flat<unsigned long long>().data();
+        }
         // launch the gpu(nv) compute function
         deepmd::prod_env_mat_a_gpu(em, em_deriv, rij, nlist, coord, type,
                                    gpu_inlist, array_int, array_longlong,
@@ -875,6 +884,16 @@ class ProdEnvMatROp : public OpKernel {
             mesh_tensor.flat<int>().data(), mesh_tensor_size, nloc, nei_mode,
             rcut, max_cpy_trial, max_nnei_trial);
 
+        // max_nbor_size may have changed in _prepare_coord_nlist_gpu, so
+        // reallocate uint64_temp and refresh array_longlong if too small
+        if (uint64_temp.NumElements() < int_64(nloc) * max_nbor_size * 2) {
+          TensorShape uint64_shape;
+          uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+          OP_REQUIRES_OK(context, context->allocate_temp(
+                                      DT_UINT64, uint64_shape, &uint64_temp));
+          array_longlong = uint64_temp.flat<unsigned long long>().data();
+        }
+
         // launch the gpu(nv) compute function
         deepmd::prod_env_mat_r_gpu(em, em_deriv, rij, nlist, coord, type,
                                    gpu_inlist, array_int, array_longlong,
@@ -1221,6 +1240,16 @@ class ProdEnvMatAMixOp : public OpKernel {
             mesh_tensor.flat<int>().data(), mesh_tensor_size, nloc, nei_mode,
             rcut_r, max_cpy_trial, max_nnei_trial);
 
+        // max_nbor_size may have changed in _prepare_coord_nlist_gpu, so
+        // reallocate uint64_temp and refresh array_longlong if too small
+        if (uint64_temp.NumElements() < int_64(nloc) * max_nbor_size * 2) {
+          TensorShape uint64_shape;
+          uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+          OP_REQUIRES_OK(context, context->allocate_temp(
+                                      DT_UINT64, uint64_shape, &uint64_temp));
+          array_longlong = uint64_temp.flat<unsigned long long>().data();
+        }
+
         // launch the gpu(nv) compute function
         deepmd::prod_env_mat_a_gpu(em, em_deriv, rij, nlist, coord, type,
                                    gpu_inlist, array_int, array_longlong,