From 8a657d53103d04a64f97c3650b252b7fb3b4b251 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:33:33 +0800
Subject: [PATCH 1/2] fix: correct `exclude_types` in descriptors (#3841)

1. make `exclude_types` consistent with mask in nlist for all
descriptors. (bugs fixed in dpa1 and dpa2)
2. add universal tests for descriptor. (now only test_exclude_types)
3. `TestCaseSingleFrameWithNlist` in
source/tests/pt/model/test_env_mat.py will be removed in a separate PR.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Added shared test-case fixtures and universal test cases for
descriptors.

- **Bug Fixes**
- Adjusted exclusion mask calculations and applications affecting
neighbor lists and switch behavior in various descriptors.

- **Chores**
- Updated the CUDA GitHub Actions workflow to install wget and unzip
before the CMake setup step.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 .github/workflows/test_cuda.yml               |   4 +-
 deepmd/dpmodel/descriptor/dpa1.py             |   5 +-
 deepmd/dpmodel/descriptor/repformers.py       |   2 +-
 deepmd/dpmodel/descriptor/se_e2_a.py          |   5 +
 deepmd/dpmodel/descriptor/se_r.py             |   5 +
 deepmd/dpmodel/descriptor/se_t.py             |   5 +
 deepmd/pt/model/descriptor/repformers.py      |   2 +-
 deepmd/pt/model/descriptor/se_atten.py        |   9 +-
 deepmd/tf/descriptor/se_atten.py              |  19 ++-
 source/tests/universal/common/cases/cases.py  | 133 ++++++++++++++++++
 .../common/cases/descriptor/__init__.py       |   1 +
 .../common/cases/descriptor/descriptor.py     |  11 ++
 .../common/cases/descriptor/utils.py          |  95 +++++++++++++
 .../universal/dpmodel/descriptor/__init__.py  |   1 +
 .../dpmodel/descriptor/test_descriptor.py     |  84 +++++++++++
 .../tests/universal/pt/descriptor/__init__.py |   1 +
 .../pt/descriptor/test_descriptor.py          |  84 +++++++++++
 17 files changed, 452 insertions(+), 14 deletions(-)
 create mode 100644 source/tests/universal/common/cases/cases.py
 create mode 100644 source/tests/universal/common/cases/descriptor/__init__.py
 create mode 100644 source/tests/universal/common/cases/descriptor/descriptor.py
 create mode 100644 source/tests/universal/common/cases/descriptor/utils.py
 create mode 100644 source/tests/universal/dpmodel/descriptor/__init__.py
 create mode 100644 source/tests/universal/dpmodel/descriptor/test_descriptor.py
 create mode 100644 source/tests/universal/pt/descriptor/__init__.py
 create mode 100644 source/tests/universal/pt/descriptor/test_descriptor.py

diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index d7ff72bafa..d97b1f9431 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -34,12 +34,12 @@ jobs:
       uses: mpi4py/setup-mpi@v1
       with:
         mpi: mpich
+    - name: Install wget and unzip
+      run: apt-get update && apt-get install -y wget unzip
     - uses: lukka/get-cmake@latest
       with:
         useLocalCache: true
         useCloudCache: false
-    - name: Install wget and unzip
-      run: apt-get update && apt-get install -y wget unzip
     - run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py
index 8abc8c2777..9f10aa15e5 100644
--- a/deepmd/dpmodel/descriptor/dpa1.py
+++ b/deepmd/dpmodel/descriptor/dpa1.py
@@ -804,7 +804,10 @@ def call(
         nf, nloc, nnei, _ = dmatrix.shape
         exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext)
         # nfnl x nnei
+        exclude_mask = exclude_mask.reshape(nf * nloc, nnei)
+        # nfnl x nnei
         nlist = nlist.reshape(nf * nloc, nnei)
+        nlist = np.where(exclude_mask, nlist, -1)
         # nfnl x nnei x 4
         dmatrix = dmatrix.reshape(nf * nloc, nnei, 4)
         # nfnl x nnei x 1
@@ -824,8 +827,6 @@ def call(
             nf * nloc, nnei, self.tebd_dim
         )
         ng = self.neuron[-1]
-        # nfnl x nnei
-        exclude_mask = exclude_mask.reshape(nf * nloc, nnei)
         # nfnl x nnei x 4
         rr = dmatrix.reshape(nf * nloc, nnei, 4)
         rr = rr * exclude_mask[:, :, None]
diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py
index c9ac48efec..3f7f57478b 100644
--- a/deepmd/dpmodel/descriptor/repformers.py
+++ b/deepmd/dpmodel/descriptor/repformers.py
@@ -338,7 +338,7 @@ def call(
         mapping: Optional[np.ndarray] = None,
     ):
         exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext)
-        nlist = nlist * exclude_mask
+        nlist = np.where(exclude_mask, nlist, -1)
         # nf x nloc x nnei x 4
         dmatrix, diff, sw = self.env_mat.call(
             coord_ext, atype_ext, nlist, self.mean, self.stddev
diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py
index 7a8899b4e5..d26cba2aff 100644
--- a/deepmd/dpmodel/descriptor/se_e2_a.py
+++ b/deepmd/dpmodel/descriptor/se_e2_a.py
@@ -117,6 +117,9 @@ class DescrptSeA(NativeOP, BaseDescriptor):
             The precision of the embedding net parameters. Supported options are |PRECISION|
     spin
             The deepspin object.
+    ntypes : int
+            Number of element types.
+            Not used in this descriptor, only to be compat with input.
 
     Limitations
     -----------
@@ -150,9 +153,11 @@ def __init__(
         activation_function: str = "tanh",
         precision: str = DEFAULT_PRECISION,
         spin: Optional[Any] = None,
+        ntypes: Optional[int] = None,  # to be compat with input
         # consistent with argcheck, not used though
         seed: Optional[int] = None,
     ) -> None:
+        del ntypes
         ## seed, uniform_seed, not included.
         if spin is not None:
             raise NotImplementedError("spin is not implemented")
diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py
index dbb6e104fb..9197859294 100644
--- a/deepmd/dpmodel/descriptor/se_r.py
+++ b/deepmd/dpmodel/descriptor/se_r.py
@@ -75,6 +75,9 @@ class DescrptSeR(NativeOP, BaseDescriptor):
             The precision of the embedding net parameters. Supported options are |PRECISION|
     spin
             The deepspin object.
+    ntypes : int
+            Number of element types.
+            Not used in this descriptor, only to be compat with input.
 
     Limitations
     -----------
@@ -107,9 +110,11 @@ def __init__(
         activation_function: str = "tanh",
         precision: str = DEFAULT_PRECISION,
         spin: Optional[Any] = None,
+        ntypes: Optional[int] = None,  # to be compat with input
         # consistent with argcheck, not used though
         seed: Optional[int] = None,
     ) -> None:
+        del ntypes
         ## seed, uniform_seed, not included.
         if not type_one_side:
             raise NotImplementedError("type_one_side == False not implemented")
diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py
index cdbeb701ce..2ed951f440 100644
--- a/deepmd/dpmodel/descriptor/se_t.py
+++ b/deepmd/dpmodel/descriptor/se_t.py
@@ -78,6 +78,9 @@ class DescrptSeT(NativeOP, BaseDescriptor):
             If the weights of embedding net are trainable.
     seed : int, Optional
             Random seed for initializing the network parameters.
+    ntypes : int
+            Number of element types.
+            Not used in this descriptor, only to be compat with input.
     """
 
     def __init__(
@@ -94,7 +97,9 @@ def __init__(
         precision: str = DEFAULT_PRECISION,
         trainable: bool = True,
         seed: Optional[int] = None,
+        ntypes: Optional[int] = None,  # to be compat with input
     ) -> None:
+        del ntypes
         self.rcut = rcut
         self.rcut_smth = rcut_smth
         self.sel = sel
diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py
index a66693653e..2e091d6bb8 100644
--- a/deepmd/pt/model/descriptor/repformers.py
+++ b/deepmd/pt/model/descriptor/repformers.py
@@ -410,7 +410,7 @@ def forward(
         atype = extended_atype[:, :nloc]
         # nb x nloc x nnei
         exclude_mask = self.emask(nlist, extended_atype)
-        nlist = nlist * exclude_mask
+        nlist = torch.where(exclude_mask != 0, nlist, -1)
         # nb x nloc x nnei x 4, nb x nloc x nnei x 3, nb x nloc x nnei x 1
         dmatrix, diff, sw = prod_env_mat(
             extended_coord,
diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py
index a59eaca409..181277b637 100644
--- a/deepmd/pt/model/descriptor/se_atten.py
+++ b/deepmd/pt/model/descriptor/se_atten.py
@@ -478,11 +478,12 @@ def forward(
             self.rcut_smth,
             protection=self.env_protection,
         )
+        # nb x nloc x nnei
+        exclude_mask = self.emask(nlist, extended_atype)
+        nlist = torch.where(exclude_mask != 0, nlist, -1)
         nlist_mask = nlist != -1
         nlist = torch.where(nlist == -1, 0, nlist)
         sw = torch.squeeze(sw, -1)
-        # beyond the cutoff sw should be 0.0
-        sw = sw.masked_fill(~nlist_mask, 0.0)
         # nf x nloc x nt -> nf x nloc x nnei x nt
         atype_tebd = extended_atype_embd[:, :nloc, :]
         atype_tebd_nnei = atype_tebd.unsqueeze(2).expand(-1, -1, self.nnei, -1)
@@ -495,8 +496,10 @@ def forward(
         atype_tebd_nlist = torch.gather(atype_tebd_ext, dim=1, index=index)
         # nb x nloc x nnei x nt
         atype_tebd_nlist = atype_tebd_nlist.view(nb, nloc, nnei, nt)
+        # beyond the cutoff sw should be 0.0
+        sw = sw.masked_fill(~nlist_mask, 0.0)
         # (nb x nloc) x nnei
-        exclude_mask = self.emask(nlist, extended_atype).view(nb * nloc, nnei)
+        exclude_mask = exclude_mask.view(nb * nloc, nnei)
         if self.old_impl:
             assert self.filter_layers_old is not None
             dmatrix = dmatrix.view(
diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py
index 6d3cfeaa6e..b240f00647 100644
--- a/deepmd/tf/descriptor/se_atten.py
+++ b/deepmd/tf/descriptor/se_atten.py
@@ -718,6 +718,12 @@ def _pass_filter(
                 tf.shape(inputs_i)[0],
                 self.nei_type_vec,  # extra input for atten
             )
+            #  (nframes * nloc * nnei, 1)
+            nei_exclude_mask = tf.slice(
+                tf.reshape(tf.cast(mask, self.filter_precision), [-1, 4]),
+                [0, 0],
+                [-1, 1],
+            )
             if self.smooth:
                 inputs_i = tf.where(
                     tf.cast(mask, tf.bool),
@@ -727,15 +733,18 @@ def _pass_filter(
                         tf.reshape(self.avg_looked_up, [-1, 1]), [1, self.ndescrpt]
                     ),
                 )
+                #  (nframes, nloc, nnei)
                 self.recovered_switch *= tf.reshape(
-                    tf.slice(
-                        tf.reshape(tf.cast(mask, self.filter_precision), [-1, 4]),
-                        [0, 0],
-                        [-1, 1],
-                    ),
+                    nei_exclude_mask,
                     [-1, natoms[0], self.sel_all_a[0]],
                 )
             else:
+                #  (nframes * nloc, 1,  nnei)
+                self.nmask *= tf.reshape(
+                    nei_exclude_mask,
+                    [-1, 1, self.sel_all_a[0]],
+                )
+                self.negative_mask = -(2 << 32) * (1.0 - self.nmask)
                 inputs_i *= mask
         if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor:
             inputs_i = descrpt2r4(inputs_i, atype)
diff --git a/source/tests/universal/common/cases/cases.py b/source/tests/universal/common/cases/cases.py
new file mode 100644
index 0000000000..a8c9a7cd71
--- /dev/null
+++ b/source/tests/universal/common/cases/cases.py
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+
+# originally copied from source/tests/pt/model/test_env_mat.py
+class TestCaseSingleFrameWithNlist:
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nall = 4
+        self.nf, self.nt = 2, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall, 3])
+        self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        self.mapping = np.array([0, 1, 2, 0], dtype=int).reshape([1, self.nall])
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.sel_mix = [7]
+        self.natoms = [3, 3, 2, 1]
+        self.nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        # permutations
+        self.perm = np.array([2, 0, 1, 3], dtype=np.int32)
+        inv_perm = np.array([1, 2, 0, 3], dtype=np.int32)
+        # permute the coord and atype
+        self.coord_ext = np.concatenate(
+            [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0
+        ).reshape(self.nf, self.nall * 3)
+        self.atype_ext = np.concatenate(
+            [self.atype_ext, self.atype_ext[:, self.perm]], axis=0
+        )
+        self.mapping = np.concatenate(
+            [self.mapping, self.mapping[:, self.perm]], axis=0
+        )
+
+        # permute the nlist
+        nlist1 = self.nlist[:, self.perm[: self.nloc], :]
+        mask = nlist1 == -1
+        nlist1 = inv_perm[nlist1]
+        nlist1 = np.where(mask, -1, nlist1)
+        self.nlist = np.concatenate([self.nlist, nlist1], axis=0)
+        self.atol = 1e-12
+
+
+class TestCaseSingleFrameWithNlistWithVirtual:
+    def setUp(self):
+        # nloc == 4, nall == 5
+        self.nloc = 4
+        self.nall = 5
+        self.nf, self.nt = 2, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall, 3])
+        self.atype_ext = np.array([0, -1, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.sel_mix = [7]
+        self.natoms = [3, 3, 2, 1]
+        self.nlist = np.array(
+            [
+                [2, 4, -1, -1, -1, 3, -1],
+                [-1, -1, -1, -1, -1, -1, -1],
+                [0, -1, -1, -1, -1, 3, -1],
+                [0, 2, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        # permutations
+        self.perm = np.array([3, 0, 1, 2, 4], dtype=np.int32)
+        inv_perm = np.argsort(self.perm)
+        # permute the coord and atype
+        self.coord_ext = np.concatenate(
+            [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0
+        ).reshape(self.nf, self.nall * 3)
+        self.atype_ext = np.concatenate(
+            [self.atype_ext, self.atype_ext[:, self.perm]], axis=0
+        )
+        # permute the nlist
+        nlist1 = self.nlist[:, self.perm[: self.nloc], :]
+        mask = nlist1 == -1
+        nlist1 = inv_perm[nlist1]
+        nlist1 = np.where(mask, -1, nlist1)
+        self.nlist = np.concatenate([self.nlist, nlist1], axis=0)
+        self.get_real_mapping = np.array([[0, 2, 3], [0, 1, 3]], dtype=np.int32)
+        self.atol = 1e-12
+
+
+class TestCaseSingleFrameWithoutNlist:
+    def setUp(self):
+        # nloc == 3; no nall here (coordinates are not extended)
+        self.nloc = 3
+        self.nf, self.nt = 1, 2
+        self.coord = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nloc * 3])
+        self.atype = np.array([0, 0, 1], dtype=int).reshape([1, self.nloc])
+        self.cell = 2.0 * np.eye(3).reshape([1, 9])
+        # sel = [16, 8]
+        self.sel = [16, 8]
+        self.sel_mix = [24]
+        self.natoms = [3, 3, 2, 1]
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        self.atol = 1e-12
diff --git a/source/tests/universal/common/cases/descriptor/__init__.py b/source/tests/universal/common/cases/descriptor/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/universal/common/cases/descriptor/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/universal/common/cases/descriptor/descriptor.py b/source/tests/universal/common/cases/descriptor/descriptor.py
new file mode 100644
index 0000000000..2f3bdbb1ee
--- /dev/null
+++ b/source/tests/universal/common/cases/descriptor/descriptor.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+from .utils import (
+    DescriptorTestCase,
+)
+
+
+class DescriptorTest(DescriptorTestCase):
+    def setUp(self) -> None:
+        DescriptorTestCase.setUp(self)
diff --git a/source/tests/universal/common/cases/descriptor/utils.py b/source/tests/universal/common/cases/descriptor/utils.py
new file mode 100644
index 0000000000..aa1a8c21d4
--- /dev/null
+++ b/source/tests/universal/common/cases/descriptor/utils.py
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from copy import (
+    deepcopy,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.utils import (
+    PairExcludeMask,
+)
+
+from ..cases import (
+    TestCaseSingleFrameWithNlist,
+)
+
+
+class DescriptorTestCase(TestCaseSingleFrameWithNlist):
+    """Common test case for descriptor."""
+
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+        self.input_dict = {
+            "ntypes": self.nt,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel,
+        }
+
+    def test_forward_consistency(self):
+        ret = []
+        for module in self.modules_to_test:
+            module = self.forward_wrapper(module)
+            ret.append(
+                module(
+                    self.coord_ext,
+                    self.atype_ext,
+                    self.nlist,
+                    mapping=self.mapping,
+                )
+            )
+        for kk, vv in enumerate(ret[0]):
+            subret = []
+            for rr in ret:
+                if rr is not None:
+                    subret.append(rr[kk])
+            if len(subret):
+                for ii, rr in enumerate(subret[1:]):
+                    if subret[0] is None:
+                        assert rr is None
+                    else:
+                        np.testing.assert_allclose(
+                            subret[0],
+                            rr,
+                            err_msg=f"compare {kk} output between 0 and {ii}",
+                        )
+
+    def test_exclude_types(
+        self,
+    ):
+        coord_ext_device = self.coord_ext
+        atype_ext_device = self.atype_ext
+        nlist_device = self.nlist
+        mapping_device = self.mapping
+        dd = self.forward_wrapper(self.module)
+        # only equal when set_davg_zero is True
+        serialize_dict = self.module.serialize()
+
+        for em in [[[0, 1]], [[1, 1]]]:
+            ex_pair = PairExcludeMask(self.nt, em)
+            pair_mask = ex_pair.build_type_exclude_mask(nlist_device, atype_ext_device)
+            # exclude neighbors in the nlist
+            nlist_exclude = np.where(pair_mask == 1, nlist_device, -1)
+            rd_ex, _, _, _, sw_ex = dd(
+                coord_ext_device,
+                atype_ext_device,
+                nlist_exclude,
+                mapping=mapping_device,
+            )
+
+            # normal nlist but use exclude_types params
+            serialize_dict_em = deepcopy(serialize_dict)
+            if "list" not in serialize_dict_em:
+                serialize_dict_em.update({"exclude_types": em})
+            else:
+                # for hybrid
+                for sd in serialize_dict_em["list"]:
+                    sd.update({"exclude_types": em})
+            dd0 = self.forward_wrapper(self.module.deserialize(serialize_dict_em))
+            rd0, _, _, _, sw0 = dd0(
+                coord_ext_device,
+                atype_ext_device,
+                nlist_device,
+                mapping=mapping_device,
+            )
+            np.testing.assert_allclose(rd0, rd_ex)
diff --git a/source/tests/universal/dpmodel/descriptor/__init__.py b/source/tests/universal/dpmodel/descriptor/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/universal/dpmodel/descriptor/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/universal/dpmodel/descriptor/test_descriptor.py b/source/tests/universal/dpmodel/descriptor/test_descriptor.py
new file mode 100644
index 0000000000..9d0253c54c
--- /dev/null
+++ b/source/tests/universal/dpmodel/descriptor/test_descriptor.py
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+from deepmd.dpmodel.descriptor import (
+    DescrptDPA1,
+    DescrptDPA2,
+    DescrptHybrid,
+    DescrptSeA,
+    DescrptSeR,
+    DescrptSeT,
+)
+
+from ...common.cases.descriptor.descriptor import (
+    DescriptorTest,
+)
+from ..backend import (
+    DPTestCase,
+)
+
+
+class TestDescriptorSeADP(unittest.TestCase, DescriptorTest, DPTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptSeA(**self.input_dict)
+
+
+class TestDescriptorSeRDP(unittest.TestCase, DescriptorTest, DPTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptSeR(**self.input_dict)
+
+
+class TestDescriptorSeTDP(unittest.TestCase, DescriptorTest, DPTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptSeT(**self.input_dict)
+
+
+class TestDescriptorDPA1DP(unittest.TestCase, DescriptorTest, DPTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptDPA1(**self.input_dict)
+
+
+class TestDescriptorDPA2DP(unittest.TestCase, DescriptorTest, DPTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.input_dict = {
+            "ntypes": self.nt,
+            "repinit": {
+                "rcut": self.rcut,
+                "rcut_smth": self.rcut_smth,
+                "nsel": self.sel_mix,
+            },
+            "repformer": {
+                "rcut": self.rcut / 2,
+                "rcut_smth": self.rcut_smth,
+                "nsel": self.sel_mix[0] // 2,
+            },
+        }
+        self.module = DescrptDPA2(**self.input_dict)
+
+
+class TestDescriptorHybridDP(unittest.TestCase, DescriptorTest, DPTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        ddsub0 = {
+            "type": "se_e2_a",
+            "ntypes": self.nt,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel,
+        }
+        ddsub1 = {
+            "type": "dpa1",
+            "ntypes": self.nt,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel_mix,
+        }
+        self.input_dict = {
+            "list": [ddsub0, ddsub1],
+        }
+        self.module = DescrptHybrid(**self.input_dict)
diff --git a/source/tests/universal/pt/descriptor/__init__.py b/source/tests/universal/pt/descriptor/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/universal/pt/descriptor/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/universal/pt/descriptor/test_descriptor.py b/source/tests/universal/pt/descriptor/test_descriptor.py
new file mode 100644
index 0000000000..87107a2f90
--- /dev/null
+++ b/source/tests/universal/pt/descriptor/test_descriptor.py
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+from deepmd.pt.model.descriptor import (
+    DescrptDPA1,
+    DescrptDPA2,
+    DescrptHybrid,
+    DescrptSeA,
+    DescrptSeR,
+    DescrptSeT,
+)
+
+from ...common.cases.descriptor.descriptor import (
+    DescriptorTest,
+)
+from ..backend import (
+    PTTestCase,
+)
+
+
+class TestDescriptorSeAPT(unittest.TestCase, DescriptorTest, PTTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptSeA(**self.input_dict)
+
+
+class TestDescriptorSeRPT(unittest.TestCase, DescriptorTest, PTTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptSeR(**self.input_dict)
+
+
+class TestDescriptorSeTPT(unittest.TestCase, DescriptorTest, PTTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptSeT(**self.input_dict)
+
+
+class TestDescriptorDPA1PT(unittest.TestCase, DescriptorTest, PTTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.module = DescrptDPA1(**self.input_dict)
+
+
+class TestDescriptorDPA2PT(unittest.TestCase, DescriptorTest, PTTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        self.input_dict = {
+            "ntypes": self.nt,
+            "repinit": {
+                "rcut": self.rcut,
+                "rcut_smth": self.rcut_smth,
+                "nsel": self.sel_mix,
+            },
+            "repformer": {
+                "rcut": self.rcut / 2,
+                "rcut_smth": self.rcut_smth,
+                "nsel": self.sel_mix[0] // 2,
+            },
+        }
+        self.module = DescrptDPA2(**self.input_dict)
+
+
+class TestDescriptorHybridPT(unittest.TestCase, DescriptorTest, PTTestCase):
+    def setUp(self):
+        DescriptorTest.setUp(self)
+        ddsub0 = {
+            "type": "se_e2_a",
+            "ntypes": self.nt,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel,
+        }
+        ddsub1 = {
+            "type": "dpa1",
+            "ntypes": self.nt,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel_mix,
+        }
+        self.input_dict = {
+            "list": [ddsub0, ddsub1],
+        }
+        self.module = DescrptHybrid(**self.input_dict)

From eb474d485f786c95c805cd5dabbb213f1d872725 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 4 Jun 2024 12:40:46 -0400
Subject: [PATCH 2/2] feat: support generating JSON schema for integration with
 VSCode (#3849)

Doc:

![image](https://github.com/deepmodeling/deepmd-kit/assets/9496702/b313616d-4f98-4978-931b-b135208f42ac)

Validation of type:

![image](https://github.com/deepmodeling/deepmd-kit/assets/9496702/c54b912d-9440-4dd2-94be-5c5392f492c8)

Validation of required keys:

![image](https://github.com/deepmodeling/deepmd-kit/assets/9496702/bf6c6469-d6a1-4d89-9015-b845ddddc921)

Auto-completion:


![image](https://github.com/deepmodeling/deepmd-kit/assets/9496702/c1aea3ef-0c93-4a50-85e4-7070c40460dd)


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **New Features**
  - Added support for generating JSON schema documentation.
  - Updated `--out-type` argument choices to include "json_schema".

- **Documentation**
- Added instructions for using Visual Studio Code to handle JSON files
and generate JSON schema.

- **Tests**
- Introduced unit tests for the `doc_train_input` function to ensure
proper handling of different output types.

- **Chores**
- Updated the version constraint for the `dargs` dependency to `>=
0.4.6`.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 deepmd/entrypoints/doc.py                   |  3 ++
 deepmd/main.py                              |  2 +-
 deepmd/utils/argcheck.py                    | 18 ++++++++++
 doc/train/train-input.rst                   | 38 +++++++++++++++++++--
 pyproject.toml                              |  2 +-
 source/tests/common/test_doc_train_input.py | 33 ++++++++++++++++++
 6 files changed, 92 insertions(+), 4 deletions(-)
 create mode 100644 source/tests/common/test_doc_train_input.py

diff --git a/deepmd/entrypoints/doc.py b/deepmd/entrypoints/doc.py
index 2f0d57e645..65e38940f8 100644
--- a/deepmd/entrypoints/doc.py
+++ b/deepmd/entrypoints/doc.py
@@ -4,6 +4,7 @@
 from deepmd.utils.argcheck import (
     gen_doc,
     gen_json,
+    gen_json_schema,
 )
 
 __all__ = ["doc_train_input"]
@@ -15,6 +16,8 @@ def doc_train_input(*, out_type: str = "rst", **kwargs):
         doc_str = gen_doc(make_anchor=True)
     elif out_type == "json":
         doc_str = gen_json()
+    elif out_type == "json_schema":
+        doc_str = gen_json_schema()
     else:
         raise RuntimeError(f"Unsupported out type {out_type}")
     print(doc_str)  # noqa: T201
diff --git a/deepmd/main.py b/deepmd/main.py
index e8b93320c6..322933333c 100644
--- a/deepmd/main.py
+++ b/deepmd/main.py
@@ -500,7 +500,7 @@ def main_parser() -> argparse.ArgumentParser:
     parsers_doc.add_argument(
         "--out-type",
         default="rst",
-        choices=["rst", "json"],
+        choices=["rst", "json", "json_schema"],
         type=str,
         help="The output type",
     )
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 7b6e13be3a..fadec096eb 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -14,7 +14,13 @@
     Variant,
     dargs,
 )
+from dargs.json_schema import (
+    generate_json_schema,
+)
 
+from deepmd import (
+    __version__,
+)
 from deepmd.common import (
     VALID_ACTIVATION,
     VALID_PRECISION,
@@ -2450,6 +2456,18 @@ def gen_args(**kwargs) -> List[Argument]:
     ]
 
 
+def gen_json_schema() -> str:
+    """Generate JSON schema.
+
+    Returns
+    -------
+    str
+        JSON schema.
+    """
+    arg = Argument("DeePMD-kit", dict, gen_args(), doc=f"DeePMD-kit {__version__}")
+    return json.dumps(generate_json_schema(arg))
+
+
 def normalize(data):
     base = Argument("base", dict, gen_args())
     data = base.normalize_value(data, trim_pattern="_*")
diff --git a/doc/train/train-input.rst b/doc/train/train-input.rst
index 04e82451e4..8c5d811576 100644
--- a/doc/train/train-input.rst
+++ b/doc/train/train-input.rst
@@ -1,8 +1,42 @@
 Training Parameters
 ======================================
 .. note::
-   One can load, modify, and export the input file by using our effective web-based tool `DP-GUI <https://deepmodeling.com/dpgui/input/deepmd-kit-2.0>`_ online or hosted using the :ref:`command line interface <cli>` :code:`dp gui`. All training parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file for furthur training.
+   One can load, modify, and export the input file by using our effective web-based tool `DP-GUI <https://deepmodeling.com/dpgui/input/deepmd-kit-2.0>`_ online or hosted using the :ref:`command line interface <cli>` :code:`dp gui`. All training parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file for further training.
+
+.. note::
+   One can benefit from IntelliSense and validation when
+   :ref:`writing JSON files using Visual Studio Code <json_vscode>`.
+   See :ref:`here <json_vscode>` to learn how to configure.
 
 .. dargs::
-   :module: deepmd.tf.utils.argcheck
+   :module: deepmd.utils.argcheck
    :func: gen_args
+
+.. _json_vscode:
+
+Writing JSON files using Visual Studio Code
+-------------------------------------------
+
+When writing JSON files using `Visual Studio Code <https://code.visualstudio.com/>`_, one can benefit from IntelliSense and
+validation by adding a `JSON schema <https://json-schema.org/>`_.
+To do so, in a VS Code workspace, one can generate a JSON schema file for the input file by running the following command:
+
+.. code-block:: bash
+
+   dp doc-train-input --out-type json_schema > deepmd.json
+
+Then one can `map the schema <https://code.visualstudio.com/docs/languages/json#_mapping-to-a-schema-in-the-workspace>`_
+by updating the workspace settings in the `.vscode/settings.json` file as follows:
+
+.. code-block:: json
+
+   {
+      "json.schemas": [
+         {
+               "fileMatch": [
+                  "/**/*.json"
+               ],
+               "url": "./deepmd.json"
+         }
+      ]
+   }
diff --git a/pyproject.toml b/pyproject.toml
index 75aab35936..f22ff40fa2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ dependencies = [
     'numpy',
     'scipy',
     'pyyaml',
-    'dargs >= 0.4.1',
+    'dargs >= 0.4.6',
     'typing_extensions; python_version < "3.8"',
     'importlib_metadata>=1.4; python_version < "3.8"',
     'h5py',
diff --git a/source/tests/common/test_doc_train_input.py b/source/tests/common/test_doc_train_input.py
new file mode 100644
index 0000000000..aabbad63a2
--- /dev/null
+++ b/source/tests/common/test_doc_train_input.py
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import io
+import json
+import unittest
+from contextlib import (
+    redirect_stdout,
+)
+
+from deepmd.entrypoints.doc import (
+    doc_train_input,
+)
+
+
+class TestDocTrainInput(unittest.TestCase):
+    def test_rst(self):
+        f = io.StringIO()
+        with redirect_stdout(f):
+            doc_train_input(out_type="rst")
+        self.assertNotEqual(f.getvalue(), "")
+
+    def test_json(self):
+        f = io.StringIO()
+        with redirect_stdout(f):
+            doc_train_input(out_type="json")
+        # validate json
+        json.loads(f.getvalue())
+
+    def test_json_schema(self):
+        f = io.StringIO()
+        with redirect_stdout(f):
+            doc_train_input(out_type="json_schema")
+        # validate json
+        json.loads(f.getvalue())