docs: improve docs, examples, and error messages

1. Refactor type embedding docs; 2. Refactor model compression docs; 3. Refactor DPA-1 docs; 4. Add error messages when type embedding is set in other backends; 5. Bump `sel` in the DPA-2 example. Signed-off-by: Jinzhe Zeng <[email protected]>
njzjz · Nov 11, 2024 · 46c22c5 · 46c22c5
1 parent 3a95d22
commit 46c22c5
Show file tree

Hide file tree

Showing 19 changed files with 119 additions and 35 deletions.
diff --git a/deepmd/dpmodel/model/model.py b/deepmd/dpmodel/model/model.py
@@ -36,6 +36,10 @@ def get_standard_model(data: dict) -> EnergyModel:
     data : dict
         The data to construct the model.
     """
+    if "type_embedding" in data:
+        raise ValueError(
+            "In the DP backend, type_embedding is not at the model level, but within the descriptor. See type embedding documentation for details."
+        )
     data["descriptor"]["type_map"] = data["type_map"]
     data["descriptor"]["ntypes"] = len(data["type_map"])
     fitting_type = data["fitting_net"].pop("type")

diff --git a/deepmd/jax/model/model.py b/deepmd/jax/model/model.py
@@ -35,6 +35,10 @@ def get_standard_model(data: dict):
         The data to construct the model.
     """
     data = deepcopy(data)
+    if "type_embedding" in data:
+        raise ValueError(
+            "In the JAX backend, type_embedding is not at the model level, but within the descriptor. See type embedding documentation for details."
+        )
     descriptor_type = data["descriptor"].pop("type")
     data["descriptor"]["type_map"] = data["type_map"]
     data["descriptor"]["ntypes"] = len(data["type_map"])

diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py
@@ -73,6 +73,10 @@
 
 
 def _get_standard_model_components(model_params, ntypes):
+    if "type_embedding" in model_params:
+        raise ValueError(
+            "In the PyTorch backend, type_embedding is not at the model level, but within the descriptor. See type embedding documentation for details."
+        )
     # descriptor
     model_params["descriptor"]["ntypes"] = ntypes
     model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"])

diff --git a/deepmd/tf/model/model.py b/deepmd/tf/model/model.py
@@ -841,6 +841,10 @@ def serialize(self, suffix: str = "") -> dict:
             Name suffix to identify this descriptor
         """
         if self.typeebd is not None:
+            if not self.descrpt.explicit_ntypes:
+                raise RuntimeError(
+                    "type embedding for descriptors without mixed types is not supported in other backends"
+                )
             self.descrpt.type_embedding = self.typeebd
             self.fitting.tebd_dim = self.typeebd.neuron[-1]
         if self.spin is not None:

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
@@ -1772,7 +1772,7 @@ def model_args(exclude_hybrid=False):
     doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics."
     doc_data_stat_protect = "Protect parameter for atomic energy regression."
     doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias."
-    doc_type_embedding = "The type embedding."
+    doc_type_embedding = "The type embedding. In other backends, the type embedding is already included in the descriptor."
     doc_modifier = "The modifier of model output."
     doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly."
     doc_smin_alpha = "The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided."

diff --git a/doc/data/system.md b/doc/data/system.md
@@ -1,6 +1,6 @@
 # System
 
-DeePMD-kit takes a **system** as the data structure. A snapshot of a system is called a **frame**. A system may contain multiple frames with the same atom types and numbers, i.e. the same formula (like `H2O`). To contains data with different formulas, one usually needs to divide data into multiple systems, which may sometimes result in sparse-frame systems. See a [new system format](../model/train-se-atten.md#data-format) to further combine different systems with the same atom numbers, when training with descriptor `se_atten`.
+DeePMD-kit takes a **system** as the data structure. A snapshot of a system is called a **frame**. A system may contain multiple frames with the same atom types and numbers, i.e. the same formula (like `H2O`). To contain data with different formulas, one usually needs to divide data into multiple systems, which may sometimes result in sparse-frame systems.
 
 A system should contain system properties, input frame properties, and labeled frame properties. The system property contains the following property:
 

diff --git a/doc/freeze/compress.md b/doc/freeze/compress.md
@@ -112,9 +112,8 @@ The model compression interface requires the version of DeePMD-kit used in the o
 
 **Acceptable descriptor type**
 
-Descriptors with `se_e2_a`, `se_e3`, `se_e2_r` and `se_atten_v2` types are supported by the model compression feature. `Hybrid` mixed with the above descriptors is also supported.
-
-Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` set to 0.
+Not every descriptor supports model compression.
+See the documentation of a specific descriptor to see whether it supports model compression.
 
 **Available activation functions for descriptor:**
 

diff --git a/doc/model/dpa2.md b/doc/model/dpa2.md
@@ -21,3 +21,12 @@ otherwise the communication between GPU cards falls back to the slower CPU imple
 ## Data format
 
 DPA-2 supports both the [standard data format](../data/system.md) and the [mixed type data format](../data/system.md#mixed-type).
+
+## Type embedding
+
+Type embedding is within this descriptor with the {ref}`tebd_dim <model[standard]/descriptor[dpa2]/tebd_dim>` argument.
+
+## Model compression
+
+Model compression is supported, but only the `repinit` part is compressed.
+The effect is limited.
diff --git a/doc/model/train-hybrid.md b/doc/model/train-hybrid.md
@@ -48,3 +48,13 @@ A complete training input script of this example can be found in the directory
 ```bash
 $deepmd_source_dir/examples/water/hybrid/input.json
 ```
+
+## Type embedding
+
+Type embedding is different between the TensorFlow backend and other backends.
+In the TensorFlow backend, all descriptors share the same type embedding, which is defined at the model level.
+In other backends, each descriptor has its own type embedding and their parameters may be different.
+
+## Model compression
+
+Model compression is supported if all sub-descriptors support model compression.
diff --git a/doc/model/train-se-a-mask.md b/doc/model/train-se-a-mask.md
@@ -84,3 +84,11 @@ And the `loss` section in the training input script should be set as follows.
     "_comment": " that's all"
   }
 ```
+
+## Type embedding
+
+Same as [`se_e2_a`](./train-se-e2-a.md).
+
+## Model compression
+
+Same as [`se_e2_a`](./train-se-e2-a.md).
diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md
@@ -4,8 +4,6 @@
 **Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }}
 :::
 
-## DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular Simulation
-
 ![ALT](../images/model_se_atten.png "model_se_atten")
 
 Here we propose DPA-1, a Deep Potential model with a novel attention mechanism, which is highly effective for representing the conformation and chemical spaces of atomic systems and learning the PES.
@@ -68,11 +66,9 @@ Then layer normalization is added in a residual way to finally obtain the self-a
 
 [^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
-## Introduction to new features of DPA-1
-
-Next, we will list the detailed settings in input.json and the data format, especially for large systems with dozens of elements. An example of DPA-1 input can be found [here](../../examples/water/se_atten/input.json).
+## Descriptor `"se_atten"`
 
-### Descriptor `"se_atten"`
+Next, we will list the detailed settings in input.json and the data format, especially for large systems with dozens of elements. An example of DPA-1 input can be found in `examples/water/se_atten/input.json`.
 
 The notation of `se_atten` is short for the smooth edition of Deep Potential with an attention mechanism.
 This descriptor was described in detail in [the DPA-1 paper](https://arxiv.org/abs/2208.08236) and the images above.
@@ -126,7 +122,7 @@ We highly recommend using the version 2.0 of the attention-based descriptor `"se
       "set_davg_zero": false
 ```
 
-You can use descriptor `"se_atten_v2"` and do not need to set `tebd_input_mode` and `smooth_type_embedding`. In `"se_atten_v2"`, `tebd_input_mode` is forced to be `"strip"` and `smooth_type_embedding` is forced to be `"true"`. When `tebd_input_mode` is `"strip"`, the embedding matrix $\mathcal{G}^i$ is constructed as:
+You can use descriptor `"se_atten_v2"`, in which case you are not allowed to set `tebd_input_mode` and `smooth_type_embedding`. In `"se_atten_v2"`, `tebd_input_mode` is forced to be `"strip"` and `smooth_type_embedding` is forced to be `"true"`. When `tebd_input_mode` is `"strip"`, the embedding matrix $\mathcal{G}^i$ is constructed as:
 
 ```math
    (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})) + \mathcal{N}_{e,2}(s(r_{ij})) \odot ({N}_{e,2}(\{\mathcal{A}^i, \mathcal{A}^j\}) \odot s(r_{ij})) \quad \mathrm{or}
@@ -140,25 +136,28 @@ Practical evidence demonstrates that `"se_atten_v2"` offers better and more stab
 
 Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` set to 0.
 
-### Fitting `"ener"`
+## Type embedding
 
-DPA-1 only supports `"ener"` fitting type, and you can refer [here](train-energy.md) for detailed information.
+DPA-1 only supports models with type embeddings.
 
-### Type embedding
-
-DPA-1 only supports models with type embeddings. And the default setting is as follows:
+In the TensorFlow backend, the {ref}`type_embedding <model/type_embedding>` section will be used. If it is not set, the following default parameters will be used:
 
 ```json
 "type_embedding":{
-            "neuron":           [8],
-            "resnet_dt":        false,
-            "seed":             1
-        }
+    "neuron":           [8],
+    "resnet_dt":        false,
+    "seed":             1
+}
 ```
 
-You can add these settings in input.json if you want to change the default ones, see [here](train-se-e2-a-tebd.md) for detailed information.
+In other backends, type embedding is within this descriptor with the {ref}`tebd_dim <model[standard]/descriptor[se_atten_v2]/tebd_dim>` argument.
+
+## Difference between TensorFlow and other backends
 
-### Type map
+TensorFlow and other backends have different implementations for {ref}`smooth_type_embedding <model[standard]/descriptor[se_atten_v2]/smooth_type_embedding>`.
+The results are inconsistent when `smooth_type_embedding` is `true`.
+
+## Type map
 
 For training large systems, especially those with dozens of elements, the {ref}`type <model/type_map>` determines the element index of training data:
 
@@ -176,6 +175,10 @@ which should include all the elements in the dataset you want to train on.
 
 DPA-1 supports both the [standard data format](../data/system.md) and the [mixed type data format](../data/system.md#mixed-type).
 
+## Model compression
+
+Model compression is supported only when there is no attention layer (`attn_layer` is 0) and `tebd_input_mode` is `strip`.
+
 ## Training example
 
 Here we upload the AlMgCu example shown in the paper, you can download it here:

diff --git a/doc/model/train-se-e2-a-tebd.md b/doc/model/train-se-e2-a-tebd.md
@@ -1,7 +1,7 @@
-# Type embedding approach {{ tensorflow_icon }}
+# Type embedding approach {{ tensorflow_icon }} {{ pytorch_icon }} {{ jax_icon }} {{ dpmodel_icon }}
 
 :::{note}
-**Supported backends**: TensorFlow {{ tensorflow_icon }}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }}
 :::
 
 We generate specific a type embedding vector for each atom type so that we can share one descriptor embedding net and one fitting net in total, which decline training complexity largely.
@@ -63,8 +63,9 @@ In this way, all chemical species share the same network parameters through the
 
 [^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
-## Instructions
+## Instructions for TensorFlow backend {{ tensorflow_icon }}
 
+In the TensorFlow backend, the type embedding is at the model level.
 The {ref}`model <model>` defines how the model is constructed, adding a section of type embedding net:
 
 ```json
@@ -106,6 +107,10 @@ $deepmd_source_dir/examples/water/se_e2_a_tebd/input.json
 
 See [here](../development/type-embedding.md) for further explanation of `type embedding`.
 
-:::{note}
-You can't apply the compression method while using the atom type embedding.
-:::
+See documentation for each descriptor for details.
+
+## Instructions for other backends
+
+In other backends, the type embedding is within the descriptor itself.
+
+See documentation for each descriptor for details.
diff --git a/doc/model/train-se-e2-a.md b/doc/model/train-se-e2-a.md
@@ -94,3 +94,13 @@ The construction of the descriptor is given by section {ref}`descriptor <model[s
 - The {ref}`axis_neuron <model[standard]/descriptor[se_e2_a]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
 - If the option {ref}`resnet_dt <model[standard]/descriptor[se_e2_a]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
 - {ref}`seed <model[standard]/descriptor[se_e2_a]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+
+## Type embedding support
+
+Type embedding is only supported in the TensorFlow backend.
+`se_e2_a` with type embedding and [`se_atten`](./train-se-atten.md) (or its updated version) without any attention layer are mathematically equivalent, so `se_atten` can be a substitute in other backends.
+
+## Model compression
+
+Model compression is supported when type embedding is not used.
+To use model compression with type embedding in the TensorFlow backend, use `se_a_tebd_v2` instead.
diff --git a/doc/model/train-se-e2-r.md b/doc/model/train-se-e2-r.md
@@ -69,3 +69,11 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 ```
 
 The type of the descriptor is set by the key {ref}`type <model[standard]/descriptor/type>`.
+
+## Type embedding support
+
+Type embedding is only supported in the TensorFlow backend.
+
+## Model compression
+
+Model compression is supported when type embedding is not used.
diff --git a/doc/model/train-se-e3-tebd.md b/doc/model/train-se-e3-tebd.md
@@ -76,3 +76,11 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 ```
 
 The type of the descriptor is set by the key {ref}`type <model[standard]/descriptor/type>`.
+
+## Type embedding
+
+Type embedding is within this descriptor with the {ref}`tebd_dim <model[standard]/descriptor[se_e3_tebd]/tebd_dim>` argument.
+
+## Model compression
+
+Model compression is not supported.
diff --git a/doc/model/train-se-e3.md b/doc/model/train-se-e3.md
@@ -64,3 +64,11 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 ```
 
 The type of the descriptor is set by the key {ref}`type <model[standard]/descriptor/type>`.
+
+## Type embedding
+
+Use [`se_e3_tebd`](./train-se-e3-tebd.md) for type embedding support.
+
+## Model compression
+
+Model compression is supported.
diff --git a/examples/water/dpa2/input_torch_large.json b/examples/water/dpa2/input_torch_large.json
@@ -19,15 +19,15 @@
         ],
         "axis_neuron": 12,
         "activation_function": "tanh",
-        "three_body_sel": 40,
+        "three_body_sel": 47,
         "three_body_rcut": 4.0,
         "three_body_rcut_smth": 3.5,
         "use_three_body": true
       },
       "repformer": {
         "rcut": 4.0,
         "rcut_smth": 3.5,
-        "nsel": 40,
+        "nsel": 47,
         "nlayers": 12,
         "g1_dim": 128,
         "g2_dim": 32,

diff --git a/examples/water/dpa2/input_torch_medium.json b/examples/water/dpa2/input_torch_medium.json
@@ -19,15 +19,15 @@
         ],
         "axis_neuron": 12,
         "activation_function": "tanh",
-        "three_body_sel": 40,
+        "three_body_sel": 47,
         "three_body_rcut": 4.0,
         "three_body_rcut_smth": 3.5,
         "use_three_body": true
       },
       "repformer": {
         "rcut": 4.0,
         "rcut_smth": 3.5,
-        "nsel": 40,
+        "nsel": 47,
         "nlayers": 6,
         "g1_dim": 128,
         "g2_dim": 32,

diff --git a/examples/water/dpa2/input_torch_small.json b/examples/water/dpa2/input_torch_small.json
@@ -19,15 +19,15 @@
         ],
         "axis_neuron": 12,
         "activation_function": "tanh",
-        "three_body_sel": 40,
+        "three_body_sel": 47,
         "three_body_rcut": 4.0,
         "three_body_rcut_smth": 3.5,
         "use_three_body": true
       },
       "repformer": {
         "rcut": 4.0,
         "rcut_smth": 3.5,
-        "nsel": 40,
+        "nsel": 47,
         "nlayers": 3,
         "g1_dim": 128,
         "g2_dim": 32,