diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index fda8b19474..2dbf495d14 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -284,9 +284,9 @@ def call( gg = self.cal_g(tr, tt) gg = np.mean(gg, axis=2) # nf x nloc x ng x 1 - xyz_scatter += gg + xyz_scatter += gg * (self.sel[tt] / self.nnei) - res_rescale = 1.0 / 10.0 + res_rescale = 1.0 / 5.0 res = xyz_scatter * res_rescale res = res.reshape(nf, nloc, -1).astype(GLOBAL_NP_FLOAT_PRECISION) return res, None, None, None, ww diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index 643d1ad558..27e459d861 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -320,9 +320,9 @@ def forward( # nfnl x nt x ng gg = ll.forward(ss) gg = torch.mean(gg, dim=1).unsqueeze(1) - xyz_scatter += gg + xyz_scatter += gg * (self.sel[ii] / self.nnei) - res_rescale = 1.0 / 10.0 + res_rescale = 1.0 / 5.0 result = xyz_scatter * res_rescale result = result.view(-1, nloc, self.filter_neuron[-1]) return ( diff --git a/doc/backend.md b/doc/backend.md index 8c720ac1c1..0b49f1ca00 100644 --- a/doc/backend.md +++ b/doc/backend.md @@ -51,3 +51,7 @@ For example, when the model filename ends with `.pb` (the ProtoBuf file), DeePMD ## Convert model files between backends If a model is supported by two backends, one can use [`dp convert-backend`](./cli.rst) to convert the model file between these two backends. + +:::{warning} +Currently, only the `se_e2_a` model fully supports the backend conversion between TensorFlow {{ tensorflow_icon }} and PyTorch {{ pytorch_icon }}. +::: diff --git a/doc/freeze/freeze.md b/doc/freeze/freeze.md index ba0cd44606..151c0b3b44 100644 --- a/doc/freeze/freeze.md +++ b/doc/freeze/freeze.md @@ -1,10 +1,28 @@ # Freeze a model The trained neural network is extracted from a checkpoint and dumped into a protobuf(.pb) file. This process is called "freezing" a model. 
The idea and part of our code are from [Morgan](https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc). To freeze a model, typically one does + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + +```bash +$ dp freeze -o model.pb +``` +in the folder where the model is trained. The output model is called `model.pb`. + +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + ```bash -$ dp freeze -o graph.pb +$ dp --pt freeze -o model.pth ``` -in the folder where the model is trained. The output model is called `graph.pb`. +in the folder where the model is trained. The output model is called `model.pth`. + +::: + +:::: In [multi-task mode](../train/multi-task-training.md): - This process will in default output several models, each of which contains the common descriptor and diff --git a/doc/model/pairtab.md b/doc/model/pairtab.md index 719bb95004..fee4d754a6 100644 --- a/doc/model/pairtab.md +++ b/doc/model/pairtab.md @@ -53,6 +53,10 @@ in the order of Type_0-Type_0, Type_0-Type_1, ..., Type_0-Type_N, Type_1-Type_1, The interaction should be smooth at the cut-off distance. +:::{note} +In instances where the interaction at the cut-off distance is not delineated within the table file, extrapolation will be conducted utilizing the available interaction data. This extrapolative procedure guarantees a smooth transition from the table-provided value to `0` whenever feasible. +::: + ## Interpolation with a short-range pairwise potential ```json diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md index 1ac1b33519..745c0d1720 100644 --- a/doc/model/train-se-atten.md +++ b/doc/model/train-se-atten.md @@ -70,6 +70,11 @@ $deepmd_source_dir/examples/water/se_atten/input.json With the training input script, data are also provided in the example directory. One may train the model with the DeePMD-kit from the directory. 
An example of the DPA-1 descriptor is provided as follows + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + ```json "descriptor" :{ "type": "se_atten", @@ -86,6 +91,7 @@ An example of the DPA-1 descriptor is provided as follows "seed": 1 } ``` + * The {ref}`type ` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures. * {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. * **{ref}`sel `** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md)) @@ -98,6 +104,43 @@ An example of the DPA-1 descriptor is provided as follows * {ref}`attn_mask ` determines whether to mask the diagonal in the attention weights and False is recommended. * {ref}`attn_dotr ` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended. +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +```json + "descriptor" :{ + "type": "dpa1", + "rcut_smth": 0.50, + "rcut": 6.00, + "sel": 120, + "neuron": [25, 50, 100], + "tebd_dim": 8, + "axis_neuron": 16, + "attn": 128, + "attn_layer": 2, + "attn_mask": false, + "attn_dotr": true, + "post_ln": true + } +``` + +* The {ref}`type ` of the descriptor is set to `"dpa1"`, which will use DPA-1 structures. +* {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. +* **{ref}`sel `** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. 
(We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md)) +* The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, the inner layer is copied and concatenated, and a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. +* The {ref}`tebd_dim ` specifies the dimension of the type embedding. +* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003). +* {ref}`attn ` sets the length of a hidden vector during the scaled dot-product attention computation. +* {ref}`attn_layer ` sets the number of layers in the attention mechanism. +* {ref}`attn_mask ` determines whether to mask the diagonal in the attention weights; `False` is recommended. +* {ref}`attn_dotr ` determines whether to dot the relative coordinates on the attention weights as a gated scheme; `True` is recommended. +* {ref}`post_ln ` determines whether to perform post layer normalization. 
+ +::: + +:::: + ### Descriptor `"se_atten_v2"` We highly recommend using the version 2.0 of the attention-based descriptor `"se_atten_v2"`, which is inherited from `"se_atten"` but with the following parameter modifications: ```json diff --git a/source/tests/consistent/descriptor/test_se_e2_a.py b/source/tests/consistent/descriptor/test_se_e2_a.py index 0243a77044..b8f4205d09 100644 --- a/source/tests/consistent/descriptor/test_se_e2_a.py +++ b/source/tests/consistent/descriptor/test_se_e2_a.py @@ -51,7 +51,7 @@ def data(self) -> dict: precision, ) = self.param return { - "sel": [10, 10], + "sel": [9, 10], "rcut_smth": 5.80, "rcut": 6.00, "neuron": [6, 12, 24], diff --git a/source/tests/consistent/descriptor/test_se_r.py b/source/tests/consistent/descriptor/test_se_r.py index 354ae1cc99..8b835f3b5c 100644 --- a/source/tests/consistent/descriptor/test_se_r.py +++ b/source/tests/consistent/descriptor/test_se_r.py @@ -51,7 +51,7 @@ def data(self) -> dict: precision, ) = self.param return { - "sel": [10, 10], + "sel": [9, 10], "rcut_smth": 5.80, "rcut": 6.00, "neuron": [6, 12, 24],