pd: support dpa1 (#4414)

Summary of this PR: 1. upload DPA-1 related code 2. merge a large amount of code from the develop branch 3. add all eager composite operators except `softmax_grad`, `p_norm_grad`, `split_grad`, and `concat_grad` to the composite operator blacklist (<https://github.com/deepmodeling/deepmd-kit/pull/4414/files#diff-e678abb052b278f8a479f8d13b839a9ec0effd9923478a850bc13758f918e1e9R134-R148>) to significantly improve model execution speed (reducing the time taken from 100% more than PyTorch to about 10% to 15% more). Related PR: lanpa/tensorboardX#728 ### Training curve: ![training_curves_comparison_eager_opt](https://github.com/user-attachments/assets/3b71fc99-5abf-4353-a61a-38737d3c7f2c) ### Accuracy test (left: paddle, right: torch): ![image](https://github.com/user-attachments/assets/a42b4bfd-c0f8-4eb8-85eb-ff1adf981dbb) Related optimizations of the Paddle framework: - [x] PaddlePaddle/Paddle#69349 - [x] PaddlePaddle/Paddle#69333 - [x] PaddlePaddle/Paddle#69479 - [x] PaddlePaddle/Paddle#69515 - [x] PaddlePaddle/Paddle#69487 - [x] PaddlePaddle/Paddle#69661 - [x] PaddlePaddle/Paddle#69660 - [x] PaddlePaddle/Paddle#69596 - [x] PaddlePaddle/Paddle#69556  ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced several new classes for molecular descriptors, including `DescrptDPA1`, `DescrptBlockSeAtten`, and `LayerNorm`, enhancing the modeling capabilities for molecular simulations. - Added new JSON configuration files for model parameters and multitask models related to water simulations. - Implemented new test classes for validating the functionality of the `DPAtomicModel` and various descriptor classes. - Added new test classes for evaluating denoising models, including `TestDenoiseModelDPA1` and `TestDenoiseModelDPA2`. - Enhanced the `ModelWrapper` class to clarify the handling of model parameters and state management. - **Bug Fixes** - Improved internal logic for handling model state saving and loading, ensuring consistency in outputs. 
- **Documentation** - Enhanced type hints and return annotations across various classes and methods for better clarity. - **Tests** - Expanded the testing framework with new test cases for denoising models and descriptor functionalities, ensuring robust validation of features. - Activated previously skipped tests for energy models, improving test coverage. - Enhanced multitask training tests with new configuration handling and test classes.  --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
deepmodeling · Dec 17, 2024 · e8167ce · e8167ce
1 parent e21f985
commit e8167ce
Show file tree

Hide file tree

Showing 50 changed files with 4,740 additions and 442 deletions.
diff --git a/deepmd/pd/entrypoints/main.py b/deepmd/pd/entrypoints/main.py
@@ -230,7 +230,7 @@ def train(
     use_pretrain_script: bool = False,
     force_load: bool = False,
     output: str = "out.json",
-):
+) -> None:
     log.info("Configuration path: %s", input_file)
     SummaryPrinter()()
     with open(input_file) as fin:
@@ -321,18 +321,26 @@ def train(
     # save min_nbor_dist
     if min_nbor_dist is not None:
         if not multi_task:
-            trainer.model.min_nbor_dist = min_nbor_dist
+            trainer.model.min_nbor_dist = paddle.to_tensor(
+                min_nbor_dist,
+                dtype=paddle.float64,
+                place=DEVICE,
+            )
         else:
             for model_item in min_nbor_dist:
-                trainer.model[model_item].min_nbor_dist = min_nbor_dist[model_item]
+                trainer.model[model_item].min_nbor_dist = paddle.to_tensor(
+                    min_nbor_dist[model_item],
+                    dtype=paddle.float64,
+                    place=DEVICE,
+                )
     trainer.run()
 
 
 def freeze(
     model: str,
     output: str = "frozen_model.json",
     head: Optional[str] = None,
-):
+) -> None:
     paddle.set_flags(
         {
             "FLAGS_save_cf_stack_op": 1,
@@ -383,7 +391,7 @@ def change_bias(
     numb_batch: int = 0,
     model_branch: Optional[str] = None,
     output: Optional[str] = None,
-):
+) -> None:
     if input_file.endswith(".pd"):
         old_state_dict = paddle.load(input_file)
         model_state_dict = copy.deepcopy(old_state_dict.get("model", old_state_dict))

diff --git a/deepmd/pd/loss/ener.py b/deepmd/pd/loss/ener.py
@@ -10,7 +10,6 @@
     TaskLoss,
 )
 from deepmd.pd.utils import (
-    decomp,
     env,
 )
 from deepmd.pd.utils.env import (
@@ -224,10 +223,7 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
 
             if self.relative_f is not None:
                 force_label_3 = force_label.reshape([-1, 3])
-                # norm_f = force_label_3.norm(axis=1, keepdim=True) + self.relative_f
-                norm_f = (
-                    decomp.norm(force_label_3, axis=1, keepdim=True) + self.relative_f
-                )
+                norm_f = force_label_3.norm(axis=1, keepdim=True) + self.relative_f
                 diff_f_3 = diff_f.reshape([-1, 3])
                 diff_f_3 = diff_f_3 / norm_f
                 diff_f = diff_f_3.reshape([-1])

diff --git a/deepmd/pd/model/atomic_model/dp_atomic_model.py b/deepmd/pd/model/atomic_model/dp_atomic_model.py
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import copy
 import functools
 import logging
 from typing import (
@@ -52,7 +51,7 @@ def __init__(
         fitting,
         type_map: list[str],
         **kwargs,
-    ):
+    ) -> None:
         super().__init__(type_map, **kwargs)
         ntypes = len(type_map)
         self.type_map = type_map
@@ -201,7 +200,7 @@ def serialize(self) -> dict:
 
     @classmethod
     def deserialize(cls, data) -> "DPAtomicModel":
-        data = copy.deepcopy(data)
+        data = data.copy()
         check_version_compatibility(data.pop("@version", 1), 2, 1)
         data.pop("@class", None)
         data.pop("type", None)
@@ -212,6 +211,37 @@ def deserialize(cls, data) -> "DPAtomicModel":
         obj = super().deserialize(data)
         return obj
 
+    def enable_compression(
+        self,
+        min_nbor_dist: float,
+        table_extrapolate: float = 5,
+        table_stride_1: float = 0.01,
+        table_stride_2: float = 0.1,
+        check_frequency: int = -1,
+    ) -> None:
+        """Call descriptor enable_compression().
+
+        Parameters
+        ----------
+        min_nbor_dist
+            The nearest distance between atoms
+        table_extrapolate
+            The scale of model extrapolation
+        table_stride_1
+            The uniform stride of the first table
+        table_stride_2
+            The uniform stride of the second table
+        check_frequency
+            The overflow check frequency
+        """
+        self.descriptor.enable_compression(
+            min_nbor_dist,
+            table_extrapolate,
+            table_stride_1,
+            table_stride_2,
+            check_frequency,
+        )
+
     def forward_atomic(
         self,
         extended_coord,
@@ -278,7 +308,7 @@ def compute_or_load_stat(
         self,
         sampled_func,
         stat_file_path: Optional[DPPath] = None,
-    ):
+    ) -> None:
         """
         Compute or load the statistics parameters of the model,
         such as mean and standard deviation of descriptors or the energy bias of the fitting net.

diff --git a/deepmd/pd/model/descriptor/__init__.py b/deepmd/pd/model/descriptor/__init__.py
@@ -5,6 +5,10 @@
 from .descriptor import (
     DescriptorBlock,
 )
+from .dpa1 import (
+    DescrptBlockSeAtten,
+    DescrptDPA1,
+)
 from .env_mat import (
     prod_env_mat,
 )
@@ -17,6 +21,8 @@
     "BaseDescriptor",
     "DescriptorBlock",
     "DescrptBlockSeA",
+    "DescrptBlockSeAtten",
+    "DescrptDPA1",
     "DescrptSeA",
     "prod_env_mat",
 ]