diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1922ea33f8..ca8be9147d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,67 +1,49 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: trailing-whitespace - exclude: "^.+\\.pbtxt$" - - id: end-of-file-fixer - exclude: "^.+\\.pbtxt$" - - id: check-yaml - #- id: check-json - - id: check-added-large-files - - id: check-merge-conflict - - id: check-symlinks - - id: check-toml -# Python -- repo: https://github.com/psf/black - rev: 23.3.0 - hooks: - - id: black-jupyter -- repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort - files: \.py$ -- repo: https://github.com/charliermarsh/ruff-pre-commit - # Ruff version. - rev: v0.0.269 - hooks: - - id: ruff - args: ["--fix"] -# numpydoc -- repo: https://github.com/Carreau/velin - rev: 0.0.12 - hooks: - - id: velin - args: ["--write"] -# Python inside docs -- repo: https://github.com/asottile/blacken-docs - rev: 1.13.0 - hooks: - - id: blacken-docs -# C++ -- repo: https://github.com/pre-commit/mirrors-clang-format - rev: v16.0.4 - hooks: - - id: clang-format - exclude: ^source/3rdparty|source/lib/src/cuda/cudart/.+\.inc -# CSS -- repo: https://github.com/pre-commit/mirrors-csslint - rev: v1.0.5 - hooks: - - id: csslint -# Shell -- repo: https://github.com/scop/pre-commit-shfmt - rev: v3.6.0-2 - hooks: - - id: shfmt -# CMake -- repo: https://github.com/cheshirekow/cmake-format-precommit - rev: v0.6.13 - hooks: - - id: cmake-format - #- id: cmake-lint -ci: - autoupdate_branch: devel + - repo: https://github.com/PyCQA/isort + rev: 5.11.5 + hooks: + - id: isort + args: ["--multi-line=7", "--sl"] + + - repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + +# - repo: https://github.com/charliermarsh/ruff-pre-commit +# rev: 'v0.0.272' +# hooks: +# - id: ruff + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: a11d9314b22d8f8c7556443875b731ef05965464 + hooks: + - id: check-merge-conflict + - id: check-symlinks + - id: detect-private-key + files: (?!.*paddle)^.*$ + - id: end-of-file-fixer + files: \.md$ + - id: trailing-whitespace + files: \.md$ + + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.0.1 + hooks: + - id: forbid-crlf + files: \.md$ + - id: remove-crlf + files: \.md$ + - id: forbid-tabs + files: \.md$ + - id: remove-tabs + files: \.md$ + +# - repo: local +# hooks: +# - id: clang-format +# name: clang-format +# description: Format files with ClangFormat +# entry: bash .clang_format.hook -i +# language: system +# files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ diff --git a/deepmd/common.py b/deepmd/common.py index d0afbf0784..2f8334f9f5 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -2,39 +2,29 @@ import json import warnings -from functools import ( - wraps, -) -from pathlib import ( - Path, -) -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - List, - Optional, - TypeVar, - Union, -) +from functools import wraps +from pathlib import Path +from typing import TYPE_CHECKING +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Optional +from typing import TypeVar +from typing import Union import numpy as np import tensorflow import yaml -from tensorflow.python.framework import ( - tensor_util, -) - -from deepmd.env import ( - GLOBAL_NP_FLOAT_PRECISION, - 
GLOBAL_TF_FLOAT_PRECISION, - op_module, - tf, -) -from deepmd.utils.path import ( - DPPath, -) +from tensorflow.python.framework import tensor_util + +from deepmd.env import GLOBAL_NP_FLOAT_PRECISION +from deepmd.env import GLOBAL_PD_FLOAT_PRECISION +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import op_module +from deepmd.env import paddle +from deepmd.env import tf +from deepmd.utils.path import DPPath if TYPE_CHECKING: _DICT_VAL = TypeVar("_DICT_VAL") @@ -50,11 +40,11 @@ # define constants PRECISION_DICT = { - "default": GLOBAL_TF_FLOAT_PRECISION, - "float16": tf.float16, - "float32": tf.float32, - "float64": tf.float64, - "bfloat16": tf.bfloat16, + "default": GLOBAL_PD_FLOAT_PRECISION, + "float16": paddle.float16, + "float32": paddle.float32, + "float64": paddle.float64, + "bfloat16": paddle.bfloat16, } @@ -119,11 +109,11 @@ def gelu_wrapper(x): data_requirement = {} ACTIVATION_FN_DICT = { - "relu": tf.nn.relu, - "relu6": tf.nn.relu6, - "softplus": tf.nn.softplus, - "sigmoid": tf.sigmoid, - "tanh": tf.nn.tanh, + "relu": paddle.nn.functional.relu, + "relu6": paddle.nn.functional.relu6, + "softplus": paddle.nn.functional.softplus, + "sigmoid": paddle.nn.functional.sigmoid, + "tanh": paddle.nn.functional.tanh, "gelu": gelu, "gelu_tf": gelu_tf, "None": None, diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index 641210f0d1..c29b667143 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -1,68 +1,42 @@ -from typing import ( - List, - Optional, - Tuple, -) +from typing import List +from typing import Optional +from typing import Tuple import numpy as np -from deepmd.common import ( - cast_precision, - get_activation_func, - get_precision, -) -from deepmd.env import ( - GLOBAL_NP_FLOAT_PRECISION, - GLOBAL_TF_FLOAT_PRECISION, - default_tf_session_config, - op_module, - tf, -) -from deepmd.nvnmd.descriptor.se_a import ( - build_davg_dstd, - build_op_descriptor, - check_switch_range, - descrpt2r4, - filter_GR2D, - filter_lower_R42GR, -) -from deepmd.nvnmd.utils.config import ( - nvnmd_cfg, -) -from deepmd.utils.errors import ( - GraphWithoutTensorError, -) -from deepmd.utils.graph import ( - get_tensor_by_name_from_graph, -) -from deepmd.utils.network import ( - embedding_net, - embedding_net_rand_seed_shift, -) -from deepmd.utils.sess import ( - run_sess, -) -from deepmd.utils.spin import ( - Spin, -) -from deepmd.utils.tabulate import ( - DPTabulate, -) -from deepmd.utils.type_embed import ( - embed_atom_type, -) - -from .descriptor import ( - Descriptor, -) -from .se import ( - DescrptSe, -) - - -@Descriptor.register("se_e2_a") -@Descriptor.register("se_a") -class DescrptSeA(DescrptSe): +from deepmd.common import cast_precision +from deepmd.common import get_activation_func +from deepmd.common import get_precision +from deepmd.env import GLOBAL_NP_FLOAT_PRECISION +from deepmd.env import GLOBAL_PD_FLOAT_PRECISION +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import default_tf_session_config +from deepmd.env import op_module +from deepmd.env import paddle +from deepmd.env import tf +from deepmd.nvnmd.descriptor.se_a import build_davg_dstd +from deepmd.nvnmd.descriptor.se_a import build_op_descriptor +from deepmd.nvnmd.descriptor.se_a import check_switch_range +from deepmd.nvnmd.descriptor.se_a import descrpt2r4 +from deepmd.nvnmd.descriptor.se_a import filter_GR2D +from deepmd.nvnmd.descriptor.se_a import filter_lower_R42GR +from deepmd.nvnmd.utils.config import nvnmd_cfg +from deepmd.utils.errors import 
GraphWithoutTensorError +from deepmd.utils.graph import get_tensor_by_name_from_graph +from deepmd.utils.network import EmbeddingNet # embedding_net, +from deepmd.utils.network import embedding_net_rand_seed_shift +from deepmd.utils.sess import run_sess +from deepmd.utils.spin import Spin +from deepmd.utils.tabulate import DPTabulate +from deepmd.utils.type_embed import embed_atom_type + +from .descriptor import Descriptor +from .se import DescrptSe + + +# @Descriptor.register("se_e2_a") +# @Descriptor.register("se_a") +class DescrptSeA(paddle.nn.Layer): r"""DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes the distance between atoms as input. @@ -166,6 +140,7 @@ def __init__( spin: Optional[Spin] = None, ) -> None: """Constructor.""" + super().__init__() if rcut < rcut_smth: raise RuntimeError( f"rcut_smth ({rcut_smth:f}) should be no more than rcut ({rcut:f})!" @@ -190,6 +165,7 @@ def __init__( self.exclude_types.add((tt[1], tt[0])) self.set_davg_zero = set_davg_zero self.type_one_side = type_one_side + self.type_one_side = False self.spin = spin # extend sel_a for spin system @@ -215,49 +191,71 @@ def __init__( self.useBN = False self.dstd = None self.davg = None + + self.avg_zero = paddle.zeros([self.ntypes, self.ndescrpt], dtype="float32") + self.std_ones = paddle.ones([self.ntypes, self.ndescrpt], dtype="float32") + nets = [] + for type_input in range(self.ntypes): + layer = [] + for type_i in range(self.ntypes): + layer.append( + EmbeddingNet( + self.filter_neuron, + self.filter_precision, + self.filter_activation_fn, + self.filter_resnet_dt, + self.seed, + self.trainable, + name="filter_type_" + str(type_input) + str(type_i), + ) + ) + nets.append(paddle.nn.LayerList(layer)) + + self.embedding_nets = paddle.nn.LayerList(nets) + self.compress = False self.embedding_net_variables = None self.mixed_prec = None - self.place_holders = {} + # self.place_holders = {} self.nei_type = np.repeat(np.arange(self.ntypes), self.sel_a) # like a mask - avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( - GLOBAL_NP_FLOAT_PRECISION - ) - std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( - GLOBAL_NP_FLOAT_PRECISION - ) - sub_graph = tf.Graph() - with sub_graph.as_default(): - name_pfx = "d_sea_" - for ii in ["coord", "box"]: - self.place_holders[ii] = tf.placeholder( - GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii - ) - self.place_holders["type"] = tf.placeholder( - tf.int32, [None, None], name=name_pfx + "t_type" - ) - self.place_holders["natoms_vec"] = tf.placeholder( - tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" - ) - self.place_holders["default_mesh"] = tf.placeholder( - tf.int32, [None], name=name_pfx + "t_mesh" - ) - self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( - self.place_holders["coord"], - self.place_holders["type"], - self.place_holders["natoms_vec"], - self.place_holders["box"], - self.place_holders["default_mesh"], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a=self.rcut_a, - rcut_r=self.rcut_r, - rcut_r_smth=self.rcut_r_smth, - sel_a=self.sel_a, - sel_r=self.sel_r, - ) - self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) + # avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( + # GLOBAL_NP_FLOAT_PRECISION + # ) + # std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( + # GLOBAL_NP_FLOAT_PRECISION + # ) + # sub_graph = tf.Graph() + # with sub_graph.as_default(): + # name_pfx = "d_sea_" + # for ii in 
["coord", "box"]: + # self.place_holders[ii] = tf.placeholder( + # GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii + # ) + # self.place_holders["type"] = tf.placeholder( + # tf.int32, [None, None], name=name_pfx + "t_type" + # ) + # self.place_holders["natoms_vec"] = tf.placeholder( + # tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" + # ) + # self.place_holders["default_mesh"] = tf.placeholder( + # tf.int32, [None], name=name_pfx + "t_mesh" + # ) + # self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( + # self.place_holders["coord"], + # self.place_holders["type"], + # self.place_holders["natoms_vec"], + # self.place_holders["box"], + # self.place_holders["default_mesh"], + # self.avg_zero, + # self.std_ones, + # rcut_a=self.rcut_a, + # rcut_r=self.rcut_r, + # rcut_r_smth=self.rcut_r_smth, + # sel_a=self.sel_a, + # sel_r=self.sel_r, + # ) + # self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) self.original_sel = None self.multi_task = multi_task if multi_task: @@ -269,6 +267,20 @@ def __init__( "suma2": [], } + self.t_rcut = paddle.to_tensor( + np.max([self.rcut_r, self.rcut_a]), dtype="float32" + ) + self.t_ntypes = paddle.to_tensor(self.ntypes, dtype="int32") + self.t_ndescrpt = paddle.to_tensor(self.ndescrpt, dtype="int32") + self.t_sel = paddle.to_tensor(self.sel_a, dtype="int32") + + t_avg = paddle.to_tensor( + np.zeros([self.ntypes, self.ndescrpt]), dtype="float64" + ) + t_std = paddle.to_tensor(np.ones([self.ntypes, self.ndescrpt]), dtype="float64") + self.register_buffer("t_avg", t_avg) + self.register_buffer("t_std", t_std) + def get_rcut(self) -> float: """Returns the cut-off radius.""" return self.rcut_r @@ -285,7 +297,7 @@ def get_dim_rot_mat_1(self) -> int: """Returns the first dimension of the rotation matrix. The rotation is of shape dim_1 x 3.""" return self.filter_neuron[-1] - def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> Tuple[paddle.Tensor, paddle.Tensor, List[int], List[int]]: """Returns neighbor information. Returns @@ -360,6 +372,9 @@ def compute_input_stats( self.stat_dict["sumr2"] += sumr2 self.stat_dict["suma2"] += suma2 + self.t_avg = paddle.to_tensor(self.davg, dtype="float64") + self.t_std = paddle.to_tensor(self.dstd, dtype="float64") + def merge_input_stats(self, stat_dict): """Merge the statisitcs computed from compute_input_stats to obtain the self.davg and self.dstd. @@ -498,17 +513,17 @@ def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: self.mixed_prec = mixed_prec self.filter_precision = get_precision(mixed_prec["output_prec"]) - def build( + def forward( self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box_: tf.Tensor, - mesh: tf.Tensor, + coord_: paddle.Tensor, + atype_: paddle.Tensor, + natoms: paddle.Tensor, + box_: paddle.Tensor, + mesh: paddle.Tensor, input_dict: dict, reuse: Optional[bool] = None, suffix: str = "", - ) -> tf.Tensor: + ) -> paddle.Tensor: """Build the computational graph for the descriptor. 
Parameters @@ -542,73 +557,114 @@ def build( """ davg = self.davg dstd = self.dstd - if nvnmd_cfg.enable: - if nvnmd_cfg.restore_descriptor: - davg, dstd = build_davg_dstd() - check_switch_range(davg, dstd) - with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): - if davg is None: - davg = np.zeros([self.ntypes, self.ndescrpt]) - if dstd is None: - dstd = np.ones([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant( - np.max([self.rcut_r, self.rcut_a]), - name="rcut", - dtype=GLOBAL_TF_FLOAT_PRECISION, - ) - t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) - t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) - t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) - t_original_sel = tf.constant( - self.original_sel if self.original_sel is not None else self.sel_a, - name="original_sel", - dtype=tf.int32, - ) - self.t_avg = tf.get_variable( - "t_avg", - davg.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(davg), - ) - self.t_std = tf.get_variable( - "t_std", - dstd.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(dstd), - ) - - with tf.control_dependencies([t_sel, t_original_sel]): - coord = tf.reshape(coord_, [-1, natoms[1] * 3]) - box = tf.reshape(box_, [-1, 9]) - atype = tf.reshape(atype_, [-1, natoms[1]]) - - op_descriptor = ( - build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a - ) - self.descrpt, self.descrpt_deriv, self.rij, self.nlist = op_descriptor( + # if nvnmd_cfg.enable: + # if nvnmd_cfg.restore_descriptor: + # davg, dstd = build_davg_dstd() + # check_switch_range(davg, dstd) + # with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): + if davg is None: + davg = np.zeros([self.ntypes, self.ndescrpt]) + if dstd is None: + dstd = np.ones([self.ntypes, self.ndescrpt]) + # t_rcut = tf.constant( + # np.max([self.rcut_r, self.rcut_a]), + # name="rcut", + # dtype=GLOBAL_TF_FLOAT_PRECISION, + # ) + # t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + # t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) + # t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) + # t_original_sel = paddle.to_tensor( + # self.original_sel if self.original_sel is not None else self.sel_a, + # ) + # self.t_avg = tf.get_variable( + # "t_avg", + # davg.shape, + # dtype=GLOBAL_TF_FLOAT_PRECISION, + # trainable=False, + # initializer=tf.constant_initializer(davg), + # ) + # self.t_std = tf.get_variable( + # "t_std", + # dstd.shape, + # dtype=GLOBAL_TF_FLOAT_PRECISION, + # trainable=False, + # initializer=tf.constant_initializer(dstd), + # ) + + coord = paddle.reshape(coord_, [-1, natoms[1] * 3]) + box = paddle.reshape(box_, [-1, 9]) + atype = paddle.reshape(atype_, [-1, natoms[1]]) + # op_descriptor = ( + # build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a + # ) + # print(coord.dtype) # paddle.float64 + # print(atype.dtype) # paddle.int32 + # print(box.dtype) # paddle.float64 + # print(mesh.dtype) # paddle.int32 + # print(self.t_avg.dtype) # paddle.float32 + # print(self.t_std.dtype) # paddle.float32 + # print(natoms) + # exit() + ( + self.descrpt, + self.descrpt_deriv, + self.rij, + self.nlist, + ) = op_module.prod_env_mat_a( coord, atype, - natoms, box, mesh, self.t_avg, self.t_std, + natoms, rcut_a=self.rcut_a, rcut_r=self.rcut_r, rcut_r_smth=self.rcut_r_smth, sel_a=self.sel_a, sel_r=self.sel_r, ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # 
"deepmd-kit/examples/water/se_e2_a/align_input/pred_descrpt", + # self.descrpt, + # ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # "deepmd-kit/examples/water/se_e2_a/align_input/pred_descrpt_deriv", + # self.descrpt_deriv, + # ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # "deepmd-kit/examples/water/se_e2_a/align_input/pred_rij", + # self.rij, + # ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # "deepmd-kit/examples/water/se_e2_a/align_input/pred_nlist", + # self.nlist, + # ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # "deepmd-kit/examples/water/se_e2_a/align_input/pred_nlist", + # self.nlist, + # ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # "deepmd-kit/examples/water/se_e2_a/align_input/pred_nlist", + # self.nlist, + # ) + # exit() + # self.descrpt.shape = [1, 105984] # only used when tensorboard was set as true - tf.summary.histogram("descrpt", self.descrpt) - tf.summary.histogram("rij", self.rij) - tf.summary.histogram("nlist", self.nlist) - - self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) - self._identity_tensors(suffix=suffix) - + # tf.summary.histogram("descrpt", self.descrpt) + # tf.summary.histogram("rij", self.rij) + # tf.summary.histogram("nlist", self.nlist) + self.descrpt_reshape = paddle.reshape(self.descrpt, [-1, self.ndescrpt]) + # [1, 105984] --> [192, 552] + self.descrpt_reshape.stop_gradient = False + # self._identity_tensors(suffix=suffix) self.dout, self.qmat = self._pass_filter( self.descrpt_reshape, atype, @@ -618,18 +674,32 @@ def build( reuse=reuse, trainable=self.trainable, ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # "deepmd-kit/examples/water/se_e2_a/align_input/pred_dout", + # self.dout, + # ) + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # "deepmd-kit/examples/water/se_e2_a/align_input/pred_qmat", + # self.qmat, + # ) + # exit() # only used when tensorboard was set as true - tf.summary.histogram("embedding_net_output", self.dout) + # tf.summary.histogram("embedding_net_output", self.dout) + # print(self.dout.shape) + # np.save(f"/workspace/hesensen/deepmd_backend/infer_align/dout_pd.npy", self.dout) + # exit() return self.dout - def get_rot_mat(self) -> tf.Tensor: + def get_rot_mat(self) -> paddle.Tensor: """Get rotational matrix.""" return self.qmat def prod_force_virial( - self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + self, atom_ener: paddle.Tensor, natoms: paddle.Tensor + ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]: """Compute force and virial. 
Parameters @@ -651,11 +721,11 @@ def prod_force_virial( atom_virial The atomic virial """ - [net_deriv] = tf.gradients(atom_ener, self.descrpt_reshape) - tf.summary.histogram("net_derivative", net_deriv) - net_deriv_reshape = tf.reshape( + net_deriv = paddle.grad(atom_ener, self.descrpt_reshape, create_graph=True)[0] + # tf.summary.histogram("net_derivative", net_deriv) + net_deriv_reshape = paddle.reshape( net_deriv, - [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + [-1, natoms[0] * self.ndescrpt], ) force = op_module.prod_force_se_a( net_deriv_reshape, @@ -674,29 +744,43 @@ def prod_force_virial( n_a_sel=self.nnei_a, n_r_sel=self.nnei_r, ) - tf.summary.histogram("force", force) - tf.summary.histogram("virial", virial) - tf.summary.histogram("atom_virial", atom_virial) + # tf.summary.histogram("force", force) + # tf.summary.histogram("virial", virial) + # tf.summary.histogram("atom_virial", atom_virial) return force, virial, atom_virial def _pass_filter( self, inputs, atype, natoms, input_dict, reuse=None, suffix="", trainable=True ): + # natoms = [192, 192, 64 , 128] if input_dict is not None: type_embedding = input_dict.get("type_embedding", None) else: type_embedding = None start_index = 0 - inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]) + # print(inputs.shape) # [192, 552] + inputs = paddle.reshape(inputs, [-1, int(natoms[0].item()), int(self.ndescrpt)]) + # print(inputs.shape) # [1, 192, 552] + # exit() output = [] output_qmat = [] + # print(self.type_one_side, type_embedding) + # exit() if not self.type_one_side and type_embedding is None: + # print("here", self.ntypes) for type_i in range(self.ntypes): - inputs_i = tf.slice( - inputs, [0, start_index, 0], [-1, natoms[2 + type_i], -1] - ) - inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) + inputs_i = paddle.slice( + inputs, + [0, 1, 2], + [0, start_index, 0], + [ + inputs.shape[0], + start_index + natoms[2 + type_i], + inputs.shape[2], + ], + ) # [1, 192, 552] --> [1, 64, 552] + inputs_i = paddle.reshape(inputs_i, [-1, self.ndescrpt]) # [64, 552] filter_name = "filter_type_" + str(type_i) + suffix layer, qmat = self._filter( inputs_i, @@ -707,13 +791,13 @@ def _pass_filter( trainable=trainable, activation_fn=self.filter_activation_fn, ) - layer = tf.reshape( - layer, [tf.shape(inputs)[0], natoms[2 + type_i], self.get_dim_out()] + layer = paddle.reshape( + layer, [inputs.shape[0], natoms[2 + type_i], self.get_dim_out()] ) - qmat = tf.reshape( + qmat = paddle.reshape( qmat, [ - tf.shape(inputs)[0], + inputs.shape[0], natoms[2 + type_i], self.get_dim_rot_mat_1() * 3, ], @@ -722,61 +806,97 @@ def _pass_filter( output_qmat.append(qmat) start_index += natoms[2 + type_i] else: - inputs_i = inputs - inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) - type_i = -1 - if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: - inputs_i = descrpt2r4(inputs_i, natoms) - if len(self.exclude_types): - atype_nloc = tf.reshape( - tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1] - ) # when nloc != nall, pass nloc to mask - mask = self.build_type_exclude_mask( - self.exclude_types, - self.ntypes, - self.sel_a, - self.ndescrpt, - atype_nloc, - tf.shape(inputs_i)[0], - ) - inputs_i *= mask - - layer, qmat = self._filter( - inputs_i, - type_i, - name="filter_type_all" + suffix, - natoms=natoms, - reuse=reuse, - trainable=trainable, - activation_fn=self.filter_activation_fn, - type_embedding=type_embedding, - ) - layer = tf.reshape( - layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()] - ) - qmat = tf.reshape( 
- qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3] - ) - output.append(layer) - output_qmat.append(qmat) - output = tf.concat(output, axis=1) - output_qmat = tf.concat(output_qmat, axis=1) + ... + # inputs_i = inputs + # inputs_i = paddle.reshape(inputs_i, [-1, self.ndescrpt]) + # type_i = -1 + # if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: + # inputs_i = descrpt2r4(inputs_i, natoms) + # if len(self.exclude_types): + # atype_nloc = paddle.reshape( + # paddle.slice(atype, [0, 0], [-1, natoms[0]]), [-1] + # ) # when nloc != nall, pass nloc to mask + # mask = self.build_type_exclude_mask( + # self.exclude_types, + # self.ntypes, + # self.sel_a, + # self.ndescrpt, + # atype_nloc, + # paddle.shape(inputs_i)[0], + # ) + # inputs_i *= mask + + # layer, qmat = self._filter( + # inputs_i, + # type_i, + # name="filter_type_all" + suffix, + # natoms=natoms, + # reuse=reuse, + # trainable=trainable, + # activation_fn=self.filter_activation_fn, + # type_embedding=type_embedding, + # ) + # layer = paddle.reshape( + # layer, [inputs.shape[0], natoms[0], self.get_dim_out()] + # ) + # qmat = paddle.reshape( + # qmat, [inputs.shape[0], natoms[0], self.get_dim_rot_mat_1() * 3] + # ) + # output.append(layer) + # output_qmat.append(qmat) + # print(f"len(output) = {len(output)}") + output = paddle.concat(output, axis=1) + output_qmat = paddle.concat(output_qmat, axis=1) return output, output_qmat def _compute_dstats_sys_smth( self, data_coord, data_box, data_atype, natoms_vec, mesh ): - dd_all = run_sess( - self.sub_sess, - self.stat_descrpt, - feed_dict={ - self.place_holders["coord"]: data_coord, - self.place_holders["type"]: data_atype, - self.place_holders["natoms_vec"]: natoms_vec, - self.place_holders["box"]: data_box, - self.place_holders["default_mesh"]: mesh, - }, + input_dict = {} + # dd_all = run_sess( + # self.sub_sess, + # self.stat_descrpt, + # feed_dict={ + # self.place_holders["coord"]: data_coord, + # self.place_holders["type"]: data_atype, + # self.place_holders["natoms_vec"]: natoms_vec, + # self.place_holders["box"]: data_box, + # self.place_holders["default_mesh"]: mesh, + # }, + # ) + input_dict["coord"] = paddle.to_tensor(data_coord, dtype="float32") + input_dict["box"] = paddle.to_tensor(data_box, dtype="float32") + input_dict["type"] = paddle.to_tensor(data_atype, dtype="int32") + input_dict["natoms_vec"] = paddle.to_tensor( + natoms_vec, dtype="int32", place="cpu" ) + input_dict["default_mesh"] = paddle.to_tensor(mesh, dtype="int32") + + # print(input_dict["coord"].dtype) # fp64 + # print(input_dict["type"].dtype) # int32 + # print(input_dict["natoms_vec"].dtype) # int32 + # print(input_dict["box"].dtype) # fp64 + # print(input_dict["default_mesh"].dtype) # int32 + # print(self.avg_zero) + # print(self.std_ones) + # print(self.sel_a) + # print(self.sel_r) + self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( + input_dict["coord"], # fp32 + input_dict["type"], # int32 + input_dict["box"], # fp32 + input_dict["default_mesh"], # int32 + self.avg_zero, + self.std_ones, + input_dict["natoms_vec"], # int32 + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_a, + sel_r=self.sel_r, + ) + + dd_all = self.stat_descrpt.numpy() natoms = natoms_vec dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) start_index = 0 @@ -840,29 +960,30 @@ def _concat_type_embedding( embedding: environment of each atom represented by embedding. 
""" - te_out_dim = type_embedding.get_shape().as_list()[-1] - self.t_nei_type = tf.constant(self.nei_type, dtype=tf.int32) - nei_embed = tf.nn.embedding_lookup( - type_embedding, tf.cast(self.t_nei_type, dtype=tf.int32) + te_out_dim = type_embedding.shape[-1] + self.t_nei_type = paddle.to_tensor(self.nei_type, dtype=paddle.int32) + nei_embed = paddle.nn.functional.embedding( + paddle.cast(self.t_nei_type, dtype=paddle.int32), + type_embedding, ) # shape is [self.nnei, 1+te_out_dim] - nei_embed = tf.tile( + nei_embed = paddle.tile( nei_embed, (nframes * natoms[0], 1) ) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] - nei_embed = tf.reshape(nei_embed, [-1, te_out_dim]) - embedding_input = tf.concat( + nei_embed = paddle.reshape(nei_embed, [-1, te_out_dim]) + embedding_input = paddle.concat( [xyz_scatter, nei_embed], 1 ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim] if not self.type_one_side: atm_embed = embed_atom_type( self.ntypes, natoms, type_embedding ) # shape is [natoms[0], te_out_dim] - atm_embed = tf.tile( + atm_embed = paddle.tile( atm_embed, (nframes, self.nnei) ) # shape is [nframes*natoms[0], self.nnei*te_out_dim] - atm_embed = tf.reshape( + atm_embed = paddle.reshape( atm_embed, [-1, te_out_dim] ) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] - embedding_input = tf.concat( + embedding_input = paddle.concat( [embedding_input, atm_embed], 1 ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim+te_out_dim] return embedding_input @@ -888,13 +1009,42 @@ def _filter_lower( outputs_size = [1] + self.filter_neuron # cut-out inputs # with natom x (nei_type_i x 4) - inputs_i = tf.slice(inputs, [0, start_index * 4], [-1, incrs_index * 4]) - shape_i = inputs_i.get_shape().as_list() - natom = tf.shape(inputs_i)[0] + # if not hasattr(self, "debug_inputs"): + # self.debug_inputs = inputs + # paddle.save(self.debug_inputs, "/workspace/hesensen/deepmd_backend/small_case/debug_inputs.pddata") + # print(__file__, "inputs.shape", inputs.shape) + + inputs_i = paddle.slice( + inputs, + [0, 1], + [0, start_index * 4], + [inputs.shape[0], start_index * 4 + incrs_index * 4], + ) + # if not hasattr(self, "debug_inputs_i"): + # self.debug_inputs_i = inputs_i + # paddle.save(self.debug_inputs_i, "/workspace/hesensen/deepmd_backend/small_case/debug_inputs_i.pddata") + # print(__file__, "inputs_i.shape", inputs_i.shape) + + shape_i = inputs_i.shape + natom = inputs_i.shape[0] + # with (natom x nei_type_i) x 4 - inputs_reshape = tf.reshape(inputs_i, [-1, 4]) + inputs_reshape = paddle.reshape(inputs_i, [-1, 4]) + # if not hasattr(self, "debug_inputs_reshape"): + # self.debug_inputs_reshape = inputs_reshape + # paddle.save(self.debug_inputs_reshape, "/workspace/hesensen/deepmd_backend/small_case/debug_inputs_reshape.pddata") + # print(__file__, "inputs_reshape.shape", inputs_reshape.shape) + # with (natom x nei_type_i) x 1 - xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) + xyz_scatter = paddle.reshape( + paddle.slice(inputs_reshape, [0, 1], [0, 0], [inputs_reshape.shape[0], 1]), + [-1, 1], + ) + # if not hasattr(self, "debug_xyz_scatter"): + # self.debug_xyz_scatter = xyz_scatter + # paddle.save(self.debug_xyz_scatter, "/workspace/hesensen/deepmd_backend/small_case/debug_xyz_scatter.pddata") + # print(__file__, "xyz_scatter.shape", xyz_scatter.shape) + if type_embedding is not None: xyz_scatter = self._concat_type_embedding( xyz_scatter, nframes, natoms, type_embedding @@ -904,25 +1054,25 @@ def _filter_lower( "compression of type embedded descriptor is not 
supported at the moment" ) # natom x 4 x outputs_size - if nvnmd_cfg.enable: - return filter_lower_R42GR( - type_i, - type_input, - inputs_i, - is_exclude, - activation_fn, - bavg, - stddev, - trainable, - suffix, - self.seed, - self.seed_shift, - self.uniform_seed, - self.filter_neuron, - self.filter_precision, - self.filter_resnet_dt, - self.embedding_net_variables, - ) + # if nvnmd_cfg.enable: + # return filter_lower_R42GR( + # type_i, + # type_input, + # inputs_i, + # is_exclude, + # activation_fn, + # bavg, + # stddev, + # trainable, + # suffix, + # self.seed, + # self.seed_shift, + # self.uniform_seed, + # self.filter_neuron, + # self.filter_precision, + # self.filter_resnet_dt, + # self.embedding_net_variables, + # ) if self.compress and (not is_exclude): if self.type_one_side: net = "filter_-1_net_" + str(type_i) @@ -937,70 +1087,85 @@ def _filter_lower( self.table_config[3], ] return op_module.tabulate_fusion_se_a( - tf.cast(self.table.data[net], self.filter_precision), + paddle.cast(self.table.data[net], self.filter_precision), info, xyz_scatter, - tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + paddle.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), last_layer_size=outputs_size[-1], ) else: if not is_exclude: # with (natom x nei_type_i) x out_size - xyz_scatter = embedding_net( - xyz_scatter, - self.filter_neuron, - self.filter_precision, - activation_fn=activation_fn, - resnet_dt=self.filter_resnet_dt, - name_suffix=suffix, - stddev=stddev, - bavg=bavg, - seed=self.seed, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.embedding_net_variables, - mixed_prec=self.mixed_prec, - ) + # if not hasattr(self, "xyz_scatter_input"): + # self.debug_xyz_scatter_input = xyz_scatter + # paddle.save(self.xyz_scatter_input, "/workspace/hesensen/deepmd_backend/small_case/embd_net_0_0_input.pddata") + # paddle.save(self.embedding_nets[type_input][type_i].state_dict(), "/workspace/hesensen/deepmd_backend/small_case/embd_net_0_0.pdparams") + # print(__file__, "saved") + xyz_scatter_out = self.embedding_nets[type_input][type_i](xyz_scatter) + # print(__file__, "xyz_scatter.shape", xyz_scatter.shape) + # if not hasattr(self, "xyz_scatter_output"): + # self.debug_xyz_scatter_output = xyz_scatter_out + # paddle.save(self.xyz_scatter_output, "/workspace/hesensen/deepmd_backend/small_case/embd_net_0_0_output.pddata") + # print(__file__, "saved") + + # xyz_scatter = embedding_net( + # xyz_scatter, + # self.filter_neuron, + # self.filter_precision, + # activation_fn=activation_fn, + # resnet_dt=self.filter_resnet_dt, + # name_suffix=suffix, + # stddev=stddev, + # bavg=bavg, + # seed=self.seed, + # trainable=trainable, + # uniform_seed=self.uniform_seed, + # initial_variables=self.embedding_net_variables, + # mixed_prec=self.mixed_prec, + # ) + # xyz_scatter = paddle.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift else: # we can safely return the final xyz_scatter filled with zero directly - return tf.cast( - tf.fill((natom, 4, outputs_size[-1]), 0.0), self.filter_precision + return paddle.cast( + paddle.fill((natom, 4, outputs_size[-1]), 0.0), + self.filter_precision, ) # natom x nei_type_i x out_size - xyz_scatter = tf.reshape( - xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1]) + xyz_scatter_out = paddle.reshape( + xyz_scatter_out, (-1, shape_i[1] // 4, outputs_size[-1]) ) - # When using tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) below + # When using 
paddle.reshape(inputs_i, [-1, shape_i[1]//4, 4]) below # [588 24] -> [588 6 4] correct # but if sel is zero # [588 0] -> [147 0 4] incorrect; the correct one is [588 0 4] - # So we need to explicitly assign the shape to tf.shape(inputs_i)[0] instead of -1 + # So we need to explicitly assign the shape to paddle.shape(inputs_i)[0] instead of -1 # natom x 4 x outputs_size - return tf.matmul( - tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), - xyz_scatter, - transpose_a=True, + return paddle.matmul( + paddle.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + xyz_scatter_out, + transpose_x=True, ) - @cast_precision + # @cast_precision def _filter( self, inputs, type_input, natoms, type_embedding=None, - activation_fn=tf.nn.tanh, + activation_fn=paddle.nn.functional.tanh, stddev=1.0, bavg=0.0, name="linear", reuse=None, trainable=True, ): - nframes = tf.shape(tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0] + # nframes = paddle.shape(paddle.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0] + nframes = 1 # natom x (nei x 4) - shape = inputs.get_shape().as_list() + shape = inputs.shape outputs_size = [1] + self.filter_neuron outputs_size_2 = self.n_axis_neuron all_excluded = all( @@ -1009,101 +1174,112 @@ def _filter( for type_i in range(self.ntypes) ] ) + # print(__file__, all_excluded) if all_excluded: # all types are excluded so result and qmat should be zeros # we can safaly return a zero matrix... # See also https://stackoverflow.com/a/34725458/9567349 # result: natom x outputs_size x outputs_size_2 # qmat: natom x outputs_size x 3 - natom = tf.shape(inputs)[0] - result = tf.cast( - tf.fill((natom, outputs_size_2, outputs_size[-1]), 0.0), - GLOBAL_TF_FLOAT_PRECISION, + natom = paddle.shape(inputs)[0] + result = paddle.cast( + paddle.full((natom, outputs_size_2, outputs_size[-1]), 0.0), + GLOBAL_PD_FLOAT_PRECISION, ) - qmat = tf.cast( - tf.fill((natom, outputs_size[-1], 3), 0.0), GLOBAL_TF_FLOAT_PRECISION + qmat = paddle.cast( + paddle.full((natom, outputs_size[-1], 3), 0.0), + GLOBAL_PD_FLOAT_PRECISION, ) return result, qmat - with tf.variable_scope(name, reuse=reuse): - start_index = 0 - type_i = 0 - # natom x 4 x outputs_size - if type_embedding is None: - rets = [] - for type_i in range(self.ntypes): - ret = self._filter_lower( - type_i, - type_input, - start_index, - self.sel_a[type_i], - inputs, - nframes, - natoms, - type_embedding=type_embedding, - is_exclude=(type_input, type_i) in self.exclude_types, - activation_fn=activation_fn, - stddev=stddev, - bavg=bavg, - trainable=trainable, - suffix="_" + str(type_i), - ) - if (type_input, type_i) not in self.exclude_types: - # add zero is meaningless; skip - rets.append(ret) - start_index += self.sel_a[type_i] - # faster to use accumulate_n than multiple add - xyz_scatter_1 = tf.accumulate_n(rets) - else: - xyz_scatter_1 = self._filter_lower( + # with tf.variable_scope(name, reuse=reuse): + start_index = 0 + type_i = 0 + # natom x 4 x outputs_size + if type_embedding is None: + rets = [] + for type_i in range(self.ntypes): + ret = self._filter_lower( type_i, type_input, start_index, - np.cumsum(self.sel_a)[-1], + self.sel_a[type_i], inputs, nframes, natoms, type_embedding=type_embedding, - is_exclude=False, + is_exclude=(type_input, type_i) in self.exclude_types, activation_fn=activation_fn, stddev=stddev, bavg=bavg, trainable=trainable, + suffix="_" + str(type_i), ) - if nvnmd_cfg.enable: - return filter_GR2D(xyz_scatter_1) - # natom x nei x outputs_size - # xyz_scatter = tf.concat(xyz_scatter_total, axis=1) - # natom x nei x 4 - 
# inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) - # natom x 4 x outputs_size - # xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) - if self.original_sel is None: - # shape[1] = nnei * 4 - nnei = shape[1] / 4 - else: - nnei = tf.cast( - tf.Variable( - np.sum(self.original_sel), - dtype=tf.int32, - trainable=False, - name="nnei", - ), - self.filter_precision, - ) - xyz_scatter_1 = xyz_scatter_1 / nnei - # natom x 4 x outputs_size_2 - xyz_scatter_2 = tf.slice(xyz_scatter_1, [0, 0, 0], [-1, -1, outputs_size_2]) - # # natom x 3 x outputs_size_2 - # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1]) - # natom x 3 x outputs_size_1 - qmat = tf.slice(xyz_scatter_1, [0, 1, 0], [-1, 3, -1]) - # natom x outputs_size_1 x 3 - qmat = tf.transpose(qmat, perm=[0, 2, 1]) - # natom x outputs_size x outputs_size_2 - result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a=True) - # natom x (outputs_size x outputs_size_2) - result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) + if (type_input, type_i) not in self.exclude_types: + # add zero is meaningless; skip + rets.append(ret) + start_index += self.sel_a[type_i] + # faster to use accumulate_n than multiple add + xyz_scatter_1 = paddle.add_n(rets) + else: + xyz_scatter_1 = self._filter_lower( + type_i, + type_input, + start_index, + np.cumsum(self.sel_a)[-1], + inputs, + nframes, + natoms, + type_embedding=type_embedding, + is_exclude=False, + activation_fn=activation_fn, + stddev=stddev, + bavg=bavg, + trainable=trainable, + ) + # if nvnmd_cfg.enable: + # return filter_GR2D(xyz_scatter_1) + # natom x nei x outputs_size + # xyz_scatter = tf.concat(xyz_scatter_total, axis=1) + # natom x nei x 4 + # inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) + # natom x 4 x outputs_size + # xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) + if self.original_sel is None: + # shape[1] = nnei * 4 + nnei = shape[1] / 4 + else: + nnei = paddle.cast( + paddle.to_tensor( + np.sum(self.original_sel), + dtype=paddle.int32, + stop_gradient=True, + ), + self.filter_precision, + ) + xyz_scatter_1 = xyz_scatter_1 / nnei + # natom x 4 x outputs_size_2 + xyz_scatter_2 = paddle.slice( + xyz_scatter_1, + [0, 1, 2], + [0, 0, 0], + [xyz_scatter_1.shape[0], xyz_scatter_1.shape[1], outputs_size_2], + ) + # # natom x 3 x outputs_size_2 + # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1]) + # natom x 3 x outputs_size_1 + qmat = paddle.slice( + xyz_scatter_1, + [0, 1, 2], + [0, 1, 0], + [xyz_scatter_1.shape[0], 1 + 3, xyz_scatter_1.shape[2]], + ) + # natom x outputs_size_1 x 3 + qmat = paddle.transpose(qmat, perm=[0, 2, 1]) + # natom x outputs_size x outputs_size_2 + result = paddle.matmul(xyz_scatter_1, xyz_scatter_2, transpose_x=True) + # natom x (outputs_size x outputs_size_2) + result = paddle.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) return result, qmat diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py index 9f6547998f..121b6c77a6 100755 --- a/deepmd/entrypoints/freeze.py +++ b/deepmd/entrypoints/freeze.py @@ -8,36 +8,22 @@ import json import logging -from os.path import ( - abspath, -) -from typing import ( - List, - Optional, - Union, -) +from os.path import abspath +from typing import List +from typing import Optional +from typing import Union import google.protobuf.message # load grad of force module import deepmd.op # noqa: F401 -from deepmd.env import ( - FITTING_NET_PATTERN, - REMOVE_SUFFIX_DICT, - tf, -) -from deepmd.nvnmd.entrypoints.freeze 
import ( - save_weight, -) -from deepmd.utils.errors import ( - GraphTooLargeError, -) -from deepmd.utils.graph import ( - get_pattern_nodes_from_graph_def, -) -from deepmd.utils.sess import ( - run_sess, -) +from deepmd.env import FITTING_NET_PATTERN +from deepmd.env import REMOVE_SUFFIX_DICT +from deepmd.env import tf +from deepmd.nvnmd.entrypoints.freeze import save_weight +from deepmd.utils.errors import GraphTooLargeError +from deepmd.utils.graph import get_pattern_nodes_from_graph_def +from deepmd.utils.sess import run_sess __all__ = ["freeze"] @@ -320,14 +306,16 @@ def _make_node_names( def freeze_graph( - sess, - input_graph, - input_node, - freeze_type, - modifier, - out_graph_name, - node_names=None, - out_suffix="", + model_file: str, + output: str, + # sess, + # input_graph, + # input_node, + # freeze_type, + # modifier, + # out_graph_name, + # node_names=None, + # out_suffix="", ): """Freeze the single graph with chosen out_suffix. @@ -350,40 +338,94 @@ def freeze_graph( out_suffix : str The chosen suffix to freeze in the input_graph. """ - output_node = _make_node_names( - freeze_type, modifier, out_suffix=out_suffix, node_names=node_names - ) - different_set = set(output_node) - set(input_node) - if different_set: - log.warning( - "The following nodes are not in the graph: %s. " - "Skip freezeing these nodes. You may be freezing " - "a checkpoint generated by an old version." % different_set - ) - # use intersection as output list - output_node = list(set(output_node) & set(input_node)) - log.info(f"The following nodes will be frozen: {output_node}") - # We use a built-in TF helper to export variables to constants - output_graph_def = tf.graph_util.convert_variables_to_constants( - sess, # The session is used to retrieve the weights - input_graph, # The graph_def is used to retrieve the nodes - output_node, # The output node names are used to select the usefull nodes + # output_node = _make_node_names( + # freeze_type, modifier, out_suffix=out_suffix, node_names=node_names + # ) + # different_set = set(output_node) - set(input_node) + # if different_set: + # log.warning( + # "The following nodes are not in the graph: %s. " + # "Skip freezeing these nodes. You may be freezing " + # "a checkpoint generated by an old version." 
% different_set
+    #     )
+    #     # use intersection as output list
+    #     output_node = list(set(output_node) & set(input_node))
+    # log.info(f"The following nodes will be frozen: {output_node}")
+    # # We use a built-in TF helper to export variables to constants
+    # output_graph_def = tf.graph_util.convert_variables_to_constants(
+    #     sess,  # The session is used to retrieve the weights
+    #     input_graph,  # The graph_def is used to retrieve the nodes
+    #     output_node,  # The output node names are used to select the useful nodes
+    # )
+    # # if multi-task, change fitting_net suffix and model_type
+    # if out_suffix != "":
+    #     output_graph_def = _modify_model_suffix(
+    #         output_graph_def, out_suffix, freeze_type
+    #     )

+    # # If we need to transfer the fitting net variables
+    # output_graph_def = _transfer_fitting_net_trainable_variables(
+    #     sess, output_graph_def, input_graph
+    # )

+    # # Finally we serialize and dump the output graph to the filesystem
+    # with tf.gfile.GFile(out_graph_name, "wb") as f:
+    #     f.write(output_graph_def.SerializeToString())
+    # log.info(f"{len(output_graph_def.node):d} ops in the final graph.")
+    import paddle

+    from deepmd.infer import DeepPot

+    dp = DeepPot(
+        model_file,
+        load_prefix="load",
+        default_tf_graph=False,
    )
-    # if multi-task, change fitting_net suffix and model_type
-    if out_suffix != "":
-        output_graph_def = _modify_model_suffix(
-            output_graph_def, out_suffix, freeze_type
-        )
-
-    # If we need to transfer the fitting net variables
-    output_graph_def = _transfer_fitting_net_trainable_variables(
-        sess, output_graph_def, input_graph
+    # print(dp.model.descrpt.embedding_nets[0][0].weight[0])
+    # for w in dp.model.descrpt.embedding_nets[0][0].weight:
+    #     print(f"w {w.shape} {w.mean().item()} {w.var().item()}")
+    # print("print the parameters loaded from state_dict")
+    # for k, v in dp.model.state_dict().items():
+    #     print(f"{k} {v.shape} {v.dtype} {v.mean().item()} {v.var().item()}")
+    # exit()
+    # for b in dp.model.descrpt.embedding_nets[0][0].bias:
+    #     print(f"b {b.shape} {b.mean().item()} {b.var().item()}")
+    dp.model.eval()
+    from paddle.static import InputSpec

+    st_model = paddle.jit.to_static(
+        dp.model,
+        input_spec=[
+            InputSpec(shape=[None], dtype="float64"),  # coord_
+            InputSpec(shape=[None], dtype="int32"),  # atype_
+            InputSpec(shape=[4], dtype="int32"),  # natoms
+            InputSpec(shape=[None], dtype="float64"),  # box
+            InputSpec(shape=[6], dtype="int32"),  # mesh
+            {
+                # "coord": InputSpec(
+                #     shape=[2880],
+                #     dtype="float64"
+                # ),
+                # "type": InputSpec(
+                #     shape=[960],
+                #     dtype="int32"
+                # ),
+                # "natoms_vec": InputSpec(
+                #     shape=[4],
+                #     dtype="int32"
+                # ),
+                "box": InputSpec(shape=[None], dtype="float64"),
+                # "default_mesh": InputSpec(
+                #     shape=[6],
+                #     dtype="int32"
+                # ),
+            },
+            "",
+            False,
+        ],
    )
-
-    # Finally we serialize and dump the output graph to the filesystem
-    with tf.gfile.GFile(out_graph_name, "wb") as f:
-        f.write(output_graph_def.SerializeToString())
-    log.info(f"{len(output_graph_def.node):d} ops in the final graph.")
+    paddle.jit.save(st_model, output)
+    print(f"Saved to path: {output}")


 def freeze_graph_multi(
@@ -464,11 +506,12 @@ def freeze_graph_multi(

 def freeze(
     *,
-    checkpoint_folder: str,
+    # checkpoint_folder: str,
+    input_file: str,
     output: str,
-    node_names: Optional[str] = None,
-    nvnmd_weight: Optional[str] = None,
-    united_model: bool = False,
+    # node_names: Optional[str] = None,
+    # nvnmd_weight: Optional[str] = None,
+    # united_model: bool = False,
     **kwargs,
 ):
-    """Freeze the graph in supplied folder.
+    """Freeze the graph from the supplied model file.
@@ -489,75 +532,77 @@ def freeze( other arguments """ # We retrieve our checkpoint fullpath - checkpoint = tf.train.get_checkpoint_state(checkpoint_folder) - input_checkpoint = checkpoint.model_checkpoint_path - - # expand the output file to full path - output_graph = abspath(output) - - # Before exporting our graph, we need to precise what is our output node - # This is how TF decides what part of the Graph he has to keep - # and what part it can dump - # NOTE: this variable is plural, because you can have multiple output nodes - # node_names = "energy_test,force_test,virial_test,t_rcut" - - # We clear devices to allow TensorFlow to control - # on which device it will load operations - clear_devices = True - - # We import the meta graph and retrieve a Saver - try: - # In case paralle training - import horovod.tensorflow as _ # noqa: F401 - except ImportError: - pass - saver = tf.train.import_meta_graph( - f"{input_checkpoint}.meta", clear_devices=clear_devices + # checkpoint = tf.train.get_checkpoint_state(checkpoint_folder) + # input_checkpoint = checkpoint.model_checkpoint_path + + # # expand the output file to full path + # output_graph = abspath(output) + + # # Before exporting our graph, we need to precise what is our output node + # # This is how TF decides what part of the Graph he has to keep + # # and what part it can dump + # # NOTE: this variable is plural, because you can have multiple output nodes + # # node_names = "energy_test,force_test,virial_test,t_rcut" + + # # We clear devices to allow TensorFlow to control + # # on which device it will load operations + # clear_devices = True + + # # We import the meta graph and retrieve a Saver + # try: + # # In case paralle training + # import horovod.tensorflow as _ # noqa: F401 + # except ImportError: + # pass + # saver = tf.train.import_meta_graph( + # f"{input_checkpoint}.meta", clear_devices=clear_devices + # ) + + # # We retrieve the protobuf graph definition + # graph = tf.get_default_graph() + # try: + # input_graph_def = graph.as_graph_def() + # except google.protobuf.message.DecodeError as e: + # raise GraphTooLargeError( + # "The graph size exceeds 2 GB, the hard limitation of protobuf." + # " Then a DecodeError was raised by protobuf. You should " + # "reduce the size of your model." + # ) from e + # nodes = [n.name for n in input_graph_def.node] + + # # We start a session and restore the graph weights + # with tf.Session() as sess: + # saver.restore(sess, input_checkpoint) + # model_type = run_sess(sess, "model_attr/model_type:0", feed_dict={}).decode( + # "utf-8" + # ) + # if "modifier_attr/type" in nodes: + # modifier_type = run_sess(sess, "modifier_attr/type:0", feed_dict={}).decode( + # "utf-8" + # ) + # else: + # modifier_type = None + # if nvnmd_weight is not None: + # save_weight(sess, nvnmd_weight) # nvnmd + # if model_type != "multi_task": + freeze_graph( + input_file, + output, + # sess, + # input_graph_def, + # nodes, + # model_type, + # modifier_type, + # output_graph, + # node_names, ) - - # We retrieve the protobuf graph definition - graph = tf.get_default_graph() - try: - input_graph_def = graph.as_graph_def() - except google.protobuf.message.DecodeError as e: - raise GraphTooLargeError( - "The graph size exceeds 2 GB, the hard limitation of protobuf." - " Then a DecodeError was raised by protobuf. You should " - "reduce the size of your model." 
-            ) from e
-        nodes = [n.name for n in input_graph_def.node]
-
-        # We start a session and restore the graph weights
-        with tf.Session() as sess:
-            saver.restore(sess, input_checkpoint)
-            model_type = run_sess(sess, "model_attr/model_type:0", feed_dict={}).decode(
-                "utf-8"
-            )
-            if "modifier_attr/type" in nodes:
-                modifier_type = run_sess(sess, "modifier_attr/type:0", feed_dict={}).decode(
-                    "utf-8"
-                )
-            else:
-                modifier_type = None
-            if nvnmd_weight is not None:
-                save_weight(sess, nvnmd_weight)  # nvnmd
-            if model_type != "multi_task":
-                freeze_graph(
-                    sess,
-                    input_graph_def,
-                    nodes,
-                    model_type,
-                    modifier_type,
-                    output_graph,
-                    node_names,
-                )
-            else:
-                freeze_graph_multi(
-                    sess,
-                    input_graph_def,
-                    nodes,
-                    modifier_type,
-                    output_graph,
-                    node_names,
-                    united_model=united_model,
-                )
+    # else:
+    #     freeze_graph_multi(
+    #         sess,
+    #         input_graph_def,
+    #         nodes,
+    #         modifier_type,
+    #         output_graph,
+    #         node_names,
+    #         united_model=united_model,
+    #     )
diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
index 587bdaace7..ba008f9908 100644
--- a/deepmd/entrypoints/main.py
+++ b/deepmd/entrypoints/main.py
@@ -3,37 +3,23 @@
 import argparse
 import logging
 import textwrap
-from pathlib import (
-    Path,
-)
-from typing import (
-    List,
-    Optional,
-)
-
-from deepmd import (
-    __version__,
-)
-from deepmd.common import (
-    clear_session,
-)
-from deepmd.entrypoints import (
-    compress,
-    convert,
-    doc_train_input,
-    freeze,
-    make_model_devi,
-    neighbor_stat,
-    test,
-    train_dp,
-    transfer,
-)
-from deepmd.loggers import (
-    set_log_handles,
-)
-from deepmd.nvnmd.entrypoints.train import (
-    train_nvnmd,
-)
+from pathlib import Path
+from typing import List
+from typing import Optional
+
+from deepmd import __version__
+from deepmd.common import clear_session
+from deepmd.entrypoints import compress
+from deepmd.entrypoints import convert
+from deepmd.entrypoints import doc_train_input
+from deepmd.entrypoints import freeze
+from deepmd.entrypoints import make_model_devi
+from deepmd.entrypoints import neighbor_stat
+from deepmd.entrypoints import test
+from deepmd.entrypoints import train_dp
+from deepmd.entrypoints import transfer
+from deepmd.loggers import set_log_handles
+from deepmd.nvnmd.entrypoints.train import train_nvnmd

 __all__ = ["main", "parse_args", "get_ll", "main_parser"]

@@ -217,8 +203,8 @@ def main_parser() -> argparse.ArgumentParser:
         ),
     )
     parser_frz.add_argument(
-        "-c",
-        "--checkpoint-folder",
+        "-i",
+        "--input_file",
         type=str,
         default=".",
-        help="path to checkpoint folder",
+        help="path to the input model file",
@@ -230,26 +216,26 @@
         default="frozen_model.pb",
         help="name of graph, will output to the checkpoint folder",
     )
-    parser_frz.add_argument(
-        "-n",
-        "--node-names",
-        type=str,
-        default=None,
-        help="the frozen nodes, if not set, determined from the model type",
-    )
-    parser_frz.add_argument(
-        "-w",
-        "--nvnmd-weight",
-        type=str,
-        default=None,
-        help="the name of weight file (.npy), if set, save the model's weight into the file",
-    )
-    parser_frz.add_argument(
-        "--united-model",
-        action="store_true",
-        default=False,
-        help="When in multi-task mode, freeze all nodes into one united model",
-    )
+    # parser_frz.add_argument(
+    #     "-n",
+    #     "--node-names",
+    #     type=str,
+    #     default=None,
+    #     help="the frozen nodes, if not set, determined from the model type",
+    # )
+    # parser_frz.add_argument(
+    #     "-w",
+    #     "--nvnmd-weight",
+    #     type=str,
+    #     default=None,
+    #     help="the name of weight file (.npy), if set, save the model's weight into 
the file", + # ) + # parser_frz.add_argument( + # "--united-model", + # action="store_true", + # default=False, + # help="When in multi-task mode, freeze all nodes into one united model", + # ) # * test script ******************************************************************** parser_tst = subparsers.add_parser( diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index 2ecc52ebe4..fde5139e1e 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -1,43 +1,27 @@ """Test trained DeePMD model.""" import logging -from pathlib import ( - Path, -) -from typing import ( - TYPE_CHECKING, - Dict, - List, - Optional, - Tuple, -) +from pathlib import Path +from typing import TYPE_CHECKING +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple import numpy as np -from deepmd import ( - DeepPotential, -) -from deepmd.common import ( - expand_sys_str, -) +from deepmd import DeepPotential +from deepmd.common import expand_sys_str from deepmd.utils import random as dp_random -from deepmd.utils.data import ( - DeepmdData, -) -from deepmd.utils.weight_avg import ( - weighted_average, -) +from deepmd.utils.data import DeepmdData +from deepmd.utils.weight_avg import weighted_average if TYPE_CHECKING: - from deepmd.infer import ( - DeepDipole, - DeepDOS, - DeepPolar, - DeepPot, - DeepWFC, - ) - from deepmd.infer.deep_tensor import ( - DeepTensor, - ) + from deepmd.infer import DeepDipole + from deepmd.infer import DeepDOS + from deepmd.infer import DeepPolar + from deepmd.infer import DeepPot + from deepmd.infer import DeepWFC + from deepmd.infer.deep_tensor import DeepTensor __all__ = ["test"] @@ -260,7 +244,7 @@ def test_ener( data.add("energy", 1, atomic=False, must=False, high_prec=True) data.add("force", 3, atomic=True, must=False, high_prec=False) data.add("virial", 9, atomic=False, must=False, high_prec=False) - if dp.has_efield: + if dp.has_efield: # False data.add("efield", 3, atomic=True, must=True, high_prec=False) if has_atom_ener: data.add("atom_ener", 1, atomic=True, must=True, high_prec=False) @@ -298,6 +282,13 @@ def test_ener( else: aparam = None + # print(type(coord)) + # print(type(box)) + # print(type(atype)) + # np.save("/workspace/hesensen/deepmd_backend/infer_align/coord_pd.npy", coord) + # np.save("/workspace/hesensen/deepmd_backend/infer_align/box_pd.npy", box) + # np.save("/workspace/hesensen/deepmd_backend/infer_align/atype_pd.npy", atype) + # exit() ret = dp.eval( coord, box, @@ -341,6 +332,40 @@ def test_ener( )[1] diff_e = energy - test_data["energy"][:numb_test].reshape([-1, 1]) + # print(energy) + """ + [[-29857.71310608] + [-29863.80820815] + [-29860.15135615] + [-29854.51192426] + [-29863.13812543] + [-29855.93205087] + [-29855.50978599] + [-29865.49989375] + [-29859.1466963 ] + [-29857.09336879] + [-29862.98884167] + [-29859.11198703] + [-29861.66000458] + [-29861.923259 ] + [-29865.03699558] + [-29860.04100619] + [-29858.07084488] + [-29865.77369217] + [-29856.55031266] + [-29856.55155207] + [-29855.50095994] + [-29855.1020719 ] + [-29855.39086308] + [-29863.13015616] + [-29858.15176772] + [-29860.35238411] + [-29855.99364597] + [-29862.08350903] + [-29861.07073953] + [-29862.65406131]] + """ + # exit() mae_e = mae(diff_e) rmse_e = rmse(diff_e) diff_f = force - test_data["force"][:numb_test] diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index c806fb3804..05c1af4b8b 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -6,57 +6,31 @@ import 
diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py
index c806fb3804..05c1af4b8b 100755
--- a/deepmd/entrypoints/train.py
+++ b/deepmd/entrypoints/train.py
@@ -6,57 +6,31 @@
 import json
 import logging
 import time
-from typing import (
-    Any,
-    Dict,
-    Optional,
-)
-
-from deepmd.common import (
-    data_requirement,
-    expand_sys_str,
-    j_loader,
-    j_must_have,
-)
-from deepmd.env import (
-    GLOBAL_ENER_FLOAT_PRECISION,
-    reset_default_tf_session_config,
-    tf,
-)
-from deepmd.infer.data_modifier import (
-    DipoleChargeModifier,
-)
-from deepmd.train.run_options import (
-    BUILD,
-    CITATION,
-    WELCOME,
-    RunOptions,
-)
-from deepmd.train.trainer import (
-    DPTrainer,
-)
+from typing import Any
+from typing import Dict
+from typing import Optional
+
+from deepmd.common import data_requirement
+from deepmd.common import expand_sys_str
+from deepmd.common import j_loader
+from deepmd.common import j_must_have
+from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import reset_default_tf_session_config
+from deepmd.env import tf
+from deepmd.infer.data_modifier import DipoleChargeModifier
+from deepmd.train.run_options import BUILD
+from deepmd.train.run_options import CITATION
+from deepmd.train.run_options import WELCOME
+from deepmd.train.run_options import RunOptions
+from deepmd.train.trainer import DPTrainer
 from deepmd.utils import random as dp_random
-from deepmd.utils.argcheck import (
-    normalize,
-)
-from deepmd.utils.compat import (
-    update_deepmd_input,
-)
-from deepmd.utils.data_system import (
-    DeepmdDataSystem,
-)
-from deepmd.utils.finetune import (
-    replace_model_params_with_pretrained_model,
-)
-from deepmd.utils.multi_init import (
-    replace_model_params_with_frz_multi_model,
-)
-from deepmd.utils.neighbor_stat import (
-    NeighborStat,
-)
-from deepmd.utils.path import (
-    DPPath,
-)
+from deepmd.utils.argcheck import normalize
+from deepmd.utils.compat import update_deepmd_input
+from deepmd.utils.data_system import DeepmdDataSystem
+from deepmd.utils.finetune import replace_model_params_with_pretrained_model
+from deepmd.utils.multi_init import replace_model_params_with_frz_multi_model
+from deepmd.utils.neighbor_stat import NeighborStat
+from deepmd.utils.path import DPPath

 __all__ = ["train"]
@@ -270,12 +244,13 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal
         origin_type_map = get_data(
             jdata["training"]["training_data"], rcut, None, modifier
         ).get_type_map()
         model.build(train_data, stop_batch, origin_type_map=origin_type_map)

     if not is_compress:
         # train the model with the provided systems in a cyclic way
         start_time = time.time()
-        model.train(train_data, valid_data)
+        model.train(train_data, valid_data, stop_batch)
         end_time = time.time()
         log.info("finished training")
         log.info(f"wall time: {(end_time - start_time):.3f} s")
@@ -411,17 +386,20 @@ def get_nbor_stat(jdata, rcut, one_type: bool = False):

     neistat = NeighborStat(ntypes, rcut, one_type=one_type)

     min_nbor_dist, max_nbor_size = neistat.get_stat(train_data)

     # moved from trainer.py as duplicated
     # TODO: this is a simple fix but we should have a clear
     # architecture to call neighbor stat
-    tf.constant(
-        min_nbor_dist,
-        name="train_attr/min_nbor_dist",
-        dtype=GLOBAL_ENER_FLOAT_PRECISION,
-    )
-    tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32)
+    # tf.constant(
+    #     min_nbor_dist,
+    #     name="train_attr/min_nbor_dist",
+    #     dtype=GLOBAL_ENER_FLOAT_PRECISION,
+    # )
+    # tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32)

     return min_nbor_dist, max_nbor_size
@@ -467,8 +445,10 @@ def update_one_sel(jdata, descriptor):
     if descriptor["type"] == "loc_frame":
         return descriptor
     rcut = descriptor["rcut"]
-    tmp_sel = get_sel(jdata, rcut, one_type=descriptor["type"] in ("se_atten",))
-    sel = descriptor["sel"]
+    tmp_sel = get_sel(
+        jdata, rcut, one_type=descriptor["type"] in ("se_atten",)
+    )  # e.g. [38 72]: the largest neighbor count per type within each atom's cutoff radius
+    sel = descriptor["sel"]  # e.g. [46, 92]
     if isinstance(sel, int):
         # convert to list and finally convert back to int
         sel = [sel]
@@ -486,6 +466,25 @@ def update_one_sel(jdata, descriptor):
                 "not less than %d, but you set it to %d. The accuracy"
                 " of your model may get worse." % (ii, tt, dd)
             )
+    """
+    descriptor:
+    {
+        'type': 'se_e2_a',
+        'sel': [46, 92],
+        'rcut_smth': 0.5,
+        'rcut': 6.0,
+        'neuron': [25, 50, 100],
+        'resnet_dt': False,
+        'axis_neuron': 16,
+        'seed': 1,
+        'activation_function': 'tanh',
+        'type_one_side': False,
+        'precision': 'default',
+        'trainable': True,
+        'exclude_types': [],
+        'set_davg_zero': False
+    }
+    """
     if descriptor["type"] in ("se_atten",):
         descriptor["sel"] = sel = sum(sel)
     return descriptor
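The `sel` check annotated above compares the user-configured neighbor sizes against the maxima observed in the training data. A condensed sketch of that comparison, assuming `sel` and `tmp_sel` are the per-type lists shown in the comments and reusing the warning text from train.py; `warn` is a stand-in for the module logger:

    def check_sel(sel, tmp_sel, warn=print):
        # warn whenever a configured size falls below the observed maximum
        for ii, (tt, dd) in enumerate(zip(tmp_sel, sel)):
            if dd < tt:
                warn(
                    "sel of type %d is not enough! The expected value is "
                    "not less than %d, but you set it to %d. The accuracy"
                    " of your model may get worse." % (ii, tt, dd)
                )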
diff --git a/deepmd/env.py b/deepmd/env.py
index 2917fff1e8..a34fa32897 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -4,32 +4,20 @@
 import logging
 import os
 import platform
-from configparser import (
-    ConfigParser,
-)
-from importlib import (
-    import_module,
-    reload,
-)
-from pathlib import (
-    Path,
-)
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Tuple,
-)
+from configparser import ConfigParser
+from importlib import import_module
+from importlib import reload
+from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import Tuple

 import numpy as np
-from packaging.version import (
-    Version,
-)
+from packaging.version import Version

 if TYPE_CHECKING:
-    from types import (
-        ModuleType,
-    )
+    from types import ModuleType


 def dlopen_library(module: str, filename: str):
@@ -67,6 +55,7 @@ def dlopen_library(module: str, filename: str):
 # import tensorflow v1 compatibility
 try:
+    import paddle
     import tensorflow.compat.v1 as tf

     tf.disable_v2_behavior()
@@ -105,6 +94,7 @@ def dlopen_library(module: str, filename: str):
 # Python library version
 try:
     tf_py_version = tf.version.VERSION
+    pd_py_version = paddle.version.commit
 except AttributeError:
     tf_py_version = tf.__version__
@@ -370,7 +360,26 @@ def get_module(module_name: str) -> "ModuleType":
         raise FileNotFoundError(f"module {module_name} does not exist")
     else:
         try:
-            module = tf.load_op_library(str(module_file))
+            # module = tf.load_op_library(str(module_file))
+            import paddle_deepmd_lib
+
+            module = paddle_deepmd_lib
         except tf.errors.NotFoundError as e:
             # check CXX11_ABI_FLAG compatibility
             # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
@@ -452,9 +461,9 @@ def _get_package_constants(
 GLOBAL_CONFIG = _get_package_constants()
-MODEL_VERSION = GLOBAL_CONFIG["model_version"]
-TF_VERSION = GLOBAL_CONFIG["tf_version"]
-TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"])
+MODEL_VERSION = 0
+TF_VERSION = 0
+TF_CXX11_ABI_FLAG = 0

 op_module = get_module("deepmd_op")
 op_grads_module = get_module("op_grads")
@@ -464,11 +473,13 @@ def _get_package_constants(
 if dp_float_prec in ("high", ""):
     # default is high
     GLOBAL_TF_FLOAT_PRECISION = tf.float64
+    GLOBAL_PD_FLOAT_PRECISION = paddle.float64
     GLOBAL_NP_FLOAT_PRECISION = np.float64
     GLOBAL_ENER_FLOAT_PRECISION = np.float64
     global_float_prec = "double"
 elif dp_float_prec == "low":
     GLOBAL_TF_FLOAT_PRECISION = tf.float32
+    GLOBAL_PD_FLOAT_PRECISION = paddle.float32
     GLOBAL_NP_FLOAT_PRECISION = np.float32
     GLOBAL_ENER_FLOAT_PRECISION = np.float64
     global_float_prec = "float"
@@ -496,17 +507,33 @@ def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor:
     return tf.cast(xx, GLOBAL_TF_FLOAT_PRECISION)


-def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor:
+def global_cvt_2_pd_float(xx: paddle.Tensor) -> paddle.Tensor:
+    """Cast tensor to the globally set Paddle precision.
+
+    Parameters
+    ----------
+    xx : paddle.Tensor
+        input tensor
+
+    Returns
+    -------
+    paddle.Tensor
+        output tensor cast to `GLOBAL_PD_FLOAT_PRECISION`
+    """
+    return paddle.cast(xx, GLOBAL_PD_FLOAT_PRECISION)
+
+
+def global_cvt_2_ener_float(xx: paddle.Tensor) -> paddle.Tensor:
     """Cast tensor to globally set energy precision.
    Parameters
    ----------
-    xx : tf.Tensor
+    xx : paddle.Tensor
        input tensor

    Returns
    -------
-    tf.Tensor
+    paddle.Tensor
        output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION`
    """
-    return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION)
+    return paddle.cast(xx, GLOBAL_ENER_FLOAT_PRECISION)
diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index f482173495..036bdc54f9 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -1,54 +1,37 @@
 import logging
-from typing import (
-    List,
-    Optional,
-)
+from typing import List
+from typing import Optional

 import numpy as np
-
-from deepmd.common import (
-    add_data_requirement,
-    cast_precision,
-    get_activation_func,
-    get_precision,
-)
-from deepmd.env import (
-    GLOBAL_TF_FLOAT_PRECISION,
-    global_cvt_2_tf_float,
-    tf,
-)
-from deepmd.fit.fitting import (
-    Fitting,
-)
-from deepmd.infer import (
-    DeepPotential,
-)
-from deepmd.nvnmd.fit.ener import (
-    one_layer_nvnmd,
-)
-from deepmd.nvnmd.utils.config import (
-    nvnmd_cfg,
-)
-from deepmd.utils.errors import (
-    GraphWithoutTensorError,
-)
-from deepmd.utils.graph import (
-    get_fitting_net_variables_from_graph_def,
-    get_tensor_by_name_from_graph,
-)
+from paddle import nn
+
+from deepmd.common import add_data_requirement
+from deepmd.common import cast_precision
+from deepmd.common import get_activation_func
+from deepmd.common import get_precision
+from deepmd.env import GLOBAL_PD_FLOAT_PRECISION
+from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
+from deepmd.env import global_cvt_2_pd_float
+from deepmd.env import global_cvt_2_tf_float
+from deepmd.env import paddle
+from deepmd.env import tf
+from deepmd.fit.fitting import Fitting
+from deepmd.infer import DeepPotential
+from deepmd.nvnmd.fit.ener import one_layer_nvnmd
+from deepmd.nvnmd.utils.config import nvnmd_cfg
+from deepmd.utils.errors import GraphWithoutTensorError
+from deepmd.utils.graph import get_fitting_net_variables_from_graph_def
+from deepmd.utils.graph import get_tensor_by_name_from_graph
+from deepmd.utils.network import OneLayer as OneLayer_deepmd
 from deepmd.utils.network import one_layer as one_layer_deepmd
-from deepmd.utils.network import (
-    one_layer_rand_seed_shift,
-)
-from deepmd.utils.spin import (
-    Spin,
-)
+from deepmd.utils.network import one_layer_rand_seed_shift
+from deepmd.utils.spin import Spin

 log = logging.getLogger(__name__)


-@Fitting.register("ener")
-class EnerFitting(Fitting):
+# @Fitting.register("ener")
+class EnerFitting(nn.Layer):
     r"""Fitting the energy of the system. The force and the virial can also be trained.
The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`: @@ -121,7 +104,7 @@ class EnerFitting(Fitting): def __init__( self, - descrpt: tf.Tensor, + descrpt: paddle.Tensor, neuron: List[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, @@ -138,6 +121,7 @@ def __init__( use_aparam_as_mask: bool = False, spin: Optional[Spin] = None, ) -> None: + super().__init__(name_scope="EnerFitting") """Constructor.""" # model param self.ntypes = descrpt.get_ntypes() @@ -180,13 +164,15 @@ def __init__( self.atom_ener_v = atom_ener for at, ae in enumerate(atom_ener): if ae is not None: - self.atom_ener.append( - tf.constant(ae, GLOBAL_TF_FLOAT_PRECISION, name="atom_%d_ener" % at) - ) + self.atom_ener.append(paddle.to_tensor(ae, GLOBAL_PD_FLOAT_PRECISION)) else: self.atom_ener.append(None) self.useBN = False self.bias_atom_e = np.zeros(self.ntypes, dtype=np.float64) + self.register_buffer( + "t_bias_atom_e", + paddle.to_tensor(self.bias_atom_e), + ) # data requirement if self.numb_fparam > 0: add_data_requirement( @@ -212,6 +198,96 @@ def __init__( len(self.layer_name) == len(self.n_neuron) + 1 ), "length of layer_name should be that of n_neuron + 1" + type_suffix = "" + suffix = "" + self.one_layers = nn.LayerList() + self.final_layers = nn.LayerList() + ntypes_atom = self.ntypes - self.ntypes_spin + for type_i in range(0, ntypes_atom): + type_i_layers = nn.LayerList() + for ii in range(0, len(self.n_neuron)): + if self.layer_name is not None and self.layer_name[ii] is not None: + layer_suffix = "share_" + self.layer_name[ii] + type_suffix + else: + layer_suffix = "layer_" + str(ii) + type_suffix + suffix + + if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii - 1]: + type_i_layers.append( + OneLayer_deepmd( + self.n_neuron[ii - 1], + self.n_neuron[ii], + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + name=layer_suffix, + seed=self.seed, + use_timestep=self.resnet_dt, + trainable=self.trainable[ii], + ) + ) + else: + type_i_layers.append( + OneLayer_deepmd( + self.dim_descrpt + self.numb_fparam + self.numb_aparam, + self.n_neuron[ii], + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + name=layer_suffix, + seed=self.seed, + trainable=self.trainable[ii], + ) + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift + + self.one_layers.append(type_i_layers) + self.final_layers.append( + OneLayer_deepmd( + self.n_neuron[-1], + 1, + activation_fn=None, + precision=self.fitting_precision, + bavg=self.bias_atom_e, + name=layer_suffix, + seed=self.seed, + trainable=self.trainable[-1], + ) + ) + + # print("create bias_atom_e", self.bias_atom_e.shape, self.bias_atom_e) + # self.register_buffer( + # "t_bias_atom_e", + # paddle.to_tensor(self.bias_atom_e), + # ) + if self.numb_fparam > 0: + if self.fparam_avg is None: + self.fparam_avg = 0.0 + if self.fparam_inv_std is None: + self.fparam_inv_std = 1.0 + if self.numb_aparam > 0: + if self.aparam_avg is None: + self.aparam_avg = 0.0 + if self.aparam_inv_std is None: + self.aparam_inv_std = 1.0 + + if self.numb_fparam > 0: + self.register_buffer( + "t_fparam_avg", + paddle.to_tensor(self.fparam_avg), + ) + self.register_buffer( + "t_fparam_istd", + paddle.to_tensor(self.fparam_inv_std), + ) + if self.numb_aparam > 0: + self.register_buffer( + "t_aparam_avg", + paddle.to_tensor(self.aparam_avg), + ) + self.register_buffer( + "t_aparam_istd", + paddle.to_tensor(self.aparam_inv_std), + ) + def 
get_numb_fparam(self) -> int: """Get the number of frame parameters.""" return self.numb_fparam @@ -237,6 +313,11 @@ def compute_output_stats(self, all_stat: dict, mixed_type: bool = False) -> None self.bias_atom_e = self._compute_output_stats( all_stat, rcond=self.rcond, mixed_type=mixed_type ) + paddle.assign(self.bias_atom_e, self.t_bias_atom_e) + # self.register_buffer( + # "t_bias_atom_e", + # paddle.to_tensor(self.bias_atom_e), + # ) def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): data = all_stat["energy"] @@ -335,7 +416,7 @@ def compute_input_stats(self, all_stat: dict, protection: float = 1e-2) -> None: def _compute_std(self, sumv2, sumv, sumn): return np.sqrt(sumv2 / sumn - np.multiply(sumv / sumn, sumv / sumn)) - @cast_precision + # @cast_precision def _build_lower( self, start_index, @@ -346,103 +427,91 @@ def _build_lower( bias_atom_e=0.0, type_suffix="", suffix="", - reuse=None, + # reuse=None, + type_i=None, ): # cut-out inputs - inputs_i = tf.slice(inputs, [0, start_index, 0], [-1, natoms, -1]) - inputs_i = tf.reshape(inputs_i, [-1, self.dim_descrpt]) + inputs_i = paddle.slice( + inputs, + [0, 1, 2], + [0, start_index, 0], + [inputs.shape[0], start_index + natoms, inputs.shape[2]], + ) + inputs_i = paddle.reshape(inputs_i, [-1, self.dim_descrpt]) layer = inputs_i if fparam is not None: - ext_fparam = tf.tile(fparam, [1, natoms]) - ext_fparam = tf.reshape(ext_fparam, [-1, self.numb_fparam]) - ext_fparam = tf.cast(ext_fparam, self.fitting_precision) - layer = tf.concat([layer, ext_fparam], axis=1) + ext_fparam = paddle.tile(fparam, [1, natoms]) + ext_fparam = paddle.reshape(ext_fparam, [-1, self.numb_fparam]) + ext_fparam = paddle.cast(ext_fparam, self.fitting_precision) + layer = paddle.concat([layer, ext_fparam], axis=1) if aparam is not None: - ext_aparam = tf.slice( + ext_aparam = paddle.slice( aparam, + [0, 1], [0, start_index * self.numb_aparam], - [-1, natoms * self.numb_aparam], + [ + aparam.shape[0], + start_index * self.numb_aparam + natoms * self.numb_aparam, + ], ) - ext_aparam = tf.reshape(ext_aparam, [-1, self.numb_aparam]) - ext_aparam = tf.cast(ext_aparam, self.fitting_precision) - layer = tf.concat([layer, ext_aparam], axis=1) - - if nvnmd_cfg.enable: - one_layer = one_layer_nvnmd - else: - one_layer = one_layer_deepmd + ext_aparam = paddle.reshape(ext_aparam, [-1, self.numb_aparam]) + ext_aparam = paddle.cast(ext_aparam, self.fitting_precision) + layer = paddle.concat([layer, ext_aparam], axis=1) + + # if nvnmd_cfg.enable: + # one_layer = one_layer_nvnmd + # else: + # one_layer = one_layer_deepmd for ii in range(0, len(self.n_neuron)): - if self.layer_name is not None and self.layer_name[ii] is not None: - layer_suffix = "share_" + self.layer_name[ii] + type_suffix - layer_reuse = tf.AUTO_REUSE - else: - layer_suffix = "layer_" + str(ii) + type_suffix + suffix - layer_reuse = reuse + # if self.layer_name is not None and self.layer_name[ii] is not None: + # layer_suffix = "share_" + self.layer_name[ii] + type_suffix + # layer_reuse = tf.AUTO_REUSE + # else: + # layer_suffix = "layer_" + str(ii) + type_suffix + suffix + # layer_reuse = reuse if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii - 1]: - layer += one_layer( - layer, - self.n_neuron[ii], - name=layer_suffix, - reuse=layer_reuse, - seed=self.seed, - use_timestep=self.resnet_dt, - activation_fn=self.fitting_activation_fn, - precision=self.fitting_precision, - trainable=self.trainable[ii], - uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, - 
mixed_prec=self.mixed_prec, - ) + layer += self.one_layers[type_i][ii](layer) else: - layer = one_layer( - layer, - self.n_neuron[ii], - name=layer_suffix, - reuse=layer_reuse, - seed=self.seed, - activation_fn=self.fitting_activation_fn, - precision=self.fitting_precision, - trainable=self.trainable[ii], - uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, - mixed_prec=self.mixed_prec, - ) + layer = self.one_layers[type_i][ii](layer) + # print(f"use {ii} of {len(self.one_layers)}_{type_i}") + # if (not self.uniform_seed) and (self.seed is not None): + # self.seed += self.seed_shift + # if self.layer_name is not None and self.layer_name[-1] is not None: + # layer_suffix = "share_" + self.layer_name[-1] + type_suffix + # layer_reuse = tf.AUTO_REUSE + # else: + # layer_suffix = "final_layer" + type_suffix + suffix + # layer_reuse = reuse if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift - if self.layer_name is not None and self.layer_name[-1] is not None: - layer_suffix = "share_" + self.layer_name[-1] + type_suffix - layer_reuse = tf.AUTO_REUSE - else: - layer_suffix = "final_layer" + type_suffix + suffix - layer_reuse = reuse - final_layer = one_layer( + final_layer = self.final_layers[type_i]( layer, - 1, - activation_fn=None, - bavg=bias_atom_e, - name=layer_suffix, - reuse=layer_reuse, - seed=self.seed, - precision=self.fitting_precision, - trainable=self.trainable[-1], - uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, - mixed_prec=self.mixed_prec, - final_layer=True, + # 1, + # activation_fn=None, + # bavg=bias_atom_e, + # name=layer_suffix, + # reuse=layer_reuse, + # seed=self.seed, + # precision=self.fitting_precision, + # trainable=self.trainable[-1], + # uniform_seed=self.uniform_seed, + # initial_variables=self.fitting_net_variables, + # mixed_prec=self.mixed_prec, + # final_layer=True, ) if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift return final_layer - def build( + def forward( self, - inputs: tf.Tensor, - natoms: tf.Tensor, + inputs: paddle.Tensor, + natoms: paddle.Tensor, input_dict: Optional[dict] = None, reuse: Optional[bool] = None, suffix: str = "", - ) -> tf.Tensor: + ) -> paddle.Tensor: """Build the computational graph for fitting net. 
Parameters @@ -504,59 +573,18 @@ def build( self.bias_atom_e[type_i] = self.bias_atom_e[type_i] self.bias_atom_e = self.bias_atom_e[:ntypes_atom] - with tf.variable_scope("fitting_attr" + suffix, reuse=reuse): - t_dfparam = tf.constant(self.numb_fparam, name="dfparam", dtype=tf.int32) - t_daparam = tf.constant(self.numb_aparam, name="daparam", dtype=tf.int32) - self.t_bias_atom_e = tf.get_variable( - "t_bias_atom_e", - self.bias_atom_e.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(self.bias_atom_e), - ) - if self.numb_fparam > 0: - t_fparam_avg = tf.get_variable( - "t_fparam_avg", - self.numb_fparam, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(self.fparam_avg), - ) - t_fparam_istd = tf.get_variable( - "t_fparam_istd", - self.numb_fparam, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(self.fparam_inv_std), - ) - if self.numb_aparam > 0: - t_aparam_avg = tf.get_variable( - "t_aparam_avg", - self.numb_aparam, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(self.aparam_avg), - ) - t_aparam_istd = tf.get_variable( - "t_aparam_istd", - self.numb_aparam, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(self.aparam_inv_std), - ) - - inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) + inputs = paddle.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) if len(self.atom_ener): # only for atom_ener nframes = input_dict.get("nframes") if nframes is not None: # like inputs, but we don't want to add a dependency on inputs - inputs_zero = tf.zeros( + inputs_zero = paddle.zeros( (nframes, natoms[0], self.dim_descrpt), - dtype=GLOBAL_TF_FLOAT_PRECISION, + dtype=GLOBAL_PD_FLOAT_PRECISION, ) else: - inputs_zero = tf.zeros_like(inputs, dtype=GLOBAL_TF_FLOAT_PRECISION) + inputs_zero = paddle.zeros_like(inputs, dtype=GLOBAL_PD_FLOAT_PRECISION) if bias_atom_e is not None: assert len(bias_atom_e) == self.ntypes @@ -564,37 +592,42 @@ def build( fparam = None if self.numb_fparam > 0: fparam = input_dict["fparam"] - fparam = tf.reshape(fparam, [-1, self.numb_fparam]) - fparam = (fparam - t_fparam_avg) * t_fparam_istd + fparam = paddle.reshape(fparam, [-1, self.numb_fparam]) + fparam = (fparam - self.t_fparam_avg) * self.t_fparam_istd aparam = None if not self.use_aparam_as_mask: if self.numb_aparam > 0: aparam = input_dict["aparam"] - aparam = tf.reshape(aparam, [-1, self.numb_aparam]) - aparam = (aparam - t_aparam_avg) * t_aparam_istd - aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) + aparam = paddle.reshape(aparam, [-1, self.numb_aparam]) + aparam = (aparam - self.t_aparam_avg) * self.t_aparam_istd + aparam = paddle.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) - atype_nall = tf.reshape(atype, [-1, natoms[1]]) - self.atype_nloc = tf.slice( - atype_nall, [0, 0], [-1, natoms[0]] + atype_nall = paddle.reshape(atype, [-1, natoms[1]]) + self.atype_nloc = paddle.slice( + atype_nall, [0, 1], [0, 0], [atype_nall.shape[0], natoms[0]] ) ## lammps will make error - atype_filter = tf.cast(self.atype_nloc >= 0, GLOBAL_TF_FLOAT_PRECISION) - self.atype_nloc = tf.reshape(self.atype_nloc, [-1]) + atype_filter = paddle.cast(self.atype_nloc >= 0, GLOBAL_PD_FLOAT_PRECISION) + self.atype_nloc = paddle.reshape(self.atype_nloc, [-1]) # prevent embedding_lookup error, # but the filter will be applied anyway - self.atype_nloc = tf.clip_by_value(self.atype_nloc, 0, self.ntypes 
- 1)
+        self.atype_nloc = paddle.clip(self.atype_nloc, 0, self.ntypes - 1)

         ## if spin is used
         if self.spin is not None:
-            self.atype_nloc = tf.slice(
-                atype_nall, [0, 0], [-1, tf.reduce_sum(natoms[2 : 2 + ntypes_atom])]
+            self.atype_nloc = paddle.slice(
+                atype_nall,
+                [0, 1],
+                [0, 0],
+                [-1, paddle.sum(natoms[2 : 2 + ntypes_atom]).item()],
             )
-            atype_filter = tf.cast(self.atype_nloc >= 0, GLOBAL_TF_FLOAT_PRECISION)
-            self.atype_nloc = tf.reshape(self.atype_nloc, [-1])
+            atype_filter = paddle.cast(self.atype_nloc >= 0, GLOBAL_PD_FLOAT_PRECISION)
+            self.atype_nloc = paddle.reshape(self.atype_nloc, [-1])

         if type_embedding is not None:
-            atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc)
+            atype_embed = paddle.nn.functional.embedding(
+                self.atype_nloc, type_embedding
+            )
         else:
             atype_embed = None
@@ -613,7 +658,8 @@
                 bias_atom_e=0.0,
                 type_suffix="_type_" + str(type_i),
                 suffix=suffix,
-                reuse=reuse,
+                # reuse=reuse,
+                type_i=type_i,
             )
             # concat the results
             if type_i < len(self.atom_ener) and self.atom_ener[type_i] is not None:
@@ -626,82 +672,80 @@
                     bias_atom_e=0.0,
                     type_suffix="_type_" + str(type_i),
                     suffix=suffix,
-                    reuse=True,
+                    # reuse=True,
+                    type_i=type_i,
                 )
                 final_layer -= zero_layer
-            final_layer = tf.reshape(
-                final_layer, [tf.shape(inputs)[0], natoms[2 + type_i]]
+            final_layer = paddle.reshape(
+                final_layer, [paddle.shape(inputs)[0], natoms[2 + type_i]]
             )
             outs_list.append(final_layer)
             start_index += natoms[2 + type_i]
         # concat the results
         # concat once may be faster than multiple concat
-        outs = tf.concat(outs_list, axis=1)
+        outs = paddle.concat(outs_list, axis=1)
         # with type embedding
         else:
-            atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION)
-            type_shape = atype_embed.get_shape().as_list()
-            inputs = tf.concat(
-                [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1
+            atype_embed = paddle.cast(atype_embed, GLOBAL_PD_FLOAT_PRECISION)
+            type_shape = atype_embed.shape
+            inputs = paddle.concat(
+                [paddle.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1
             )
             original_dim_descrpt = self.dim_descrpt
             self.dim_descrpt = self.dim_descrpt + type_shape[1]
-            inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt])
-            final_layer = self._build_lower(
-                0,
-                natoms[0],
-                inputs,
-                fparam,
-                aparam,
-                bias_atom_e=0.0,
-                suffix=suffix,
-                reuse=reuse,
-            )
+            inputs = paddle.reshape(inputs, [-1, natoms[0], self.dim_descrpt])
+            final_layer = inputs
+            for layer_j in range(0 * ntypes_atom, (0 + 1) * ntypes_atom):
+                final_layer = self.one_layers[layer_j](final_layer)
+            final_layer = self.final_layers[0](final_layer)
             if len(self.atom_ener):
                 # remove contribution in vacuum
-                inputs_zero = tf.concat(
-                    [tf.reshape(inputs_zero, [-1, original_dim_descrpt]), atype_embed],
+                inputs_zero = paddle.concat(
+                    [
+                        paddle.reshape(inputs_zero, [-1, original_dim_descrpt]),
+                        atype_embed,
+                    ],
                     axis=1,
                 )
-                inputs_zero = tf.reshape(inputs_zero, [-1, natoms[0], self.dim_descrpt])
-                zero_layer = self._build_lower(
-                    0,
-                    natoms[0],
-                    inputs_zero,
-                    fparam,
-                    aparam,
-                    bias_atom_e=0.0,
-                    suffix=suffix,
-                    reuse=True,
-                )
-                # atomic energy will be stored in `self.t_bias_atom_e` which is not trainable
+                inputs_zero = paddle.reshape(
+                    inputs_zero, [-1, natoms[0], self.dim_descrpt]
+                )
+                zero_layer = inputs_zero
+                for layer_j in range(0 * ntypes_atom, (0 + 1) * ntypes_atom):
+                    zero_layer = self.one_layers[layer_j](zero_layer)
+                zero_layer = self.final_layers[0](zero_layer)
                 final_layer -= zero_layer
-            outs = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms[0]])
+            outs = paddle.reshape(final_layer, [paddle.shape(inputs)[0], natoms[0]])
         # add bias
         self.atom_ener_before = outs * atype_filter
-        self.add_type = tf.reshape(
-            tf.nn.embedding_lookup(self.t_bias_atom_e, self.atype_nloc),
-            [tf.shape(inputs)[0], tf.reduce_sum(natoms[2 : 2 + ntypes_atom])],
+        self.add_type = paddle.reshape(
+            paddle.nn.functional.embedding(
+                self.atype_nloc, self.t_bias_atom_e.reshape([2, -1])
+            ),
+            [paddle.shape(inputs)[0], paddle.sum(natoms[2 : 2 + ntypes_atom]).item()],
         )
         outs = outs + self.add_type
         outs *= atype_filter
         self.atom_ener_after = outs

         if self.tot_ener_zero:
             force_tot_ener = 0.0
-            outs = tf.reshape(outs, [-1, tf.reduce_sum(natoms[2 : 2 + ntypes_atom])])
-            outs_mean = tf.reshape(tf.reduce_mean(outs, axis=1), [-1, 1])
-            outs_mean = outs_mean - tf.ones_like(
-                outs_mean, dtype=GLOBAL_TF_FLOAT_PRECISION
+            outs = paddle.reshape(
+                outs, [-1, paddle.sum(natoms[2 : 2 + ntypes_atom]).item()]
+            )
+            outs_mean = paddle.reshape(paddle.mean(outs, axis=1), [-1, 1])
+            outs_mean = outs_mean - paddle.ones_like(
+                outs_mean, dtype=GLOBAL_PD_FLOAT_PRECISION
             ) * (
                 force_tot_ener
-                / global_cvt_2_tf_float(tf.reduce_sum(natoms[2 : 2 + ntypes_atom]))
+                / global_cvt_2_pd_float(paddle.sum(natoms[2 : 2 + ntypes_atom]))
             )
             outs = outs - outs_mean
-            outs = tf.reshape(outs, [-1])
-
-        tf.summary.histogram("fitting_net_output", outs)
-        return tf.reshape(outs, [-1])
+            outs = paddle.reshape(outs, [-1])
+        return paddle.reshape(outs, [-1])

     def init_variables(
         self,
diff --git a/deepmd/fit/ener_tf.py b/deepmd/fit/ener_tf.py
new file mode 100644
index 0000000000..1f77d2fdb6
--- /dev/null
+++ b/deepmd/fit/ener_tf.py
@@ -0,0 +1,888 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import List
+from typing import Optional
+
+import numpy as np
+
+from deepmd.common import add_data_requirement
+from deepmd.common import cast_precision
+from deepmd.common import get_activation_func
+from deepmd.common import get_precision
+from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
+from deepmd.env import global_cvt_2_tf_float
+from deepmd.env import tf
+from deepmd.fit.fitting import Fitting
+from deepmd.infer import DeepPotential
+from deepmd.loss.ener import EnerDipoleLoss
+from deepmd.loss.ener import EnerSpinLoss
+from deepmd.loss.ener import EnerStdLoss
+from deepmd.loss.loss import Loss
+from deepmd.nvnmd.fit.ener import one_layer_nvnmd
+from deepmd.nvnmd.utils.config import nvnmd_cfg
+from deepmd.utils.errors import GraphWithoutTensorError
+from deepmd.utils.graph import get_fitting_net_variables_from_graph_def
+from deepmd.utils.graph import get_tensor_by_name_from_graph
+from deepmd.utils.network import one_layer as one_layer_deepmd
+from deepmd.utils.network import
one_layer_rand_seed_shift +from deepmd.utils.spin import Spin + +log = logging.getLogger(__name__) + + +@Fitting.register("ener") +class EnerFitting(Fitting): + r"""Fitting the energy of the system. The force and the virial can also be trained. + + The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`: + + .. math:: + E(\mathcal{D}) = \mathcal{L}^{(n)} \circ \mathcal{L}^{(n-1)} + \circ \cdots \circ \mathcal{L}^{(1)} \circ \mathcal{L}^{(0)} + + The first :math:`n` hidden layers :math:`\mathcal{L}^{(0)}, \cdots, \mathcal{L}^{(n-1)}` are given by + + .. math:: + \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})= + \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}) + + where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` + is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and + :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, + both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}` + is the activation function. + + The output layer :math:`\mathcal{L}^{(n)}` is given by + + .. math:: + \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})= + \mathbf{x}^T\mathbf{w}+\mathbf{b} + + where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` + is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and + :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively, + both of which are trainable if `trainable[n]` is `True`. + + Parameters + ---------- + descrpt + The descrptor :math:`\mathcal{D}` + neuron + Number of neurons :math:`N` in each hidden layer of the fitting net + resnet_dt + Time-step `dt` in the resnet construction: + :math:`y = x + dt * \phi (Wx + b)` + numb_fparam + Number of frame parameter + numb_aparam + Number of atomic parameter + rcond + The condition number for the regression of atomic energy. + tot_ener_zero + Force the total energy to zero. Useful for the charge fitting. + trainable + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, + this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. + seed + Random seed for initializing the network parameters. + atom_ener + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. + activation_function + The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN| + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + uniform_seed + Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed + layer_name : list[Optional[str]], optional + The name of the each layer. If two layers, either in the same fitting or different fittings, + have the same name, they will share the same neural network parameters. + use_aparam_as_mask: bool, optional + If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. + And the aparam will not be used as the atomic parameters for embedding. 
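+    spin : Spin, optional
+        The settings for systems with spin; ``None`` if spin is not used.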
+ """ + + def __init__( + self, + descrpt: tf.Tensor, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + rcond: Optional[float] = None, + tot_ener_zero: bool = False, + trainable: Optional[List[bool]] = None, + seed: Optional[int] = None, + atom_ener: List[float] = [], + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, + layer_name: Optional[List[Optional[str]]] = None, + use_aparam_as_mask: bool = False, + spin: Optional[Spin] = None, + **kwargs, + ) -> None: + """Constructor.""" + # model param + self.ntypes = descrpt.get_ntypes() + self.dim_descrpt = descrpt.get_dim_out() + self.use_aparam_as_mask = use_aparam_as_mask + # args = ()\ + # .add('numb_fparam', int, default = 0)\ + # .add('numb_aparam', int, default = 0)\ + # .add('neuron', list, default = [120,120,120], alias = 'n_neuron')\ + # .add('resnet_dt', bool, default = True)\ + # .add('rcond', float, default = 1e-3) \ + # .add('tot_ener_zero', bool, default = False) \ + # .add('seed', int) \ + # .add('atom_ener', list, default = [])\ + # .add("activation_function", str, default = "tanh")\ + # .add("precision", str, default = "default")\ + # .add("trainable", [list, bool], default = True) + self.numb_fparam = numb_fparam + self.numb_aparam = numb_aparam + self.n_neuron = neuron + self.resnet_dt = resnet_dt + self.rcond = rcond + self.seed = seed + self.uniform_seed = uniform_seed + self.spin = spin + self.ntypes_spin = self.spin.get_ntypes_spin() if self.spin is not None else 0 + self.seed_shift = one_layer_rand_seed_shift() + self.tot_ener_zero = tot_ener_zero + self.fitting_activation_fn = get_activation_func(activation_function) + self.fitting_precision = get_precision(precision) + self.trainable = trainable + if self.trainable is None: + self.trainable = [True for ii in range(len(self.n_neuron) + 1)] + if isinstance(self.trainable, bool): + self.trainable = [self.trainable] * (len(self.n_neuron) + 1) + assert ( + len(self.trainable) == len(self.n_neuron) + 1 + ), "length of trainable should be that of n_neuron + 1" + self.atom_ener = [] + self.atom_ener_v = atom_ener + for at, ae in enumerate(atom_ener): + if ae is not None: + self.atom_ener.append( + tf.constant(ae, GLOBAL_TF_FLOAT_PRECISION, name="atom_%d_ener" % at) + ) + else: + self.atom_ener.append(None) + self.useBN = False + self.bias_atom_e = np.zeros(self.ntypes, dtype=np.float64) + # data requirement + if self.numb_fparam > 0: + add_data_requirement( + "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False + ) + self.fparam_avg = None + self.fparam_std = None + self.fparam_inv_std = None + if self.numb_aparam > 0: + add_data_requirement( + "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False + ) + self.aparam_avg = None + self.aparam_std = None + self.aparam_inv_std = None + + self.fitting_net_variables = None + self.mixed_prec = None + self.layer_name = layer_name + if self.layer_name is not None: + assert isinstance(self.layer_name, list), "layer_name should be a list" + assert ( + len(self.layer_name) == len(self.n_neuron) + 1 + ), "length of layer_name should be that of n_neuron + 1" + + def get_numb_fparam(self) -> int: + """Get the number of frame parameters.""" + return self.numb_fparam + + def get_numb_aparam(self) -> int: + """Get the number of atomic parameters.""" + return self.numb_fparam + + def compute_output_stats(self, all_stat: dict, mixed_type: bool = False) -> None: + """Compute the ouput statistics. 
+ + Parameters + ---------- + all_stat + must have the following components: + all_stat['energy'] of shape n_sys x n_batch x n_frame + can be prepared by model.make_stat_input + mixed_type + Whether to perform the mixed_type mode. + If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), + in which frames in a system may have different natoms_vec(s), with the same nloc. + """ + self.bias_atom_e = self._compute_output_stats( + all_stat, rcond=self.rcond, mixed_type=mixed_type + ) + + def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): + data = all_stat["energy"] + # data[sys_idx][batch_idx][frame_idx] + sys_ener = [] + for ss in range(len(data)): + sys_data = [] + for ii in range(len(data[ss])): + for jj in range(len(data[ss][ii])): + sys_data.append(data[ss][ii][jj]) + sys_data = np.concatenate(sys_data) + sys_ener.append(np.average(sys_data)) + sys_ener = np.array(sys_ener) + sys_tynatom = [] + if mixed_type: + data = all_stat["real_natoms_vec"] + nsys = len(data) + for ss in range(len(data)): + tmp_tynatom = [] + for ii in range(len(data[ss])): + for jj in range(len(data[ss][ii])): + tmp_tynatom.append(data[ss][ii][jj].astype(np.float64)) + tmp_tynatom = np.average(np.array(tmp_tynatom), axis=0) + sys_tynatom.append(tmp_tynatom) + else: + data = all_stat["natoms_vec"] + nsys = len(data) + for ss in range(len(data)): + sys_tynatom.append(data[ss][0].astype(np.float64)) + sys_tynatom = np.array(sys_tynatom) + sys_tynatom = np.reshape(sys_tynatom, [nsys, -1]) + sys_tynatom = sys_tynatom[:, 2:] + if len(self.atom_ener) > 0: + # Atomic energies stats are incorrect if atomic energies are assigned. + # In this situation, we directly use these assigned energies instead of computing stats. + # This will make the loss decrease quickly + assigned_atom_ener = np.array( + [ee for ee in self.atom_ener_v if ee is not None] + ) + assigned_ener_idx = [ + ii for ii, ee in enumerate(self.atom_ener_v) if ee is not None + ] + # np.dot out size: nframe + sys_ener -= np.dot(sys_tynatom[:, assigned_ener_idx], assigned_atom_ener) + sys_tynatom[:, assigned_ener_idx] = 0.0 + energy_shift, resd, rank, s_value = np.linalg.lstsq( + sys_tynatom, sys_ener, rcond=rcond + ) + if len(self.atom_ener) > 0: + for ii in assigned_ener_idx: + energy_shift[ii] = self.atom_ener_v[ii] + return energy_shift + + def compute_input_stats(self, all_stat: dict, protection: float = 1e-2) -> None: + """Compute the input statistics. 
+ + Parameters + ---------- + all_stat + if numb_fparam > 0 must have all_stat['fparam'] + if numb_aparam > 0 must have all_stat['aparam'] + can be prepared by model.make_stat_input + protection + Divided-by-zero protection + """ + # stat fparam + if self.numb_fparam > 0: + cat_data = np.concatenate(all_stat["fparam"], axis=0) + cat_data = np.reshape(cat_data, [-1, self.numb_fparam]) + self.fparam_avg = np.average(cat_data, axis=0) + self.fparam_std = np.std(cat_data, axis=0) + for ii in range(self.fparam_std.size): + if self.fparam_std[ii] < protection: + self.fparam_std[ii] = protection + self.fparam_inv_std = 1.0 / self.fparam_std + # stat aparam + if self.numb_aparam > 0: + sys_sumv = [] + sys_sumv2 = [] + sys_sumn = [] + for ss_ in all_stat["aparam"]: + ss = np.reshape(ss_, [-1, self.numb_aparam]) + sys_sumv.append(np.sum(ss, axis=0)) + sys_sumv2.append(np.sum(np.multiply(ss, ss), axis=0)) + sys_sumn.append(ss.shape[0]) + sumv = np.sum(sys_sumv, axis=0) + sumv2 = np.sum(sys_sumv2, axis=0) + sumn = np.sum(sys_sumn) + self.aparam_avg = (sumv) / sumn + self.aparam_std = self._compute_std(sumv2, sumv, sumn) + for ii in range(self.aparam_std.size): + if self.aparam_std[ii] < protection: + self.aparam_std[ii] = protection + self.aparam_inv_std = 1.0 / self.aparam_std + + def _compute_std(self, sumv2, sumv, sumn): + return np.sqrt(sumv2 / sumn - np.multiply(sumv / sumn, sumv / sumn)) + + @cast_precision + def _build_lower( + self, + start_index, + natoms, + inputs, + fparam=None, + aparam=None, + bias_atom_e=0.0, + type_suffix="", + suffix="", + reuse=None, + ): + # cut-out inputs + inputs_i = tf.slice(inputs, [0, start_index, 0], [-1, natoms, -1]) + inputs_i = tf.reshape(inputs_i, [-1, self.dim_descrpt]) + layer = inputs_i + if fparam is not None: + ext_fparam = tf.tile(fparam, [1, natoms]) + ext_fparam = tf.reshape(ext_fparam, [-1, self.numb_fparam]) + ext_fparam = tf.cast(ext_fparam, self.fitting_precision) + layer = tf.concat([layer, ext_fparam], axis=1) + if aparam is not None: + ext_aparam = tf.slice( + aparam, + [0, start_index * self.numb_aparam], + [-1, natoms * self.numb_aparam], + ) + ext_aparam = tf.reshape(ext_aparam, [-1, self.numb_aparam]) + ext_aparam = tf.cast(ext_aparam, self.fitting_precision) + layer = tf.concat([layer, ext_aparam], axis=1) + + if nvnmd_cfg.enable: + one_layer = one_layer_nvnmd + else: + one_layer = one_layer_deepmd + for ii in range(0, len(self.n_neuron)): + if self.layer_name is not None and self.layer_name[ii] is not None: + layer_suffix = "share_" + self.layer_name[ii] + type_suffix + layer_reuse = tf.AUTO_REUSE + else: + layer_suffix = "layer_" + str(ii) + type_suffix + suffix + layer_reuse = reuse + if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii - 1]: + layer += one_layer( + layer, + self.n_neuron[ii], + name=layer_suffix, + reuse=layer_reuse, + seed=self.seed, + use_timestep=self.resnet_dt, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + trainable=self.trainable[ii], + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) + else: + layer = one_layer( + layer, + self.n_neuron[ii], + name=layer_suffix, + reuse=layer_reuse, + seed=self.seed, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + trainable=self.trainable[ii], + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift 
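+        # NOTE: in the loop above, consecutive hidden layers of equal width
+        # are combined through a ResNet-style skip connection
+        # (`layer += one_layer(...)`, optionally scaled by a learnable
+        # timestep when `resnet_dt` is set); otherwise the new output
+        # simply replaces `layer`.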
+ if self.layer_name is not None and self.layer_name[-1] is not None: + layer_suffix = "share_" + self.layer_name[-1] + type_suffix + layer_reuse = tf.AUTO_REUSE + else: + layer_suffix = "final_layer" + type_suffix + suffix + layer_reuse = reuse + final_layer = one_layer( + layer, + 1, + activation_fn=None, + bavg=bias_atom_e, + name=layer_suffix, + reuse=layer_reuse, + seed=self.seed, + precision=self.fitting_precision, + trainable=self.trainable[-1], + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + final_layer=True, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift + + return final_layer + + def build( + self, + inputs: tf.Tensor, + natoms: tf.Tensor, + input_dict: Optional[dict] = None, + reuse: Optional[bool] = None, + suffix: str = "", + ) -> tf.Tensor: + """Build the computational graph for fitting net. + + Parameters + ---------- + inputs + The input descriptor + input_dict + Additional dict for inputs. + if numb_fparam > 0, should have input_dict['fparam'] + if numb_aparam > 0, should have input_dict['aparam'] + natoms + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + reuse + The weights in the networks should be reused when get the variable. + suffix + Name suffix to identify this descriptor + + Returns + ------- + ener + The system energy + """ + if input_dict is None: + input_dict = {} + bias_atom_e = self.bias_atom_e + type_embedding = input_dict.get("type_embedding", None) + atype = input_dict.get("atype", None) + if self.numb_fparam > 0: + if self.fparam_avg is None: + self.fparam_avg = 0.0 + if self.fparam_inv_std is None: + self.fparam_inv_std = 1.0 + if self.numb_aparam > 0: + if self.aparam_avg is None: + self.aparam_avg = 0.0 + if self.aparam_inv_std is None: + self.aparam_inv_std = 1.0 + + ntypes_atom = self.ntypes - self.ntypes_spin + if self.spin is not None: + for type_i in range(ntypes_atom): + if self.bias_atom_e.shape[0] != self.ntypes: + self.bias_atom_e = np.pad( + self.bias_atom_e, + (0, self.ntypes_spin), + "constant", + constant_values=(0, 0), + ) + bias_atom_e = self.bias_atom_e + if self.spin.use_spin[type_i]: + self.bias_atom_e[type_i] = ( + self.bias_atom_e[type_i] + + self.bias_atom_e[type_i + ntypes_atom] + ) + else: + self.bias_atom_e[type_i] = self.bias_atom_e[type_i] + self.bias_atom_e = self.bias_atom_e[:ntypes_atom] + + with tf.variable_scope("fitting_attr" + suffix, reuse=reuse): + # t_dfparam = tf.constant(self.numb_fparam, name="dfparam", dtype=tf.int32) + # t_daparam = tf.constant(self.numb_aparam, name="daparam", dtype=tf.int32) + self.t_bias_atom_e = tf.get_variable( + "t_bias_atom_e", + self.bias_atom_e.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.bias_atom_e), + ) + if self.numb_fparam > 0: + t_fparam_avg = tf.get_variable( + "t_fparam_avg", + self.numb_fparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.fparam_avg), + ) + t_fparam_istd = tf.get_variable( + "t_fparam_istd", + self.numb_fparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.fparam_inv_std), + ) + if self.numb_aparam > 0: + t_aparam_avg = tf.get_variable( + "t_aparam_avg", + self.numb_aparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + 
initializer=tf.constant_initializer(self.aparam_avg), + ) + t_aparam_istd = tf.get_variable( + "t_aparam_istd", + self.numb_aparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.aparam_inv_std), + ) + + inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) + if len(self.atom_ener): + # only for atom_ener + nframes = input_dict.get("nframes") + if nframes is not None: + # like inputs, but we don't want to add a dependency on inputs + inputs_zero = tf.zeros( + (nframes, natoms[0], self.dim_descrpt), + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + else: + inputs_zero = tf.zeros_like(inputs, dtype=GLOBAL_TF_FLOAT_PRECISION) + + if bias_atom_e is not None: + assert len(bias_atom_e) == self.ntypes + + fparam = None + if self.numb_fparam > 0: + fparam = input_dict["fparam"] + fparam = tf.reshape(fparam, [-1, self.numb_fparam]) + fparam = (fparam - t_fparam_avg) * t_fparam_istd + + aparam = None + if not self.use_aparam_as_mask: + if self.numb_aparam > 0: + aparam = input_dict["aparam"] + aparam = tf.reshape(aparam, [-1, self.numb_aparam]) + aparam = (aparam - t_aparam_avg) * t_aparam_istd + aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) + + atype_nall = tf.reshape(atype, [-1, natoms[1]]) + self.atype_nloc = tf.slice( + atype_nall, [0, 0], [-1, natoms[0]] + ) ## lammps will make error + atype_filter = tf.cast(self.atype_nloc >= 0, GLOBAL_TF_FLOAT_PRECISION) + self.atype_nloc = tf.reshape(self.atype_nloc, [-1]) + # prevent embedding_lookup error, + # but the filter will be applied anyway + self.atype_nloc = tf.clip_by_value(self.atype_nloc, 0, self.ntypes - 1) + + ## if spin is used + if self.spin is not None: + self.atype_nloc = tf.slice( + atype_nall, [0, 0], [-1, tf.reduce_sum(natoms[2 : 2 + ntypes_atom])] + ) + atype_filter = tf.cast(self.atype_nloc >= 0, GLOBAL_TF_FLOAT_PRECISION) + self.atype_nloc = tf.reshape(self.atype_nloc, [-1]) + if ( + nvnmd_cfg.enable + and nvnmd_cfg.quantize_descriptor + and nvnmd_cfg.restore_descriptor + and (nvnmd_cfg.version == 1) + ): + type_embedding = nvnmd_cfg.map["t_ebd"] + if type_embedding is not None: + atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc) + else: + atype_embed = None + + self.atype_embed = atype_embed + + if atype_embed is None: + start_index = 0 + outs_list = [] + for type_i in range(ntypes_atom): + final_layer = self._build_lower( + start_index, + natoms[2 + type_i], + inputs, + fparam, + aparam, + bias_atom_e=0.0, + type_suffix="_type_" + str(type_i), + suffix=suffix, + reuse=reuse, + ) + # concat the results + if type_i < len(self.atom_ener) and self.atom_ener[type_i] is not None: + zero_layer = self._build_lower( + start_index, + natoms[2 + type_i], + inputs_zero, + fparam, + aparam, + bias_atom_e=0.0, + type_suffix="_type_" + str(type_i), + suffix=suffix, + reuse=True, + ) + final_layer -= zero_layer + final_layer = tf.reshape( + final_layer, [tf.shape(inputs)[0], natoms[2 + type_i]] + ) + outs_list.append(final_layer) + start_index += natoms[2 + type_i] + # concat the results + # concat once may be faster than multiple concat + outs = tf.concat(outs_list, axis=1) + # with type embedding + else: + atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION) + type_shape = atype_embed.get_shape().as_list() + inputs = tf.concat( + [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1 + ) + original_dim_descrpt = self.dim_descrpt + self.dim_descrpt = self.dim_descrpt + type_shape[1] + inputs = tf.reshape(inputs, [-1, natoms[0], 
self.dim_descrpt]) + final_layer = self._build_lower( + 0, + natoms[0], + inputs, + fparam, + aparam, + bias_atom_e=0.0, + suffix=suffix, + reuse=reuse, + ) + if len(self.atom_ener): + # remove contribution in vacuum + inputs_zero = tf.concat( + [tf.reshape(inputs_zero, [-1, original_dim_descrpt]), atype_embed], + axis=1, + ) + inputs_zero = tf.reshape(inputs_zero, [-1, natoms[0], self.dim_descrpt]) + zero_layer = self._build_lower( + 0, + natoms[0], + inputs_zero, + fparam, + aparam, + bias_atom_e=0.0, + suffix=suffix, + reuse=True, + ) + # atomic energy will be stored in `self.t_bias_atom_e` which is not trainable + final_layer -= zero_layer + outs = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms[0]]) + # add bias + self.atom_ener_before = outs * atype_filter + # atomic bias energy from data statistics + self.atom_bias_ener = tf.reshape( + tf.nn.embedding_lookup(self.t_bias_atom_e, self.atype_nloc), + [tf.shape(inputs)[0], tf.reduce_sum(natoms[2 : 2 + ntypes_atom])], + ) + outs = outs + self.atom_bias_ener + outs *= atype_filter + self.atom_bias_ener *= atype_filter + self.atom_ener_after = outs + + if self.tot_ener_zero: + force_tot_ener = 0.0 + outs = tf.reshape(outs, [-1, tf.reduce_sum(natoms[2 : 2 + ntypes_atom])]) + outs_mean = tf.reshape(tf.reduce_mean(outs, axis=1), [-1, 1]) + outs_mean = outs_mean - tf.ones_like( + outs_mean, dtype=GLOBAL_TF_FLOAT_PRECISION + ) * ( + force_tot_ener + / global_cvt_2_tf_float(tf.reduce_sum(natoms[2 : 2 + ntypes_atom])) + ) + outs = outs - outs_mean + outs = tf.reshape(outs, [-1]) + + tf.summary.histogram("fitting_net_output", outs) + return tf.reshape(outs, [-1]) + + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", + ) -> None: + """Init the fitting net variables with the given dict. + + Parameters + ---------- + graph : tf.Graph + The input frozen model graph + graph_def : tf.GraphDef + The input frozen model graph_def + suffix : str + suffix to name scope + """ + self.fitting_net_variables = get_fitting_net_variables_from_graph_def( + graph_def, suffix=suffix + ) + if self.layer_name is not None: + # shared variables have no suffix + shared_variables = get_fitting_net_variables_from_graph_def( + graph_def, suffix="" + ) + self.fitting_net_variables.update(shared_variables) + if self.numb_fparam > 0: + self.fparam_avg = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_fparam_avg" % suffix + ) + self.fparam_inv_std = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_fparam_istd" % suffix + ) + if self.numb_aparam > 0: + self.aparam_avg = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_aparam_avg" % suffix + ) + self.aparam_inv_std = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_aparam_istd" % suffix + ) + try: + self.bias_atom_e = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_bias_atom_e" % suffix + ) + except GraphWithoutTensorError: + # for compatibility, old models has no t_bias_atom_e + pass + + def change_energy_bias( + self, + data, + frozen_model, + origin_type_map, + full_type_map, + bias_shift="delta", + ntest=10, + ) -> None: + """Change the energy bias according to the input data and the pretrained model. + + Parameters + ---------- + data : DeepmdDataSystem + The training data. + frozen_model : str + The path file of frozen model. + origin_type_map : list + The original type_map in dataset, they are targets to change the energy bias. 
+ full_type_map : str + The full type_map in pretrained model + bias_shift : str + The mode for changing energy bias : ['delta', 'statistic'] + 'delta' : perform predictions on energies of target dataset, + and do least sqaure on the errors to obtain the target shift as bias. + 'statistic' : directly use the statistic energy bias in the target dataset. + ntest : int + The number of test samples in a system to change the energy bias. + """ + type_numbs = [] + energy_ground_truth = [] + energy_predict = [] + sorter = np.argsort(full_type_map) + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] + mixed_type = data.mixed_type + numb_type = len(full_type_map) + dp = None + if bias_shift == "delta": + # init model + dp = DeepPotential(frozen_model) + for sys in data.data_systems: + test_data = sys.get_test() + nframes = test_data["box"].shape[0] + numb_test = min(nframes, ntest) + if mixed_type: + atype = test_data["type"][:numb_test].reshape([numb_test, -1]) + else: + atype = test_data["type"][0] + assert np.array( + [i in idx_type_map for i in list(set(atype.reshape(-1)))] + ).all(), "Some types are not in 'type_map'!" + energy_ground_truth.append( + test_data["energy"][:numb_test].reshape([numb_test, 1]) + ) + if mixed_type: + type_numbs.append( + np.array( + [(atype == i).sum(axis=-1) for i in idx_type_map], + dtype=np.int32, + ).T + ) + else: + type_numbs.append( + np.tile( + np.bincount(atype, minlength=numb_type)[idx_type_map], + (numb_test, 1), + ) + ) + if bias_shift == "delta": + coord = test_data["coord"][:numb_test].reshape([numb_test, -1]) + if sys.pbc: + box = test_data["box"][:numb_test] + else: + box = None + ret = dp.eval(coord, box, atype, mixed_type=mixed_type) + energy_predict.append(ret[0].reshape([numb_test, 1])) + type_numbs = np.concatenate(type_numbs) + energy_ground_truth = np.concatenate(energy_ground_truth) + old_bias = self.bias_atom_e[idx_type_map] + if bias_shift == "delta": + energy_predict = np.concatenate(energy_predict) + bias_diff = energy_ground_truth - energy_predict + delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0] + unbias_e = energy_predict + type_numbs @ delta_bias + atom_numbs = type_numbs.sum(-1) + rmse_ae = ( + np.sqrt(np.square(unbias_e - energy_ground_truth)) / atom_numbs + ).mean() + self.bias_atom_e[idx_type_map] += delta_bias.reshape(-1) + log.info( + f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom." + ) + elif bias_shift == "statistic": + statistic_bias = np.linalg.lstsq( + type_numbs, energy_ground_truth, rcond=None + )[0] + self.bias_atom_e[idx_type_map] = statistic_bias.reshape(-1) + else: + raise RuntimeError("Unknown bias_shift mode: " + bias_shift) + log.info( + "Change energy bias of {} from {} to {}.".format( + str(origin_type_map), str(old_bias), str(self.bias_atom_e[idx_type_map]) + ) + ) + + def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: + """Reveive the mixed precision setting. + + Parameters + ---------- + mixed_prec + The mixed precision setting used in the embedding net + """ + self.mixed_prec = mixed_prec + self.fitting_precision = get_precision(mixed_prec["output_prec"]) + + def get_loss(self, loss: dict, lr) -> Loss: + """Get the loss function. + + Parameters + ---------- + loss : dict + The loss function parameters. + lr : LearningRateExp + The learning rate. + + Returns + ------- + Loss + The loss function. 
+ """ + _loss_type = loss.pop("type", "ener") + loss["starter_learning_rate"] = lr.start_lr() + if _loss_type == "ener": + return EnerStdLoss(**loss) + elif _loss_type == "ener_dipole": + return EnerDipoleLoss(**loss) + elif _loss_type == "ener_spin": + return EnerSpinLoss(**loss, use_spin=self.spin.use_spin) + else: + raise RuntimeError("unknown loss type") diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 799cd6fd3b..6d41b91506 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -1,31 +1,28 @@ -from functools import ( - lru_cache, -) -from typing import ( - TYPE_CHECKING, - List, - Optional, - Union, -) - +from functools import lru_cache +from typing import TYPE_CHECKING +from typing import List +from typing import Optional +from typing import Union + +# from deepmd.descriptor.descriptor import ( +# Descriptor, +# ) import numpy as np -from deepmd.env import ( - MODEL_VERSION, - default_tf_session_config, - tf, -) -from deepmd.utils.batch_size import ( - AutoBatchSize, -) -from deepmd.utils.sess import ( - run_sess, -) +from deepmd.common import data_requirement +from deepmd.common import expand_sys_str +from deepmd.common import j_loader +from deepmd.common import j_must_have +from deepmd.env import MODEL_VERSION +from deepmd.env import default_tf_session_config +from deepmd.env import paddle +from deepmd.env import tf +from deepmd.model import EnerModel +from deepmd.utils.batch_size import AutoBatchSize +from deepmd.utils.sess import run_sess if TYPE_CHECKING: - from pathlib import ( - Path, - ) + from pathlib import Path class DeepEval: @@ -53,18 +50,71 @@ def __init__( default_tf_graph: bool = False, auto_batch_size: Union[bool, int, AutoBatchSize] = False, ): - self.graph = self._load_graph( - model_file, prefix=load_prefix, default_tf_graph=default_tf_graph + jdata = j_loader("input.json") + model_param = j_must_have(jdata, "model") + + descrpt_param = j_must_have(model_param, "descriptor") + from deepmd.descriptor import DescrptSeA + + descrpt_param.pop("type", None) + descrpt_param.pop("_comment", None) + self.spin = None + descrpt_param["spin"] = self.spin + self.descrpt = DescrptSeA(**descrpt_param) + + self.multi_task_mode = "fitting_net_dict" in model_param + fitting_param = ( + j_must_have(model_param, "fitting_net") + if not self.multi_task_mode + else j_must_have(model_param, "fitting_net_dict") + ) + from deepmd.fit import EnerFitting + + # fitting_param.pop("type", None) + fitting_param.pop("_comment", None) + fitting_param["descrpt"] = self.descrpt + self.fitting = EnerFitting(**fitting_param) + + self.typeebd = None + + self.model = EnerModel( + self.descrpt, + self.fitting, + self.typeebd, + model_param.get("type_map"), + model_param.get("data_stat_nbatch", 10), + model_param.get("data_stat_protect", 1e-2), + model_param.get("use_srtab"), + model_param.get("smin_alpha"), + model_param.get("sw_rmin"), + model_param.get("sw_rmax"), + self.spin, ) + load_state_dict = paddle.load(str(model_file)) + for k, v in load_state_dict.items(): + if k in self.model.state_dict(): + if load_state_dict[k].dtype != self.model.state_dict()[k].dtype: + # print(f"convert dtype from {load_state_dict[k].dtype} to {self.model.state_dict()[k].dtype}") + load_state_dict[k] = load_state_dict[k].astype( + self.model.state_dict()[k].dtype + ) + if list(load_state_dict[k].shape) != list( + self.model.state_dict()[k].shape + ): + # print(f"convert shape from {load_state_dict[k].shape} to {self.model.state_dict()[k].shape}") + load_state_dict[k] = 
load_state_dict[k].reshape( + self.model.state_dict()[k].shape + ) + self.model.set_state_dict(load_state_dict) self.load_prefix = load_prefix # graph_compatable should be called after graph and prefix are set - if not self._graph_compatable(): - raise RuntimeError( - f"model in graph (version {self.model_version}) is incompatible" - f"with the model (version {MODEL_VERSION}) supported by the current code." - "See https://deepmd.rtfd.io/compatability/ for details." - ) + # if not self._graph_compatable(): + # raise RuntimeError( + # f"model in graph (version {self.model_version}) is incompatible" + # f"with the model (version {MODEL_VERSION}) supported by the current code." + # "See https://deepmd.rtfd.io/compatability/ for details." + # ) # set default to False, as subclasses may not support if isinstance(auto_batch_size, bool): @@ -82,13 +132,15 @@ def __init__( @property @lru_cache(maxsize=None) def model_type(self) -> str: + return "ener" """Get type of model. :type:str """ - t_mt = self._get_tensor("model_attr/model_type:0") - [mt] = run_sess(self.sess, [t_mt], feed_dict={}) - return mt.decode("utf-8") + # t_mt = self._get_tensor("model_attr/model_type:0") + # [mt] = run_sess(self.sess, [t_mt], feed_dict={}) + # return mt.decode("utf-8") + self._model_type = self.model.t_mt @property @lru_cache(maxsize=None) @@ -100,6 +152,7 @@ def model_version(self) -> str: str version of model """ + return "0.1.0" try: t_mt = self._get_tensor("model_attr/model_version:0") except KeyError: @@ -117,6 +170,7 @@ def sess(self) -> tf.Session: return tf.Session(graph=self.graph, config=default_tf_session_config) def _graph_compatable(self) -> bool: + return True """Check the model compatability. Returns @@ -135,7 +189,7 @@ def _graph_compatable(self) -> bool: else: return True - def _get_tensor( + def _get_value( self, tensor_name: str, attr_name: Optional[str] = None ) -> tf.Tensor: """Get TF graph tensor and assign it to class namespace. 
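# ---------------------------------------------------------------------------
# [Editor's sketch, not part of the diff] The constructor above casts a loaded
# Paddle state dict to the model's dtypes and shapes before set_state_dict.
# A self-contained toy version; the Linear layer and checkpoint name are
# hypothetical stand-ins for the real model and file:
import paddle

model = paddle.nn.Linear(4, 2)
paddle.save(model.state_dict(), "toy.pdparams")  # pretend checkpoint
state = paddle.load("toy.pdparams")
for k, v in state.items():
    target = model.state_dict().get(k)
    if target is None:
        continue  # key absent from the model; skip it
    if v.dtype != target.dtype:
        v = v.astype(target.dtype)  # e.g. cast float32 -> float64
    if list(v.shape) != list(target.shape):
        v = v.reshape(target.shape)  # e.g. reshape [n] -> [1, n]
    state[k] = v
model.set_state_dict(state)
# ---------------------------------------------------------------------------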
@@ -154,8 +208,10 @@ def _get_tensor( loaded tensor """ # do not use os.path.join as it doesn't work on Windows - tensor_path = "/".join((self.load_prefix, tensor_name)) - tensor = self.graph.get_tensor_by_name(tensor_path) + value = None + for name, tensor in self.model.named_buffers(): + if tensor_name in name: + value = tensor.numpy()[0] if tensor.shape == [1] else tensor.numpy() if attr_name: setattr(self, attr_name, tensor) return tensor @@ -194,6 +250,16 @@ def _load_graph( name=prefix, producer_op_list=None, ) + # with tf.Session() as sess: + # constant_ops = [op for op in graph.get_operations() if op.type == "Const"] + # for constant_op in constant_ops: + # param = sess.run(constant_op.outputs[0]) + # # print(type(param)) + # if hasattr(param, 'shape'): + # # print(param.shape) + # if param.shape == (2,): + # print(constant_op.outputs[0], param) + # exit() return graph diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py index 10fed52497..909a9d23ac 100644 --- a/deepmd/infer/deep_pot.py +++ b/deepmd/infer/deep_pot.py @@ -1,35 +1,22 @@ import logging -from typing import ( - TYPE_CHECKING, - Callable, - List, - Optional, - Tuple, - Union, -) +from typing import TYPE_CHECKING +from typing import Callable +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union import numpy as np -from deepmd.common import ( - make_default_mesh, -) -from deepmd.infer.data_modifier import ( - DipoleChargeModifier, -) -from deepmd.infer.deep_eval import ( - DeepEval, -) -from deepmd.utils.batch_size import ( - AutoBatchSize, -) -from deepmd.utils.sess import ( - run_sess, -) +from deepmd.common import make_default_mesh +from deepmd.env import paddle +from deepmd.infer.data_modifier import DipoleChargeModifier +from deepmd.infer.deep_eval import DeepEval +from deepmd.utils.batch_size import AutoBatchSize +from deepmd.utils.sess import run_sess if TYPE_CHECKING: - from pathlib import ( - Path, - ) + from pathlib import Path log = logging.getLogger(__name__) @@ -81,125 +68,173 @@ def __init__( self.tensors = dict( { # descrpt attrs - "t_ntypes": "descrpt_attr/ntypes:0", - "t_rcut": "descrpt_attr/rcut:0", + "ntypes": "descrpt.ntypes", + "rcut": "descrpt.rcut", # fitting attrs - "t_dfparam": "fitting_attr/dfparam:0", - "t_daparam": "fitting_attr/daparam:0", - # model attrs - "t_tmap": "model_attr/tmap:0", - # inputs - "t_coord": "t_coord:0", - "t_type": "t_type:0", - "t_natoms": "t_natoms:0", - "t_box": "t_box:0", - "t_mesh": "t_mesh:0", - # add output tensors - "t_energy": "o_energy:0", - "t_force": "o_force:0", - "t_virial": "o_virial:0", - "t_ae": "o_atom_energy:0", - "t_av": "o_atom_virial:0", - "t_descriptor": "o_descriptor:0", + "dfparam": "fitting.t_dfparam", + "daparam": "fitting.t_daparam", + # # fitting attrs + # "t_dfparam": "fitting_attr/dfparam:0", + # "t_daparam": "fitting_attr/daparam:0", + # # model attrs + # "t_tmap": "model_attr/tmap:0", + # # inputs + # "t_coord": "t_coord:0", + # "t_type": "t_type:0", + # "t_natoms": "t_natoms:0", + # "t_box": "t_box:0", + # "t_mesh": "t_mesh:0", + # # add output tensors + # "t_energy": "o_energy:0", + # "t_force": "o_force:0", + # "t_virial": "o_virial:0", + # "t_ae": "o_atom_energy:0", + # "t_av": "o_atom_virial:0", + # "t_descriptor": "o_descriptor:0", }, ) DeepEval.__init__( self, model_file, load_prefix=load_prefix, - default_tf_graph=default_tf_graph, + # default_tf_graph=default_tf_graph, auto_batch_size=auto_batch_size, ) - # load optional tensors - operations = [op.name for op in 
self.graph.get_operations()] - # check if the graph has these operations: - # if yes add them - if "t_efield" in operations: - self._get_tensor("t_efield:0", "t_efield") - self.has_efield = True - else: - log.debug("Could not get tensor 't_efield:0'") - self.t_efield = None - self.has_efield = False - - if "load/t_fparam" in operations: - self.tensors.update({"t_fparam": "t_fparam:0"}) - self.has_fparam = True - else: - log.debug("Could not get tensor 't_fparam:0'") - self.t_fparam = None - self.has_fparam = False - - if "load/t_aparam" in operations: - self.tensors.update({"t_aparam": "t_aparam:0"}) - self.has_aparam = True - else: - log.debug("Could not get tensor 't_aparam:0'") - self.t_aparam = None - self.has_aparam = False - - if "load/spin_attr/ntypes_spin" in operations: - self.tensors.update({"t_ntypes_spin": "spin_attr/ntypes_spin:0"}) - self.has_spin = True - else: - self.ntypes_spin = 0 - self.has_spin = False + # # load optional tensors + # operations = [op.name for op in self.graph.get_operations()] + # # check if the graph has these operations: + # # if yes add them + # if "t_efield" in operations: + # # self._get_tensor("t_efield:0", "t_efield") + # if self._get_value("t_efield") is not None: + # self._get_value("t_efield", "t_efield") + # self.has_efield = True + # else: + # log.debug("Could not get tensor 't_efield'") + # self.t_efield = None + self.has_efield = False + + # if self._get_value("load/t_fparam") is not None: + # self.tensors.update({"t_fparam": "t_fparam"}) + # self.has_fparam = True + # else: + # log.debug("Could not get tensor 't_fparam'") + # self.t_fparam = None + self.has_fparam = False + + # if self._get_value("load/t_aparam") is not None: + # self.tensors.update({"t_aparam": "t_aparam"}) + # self.has_aparam = True + # else: + # log.debug("Could not get tensor 't_aparam'") + # self.t_aparam = None + self.has_aparam = False + + # if self._get_value("load/spin_attr/ntypes_spin") is not None: + # self.tensors.update({"t_ntypes_spin": "spin_attr/ntypes_spin"}) + # self.has_spin = True + # else: + self.ntypes_spin = 0 + self.has_spin = False # now load tensors to object attributes for attr_name, tensor_name in self.tensors.items(): try: - self._get_tensor(tensor_name, attr_name) + self._get_value(tensor_name, attr_name) except KeyError: if attr_name != "t_descriptor": raise - self._run_default_sess() - self.tmap = self.tmap.decode("UTF-8").split() + # self._run_default_sess() + # self.tmap = self.tmap.decode("UTF-8").split() + self.ntypes = 2 + self.rcut = 6.0 + self.dfparam = 0 + self.daparam = 0 + # self.t_tmap = self.model.t_tmap.split() + self.t_tmap = ["O", "H"] # setup modifier try: - t_modifier_type = self._get_tensor("modifier_attr/type:0") - self.modifier_type = run_sess(self.sess, t_modifier_type).decode("UTF-8") + # t_modifier_type = self._get_tensor("modifier_attr/type:0") + # self.modifier_type = run_sess(self.sess, t_modifier_type).decode("UTF-8") + self.modifier_type = self._get_value("modifier_attr.type") except (ValueError, KeyError): self.modifier_type = None - - try: - t_jdata = self._get_tensor("train_attr/training_script:0") - jdata = run_sess(self.sess, t_jdata).decode("UTF-8") - import json - - jdata = json.loads(jdata) - self.descriptor_type = jdata["model"]["descriptor"]["type"] - except (ValueError, KeyError): - self.descriptor_type = None - - if self.modifier_type == "dipole_charge": - t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0") - t_mdl_charge_map = self._get_tensor("modifier_attr/mdl_charge_map:0") - 
t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0")
-            t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0")
-            t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0")
-            [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess(
-                self.sess,
-                [
-                    t_mdl_name,
-                    t_mdl_charge_map,
-                    t_sys_charge_map,
-                    t_ewald_h,
-                    t_ewald_beta,
-                ],
-            )
-            mdl_name = mdl_name.decode("UTF-8")
-            mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()]
-            sys_charge_map = [int(ii) for ii in sys_charge_map.decode("UTF-8").split()]
-            self.dm = DipoleChargeModifier(
-                mdl_name,
-                mdl_charge_map,
-                sys_charge_map,
-                ewald_h=ewald_h,
-                ewald_beta=ewald_beta,
+            self.modifier_type = None
+        self.descriptor_type = "se_e2_a"
+
+        # try:
+        #     t_jdata = self._get_tensor("train_attr/training_script")
+        #     jdata = run_sess(self.sess, t_jdata).decode("UTF-8")
+        #     import json
+
+        #     jdata = json.loads(jdata)
+        #     self.descriptor_type = jdata["model"]["descriptor"]["type"]
+        # except (ValueError, KeyError):
+        #     self.descriptor_type = None
+
+        # if self.modifier_type == "dipole_charge":
+        #     t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0")
+        #     t_mdl_charge_map = self._get_tensor("modifier_attr/mdl_charge_map:0")
+        #     t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0")
+        #     t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0")
+        #     t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0")
+        #     [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess(
+        #         self.sess,
+        #         [
+        #             t_mdl_name,
+        #             t_mdl_charge_map,
+        #             t_sys_charge_map,
+        #             t_ewald_h,
+        #             t_ewald_beta,
+        #         ],
+        #     )
+        #     mdl_name = mdl_name.decode("UTF-8")
+        #     mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()]
+        #     sys_charge_map = [int(ii) for ii in sys_charge_map.decode("UTF-8").split()]
+        #     self.dm = DipoleChargeModifier(
+        #         mdl_name,
+        #         mdl_charge_map,
+        #         sys_charge_map,
+        #         ewald_h=ewald_h,
+        #         ewald_beta=ewald_beta,
+        #     )
+
+        # NOTE: run inference with the static-graph (jit-saved) model
+        if not hasattr(self, "st_model"):
+            self.st_model = paddle.jit.load(
+                "/workspace/hesensen/deepmd_backend/deepmd-kit/examples/water/se_e2_a/Model_1000000"
             )
+        # for k, v in self.st_model.named_parameters():
+        #     print(f"{k} {v.shape} {v.mean().item()} {v.var().item()}")
+        # """
+        # param_0 [1, 25] 0.9498768667019655 0.7340928425051493
+        # param_1 [1, 50] 1.1214760345730344 0.9621536430386503
+        # param_2 [1, 100] 1.168418946306086 1.0411743399117217
+        # param_3 [1, 25] 0.002546645920014433 0.27806176560439083
+        # param_4 [25, 50] -0.015372691466039676 0.10679961485782502
+        # param_5 [50, 100] -0.0010681208730640539 0.09950205346985407
+        # param_6 [1, 25] 1.0639599744616117 0.917256936729768
+        # param_7 [1, 50] 1.142691803888668 0.9639366693005659
+        # param_8 [1, 100] 1.1471394365452061 1.0091294911290036
+        # param_9 [1, 25] 0.019013792716200625 0.1450311660373793
+        # param_10 [25, 50] -0.006747145320748169 0.028971429954693633
+        # param_11 [50, 100] -0.03750622755877242 0.04714041793007081
+        # param_12 [1, 25] 1.0380588819220322 0.8904020425094114
+        # param_13 [1, 50] 1.1245407895732316 0.9234643810098301
+        # param_14 [1, 100] 1.1430567514092813 0.9876968977916372
+        # param_15 [1, 25] 0.03272738992064966 0.1751917732380509
+        # param_16 [25, 50] -0.017871745658352124 0.0384813911462805
+        # param_17 [50, 100] -0.07345191324160481 0.1768254187693918
+        # param_18 [1, 25] 1.0147830400771964 0.9070964180637516
+        # param_19 [1, 50] 1.1198266551333698 1.034746190888665
+        # param_20 [1, 100] 1.1410748813679754 1.0428001731414345
+        # param_21 [1, 25] -0.022862385119536602 0.18038150422614693
+        # param_22 [25, 50] -0.024970130750642985 0.07176423978220656
+        # param_23 [50, 100] -0.012309303874398866 0.07227932085917015
+        # """
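# ---------------------------------------------------------------------------
# [Editor's sketch, not part of the diff] The hard-coded paddle.jit.load above
# expects a model exported earlier with paddle.jit.save; the layer, path and
# InputSpec below are hypothetical stand-ins for EnerModel and its inputs:
import paddle

net = paddle.nn.Linear(3, 1)
paddle.jit.save(
    net,
    "./toy_model",
    input_spec=[paddle.static.InputSpec(shape=[None, 3], dtype="float32")],
)
st_model = paddle.jit.load("./toy_model")  # a callable TranslatedLayer
out = st_model(paddle.rand([2, 3]))  # outputs come back in export order
# ---------------------------------------------------------------------------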

     def _run_default_sess(self):
         if self.has_spin is True:
@@ -247,7 +282,7 @@ def get_rcut(self) -> float:
 
     def get_type_map(self) -> List[str]:
         """Get the type map (element name of the atom types) of this model."""
-        return self.tmap
+        return self.t_tmap
 
     def get_sel_type(self) -> List[int]:
         """Unsupported in this model."""
@@ -259,11 +294,11 @@ def get_descriptor_type(self) -> List[int]:
 
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this DP."""
-        return self.dfparam
+        return self.model.fitting.numb_fparam
 
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this DP."""
-        return self.daparam
+        return self.model.fitting.numb_aparam
 
     def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
         """Wrapper method with auto batch size.
@@ -369,7 +404,7 @@ def eval(
         # reshape coords before getting shape
         natoms, numb_test = self._get_natoms_and_nframes(
             coords, atom_types, mixed_type=mixed_type
-        )
+        )  # 192, 30
         output = self._eval_func(self._eval_inner, numb_test, natoms)(
             coords,
             cells,
@@ -381,7 +416,7 @@ def eval(
             mixed_type=mixed_type,
         )
 
-        if self.modifier_type is not None:
+        if self.modifier_type is not None:  # this branch is never taken here
             if atomic:
                 raise RuntimeError("modifier does not support atomic modification")
             me, mf, mv = self.dm.eval(coords, cells, atom_types)
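# ---------------------------------------------------------------------------
# [Editor's sketch, not part of the diff] Layout of natoms_vec as far as it
# can be inferred from make_natoms_vec and the natoms[2 : 2 + ntypes] slices
# used elsewhere in this diff: index 0 = nloc, index 1 = nall, and the
# remaining entries are per-type atom counts. Toy single-frame example:
import numpy as np

atom_types = np.array([0, 1, 1, 0, 1])
ntypes = 2
natoms_vec = np.empty(2 + ntypes, dtype=np.int32)
natoms_vec[0] = atom_types.size  # nloc: local atoms
natoms_vec[1] = atom_types.size  # nall: including ghosts (none here)
natoms_vec[2:] = np.bincount(atom_types, minlength=ntypes)
assert natoms_vec[0] == natoms_vec[2:].sum()  # mirrors the assert below
# ---------------------------------------------------------------------------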
@@ -460,46 +495,46 @@ def _prepare_feed_dict(
         )
 
         # sort inputs
-        coords, atom_types, imap = self.sort_input(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        if self.has_efield:
-            efield = np.reshape(efield, [nframes, natoms, 3])
-            efield = efield[:, imap, :]
-            efield = np.reshape(efield, [nframes, natoms * 3])
+        # coords, atom_types, imap = self.sort_input(
+        #     coords, atom_types, mixed_type=mixed_type
+        # )
+        # if self.has_efield:
+        #     efield = np.reshape(efield, [nframes, natoms, 3])
+        #     efield = efield[:, imap, :]
+        #     efield = np.reshape(efield, [nframes, natoms * 3])
 
         # make natoms_vec and default_mesh
         natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
         assert natoms_vec[0] == natoms
 
         # evaluate
-        feed_dict_test = {}
-        feed_dict_test[self.t_natoms] = natoms_vec
-        if mixed_type:
-            feed_dict_test[self.t_type] = atom_types.reshape([-1])
-        else:
-            feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
-                [-1]
-            )
-        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
-
-        if len(self.t_box.shape) == 1:
-            feed_dict_test[self.t_box] = np.reshape(cells, [-1])
-        elif len(self.t_box.shape) == 2:
-            feed_dict_test[self.t_box] = cells
-        else:
-            raise RuntimeError
-        if self.has_efield:
-            feed_dict_test[self.t_efield] = np.reshape(efield, [-1])
-        if pbc:
-            feed_dict_test[self.t_mesh] = make_default_mesh(cells)
-        else:
-            feed_dict_test[self.t_mesh] = np.array([], dtype=np.int32)
-        if self.has_fparam:
-            feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1])
-        if self.has_aparam:
-            feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1])
-        return feed_dict_test, imap, natoms_vec
+        # feed_dict_test = {}
+        # feed_dict_test[self.t_natoms] = natoms_vec
+        # if mixed_type:
+        #     feed_dict_test[self.t_type] = atom_types.reshape([-1])
+        # else:
+        #     feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
+        #         [-1]
+        #     )
+        # feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
+
+        # if len(self.t_box.shape) == 1:
+        #     feed_dict_test[self.t_box] = np.reshape(cells, [-1])
+        # elif len(self.t_box.shape) == 2:
+        #     feed_dict_test[self.t_box] = cells
+        # else:
+        #     raise RuntimeError
+        # if self.has_efield:
+        #     feed_dict_test[self.t_efield] = np.reshape(efield, [-1])
+        # if pbc:
+        #     feed_dict_test[self.t_mesh] = make_default_mesh(cells)
+        # else:
+        #     feed_dict_test[self.t_mesh] = np.array([], dtype=np.int32)
+        # if self.has_fparam:
+        #     feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1])
+        # if self.has_aparam:
+        #     feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1])
+        return None, None, natoms_vec
 
     def _eval_inner(
         self,
@@ -519,41 +554,130 @@ def _eval_inner(
             coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
         )
 
-        t_out = [self.t_energy, self.t_force, self.t_virial]
-        if atomic:
-            t_out += [self.t_ae, self.t_av]
-
-        v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test)
-        energy = v_out[0]
-        force = v_out[1]
-        virial = v_out[2]
-        if atomic:
-            ae = v_out[3]
-            av = v_out[4]
+        # t_out = [self.t_energy, self.t_force, self.t_virial]
+        # if atomic:
+        #     t_out += [self.t_ae, self.t_av]
+
+        # v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test)
+        # energy = v_out[0]
+        # force = v_out[1]
+        # virial = v_out[2]
+        # if atomic:
+        #     ae = v_out[3]
+        #     av = v_out[4]
+
+        # if self.has_spin:
+        #     ntypes_real = self.ntypes - self.ntypes_spin
+        #     natoms_real = sum(
+        #         [
+        #             np.count_nonzero(np.array(atom_types) == ii)
+        #             for ii in range(ntypes_real)
+        #         ]
+        #     )
+        # else:
+        #     natoms_real = natoms
+
+        # # reverse map of the outputs
+        # force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap)
+        # if atomic:
+        #     ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap[:natoms_real])
+        #     av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap)
+
+        # energy = np.reshape(energy, [nframes, 1])
+        # force = np.reshape(force, [nframes, natoms, 3])
+        # virial = np.reshape(virial, [nframes, 9])
+        # if atomic:
+        #     ae = np.reshape(ae, [nframes, natoms_real, 1])
+        #     av = np.reshape(av, [nframes, natoms, 9])
+        #     return energy, force, virial, ae, av
+        # else:
+        #     atom_types = np.array(atom_types, dtype=int).reshape([-1])
+        #     natoms = atom_types.size
+        #     coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        #     nframes = coords.shape[0]
+
+        eval_inputs = {}
+        eval_inputs["coord"] = paddle.to_tensor(
+            np.reshape(coords, [-1]), dtype="float64"
+        )
+        eval_inputs["type"] = paddle.to_tensor(
+            np.tile(atom_types, [nframes, 1]).reshape([-1]), dtype="int32"
+        )
+        eval_inputs["natoms_vec"] = paddle.to_tensor(
+            natoms_vec, dtype="int32", place="cpu"
+        )
+        eval_inputs["box"] = paddle.to_tensor(np.reshape(cells, [-1]), dtype="float64")
+        # print(eval_inputs['coord'].shape)  # [2880]
+        # print(eval_inputs['type'].shape)  # [960]
+        # print(eval_inputs['natoms_vec'].shape)  # [4]
+        # print(eval_inputs['box'].shape)  # [45]
+        # exit()

-        if self.has_spin:
-            ntypes_real = self.ntypes - self.ntypes_spin
-            natoms_real = sum(
-                [
-                    np.count_nonzero(np.array(atom_types) == ii)
-                    for ii in range(ntypes_real)
-                ]
+        if self.has_fparam:
+            eval_inputs["fparam"] = paddle.to_tensor(
+                np.reshape(fparam, [-1]), dtype="float64"
             )
+        if self.has_aparam:
+            eval_inputs["aparam"] = paddle.to_tensor(
+                np.reshape(aparam, [-1]), dtype="float64"
+            )
+        # if se.pbc:
+        eval_inputs["default_mesh"] = paddle.to_tensor(
+            make_default_mesh(cells), dtype="int32"
+        )
+        # else:
+        #     eval_inputs['default_mesh'] = paddle.to_tensor(np.array([], dtype=np.int32))
+
+        if hasattr(self, "st_model"):
+            eval_outputs = self.st_model(
eval_inputs["coord"], # [2880] paddle.float64 + eval_inputs["type"], # [960] paddle.int32 + eval_inputs["natoms_vec"], # [4] paddle.int32 + eval_inputs["box"], # [45] paddle.float64 + eval_inputs["default_mesh"], # [6] paddle.int32 + ) + eval_outputs = { + "atom_ener": eval_outputs[0], + "atom_virial": eval_outputs[1], + "atype": eval_outputs[2], + "coord": eval_outputs[3], + "energy": eval_outputs[4], + "force": eval_outputs[5], + "virial": eval_outputs[6], + # "z00_hidden1": eval_outputs[7], + # "z00_hidden2": eval_outputs[8], + # "z00_hidden3": eval_outputs[9], + # "z00_xx1": eval_outputs[7], + # "z00_xx2": eval_outputs[8], + # "z00_xx3": eval_outputs[9], + # "z00_xx4": eval_outputs[10], + # "weight_0": eval_outputs[7], + # "bias_0": eval_outputs[8], + # "xx1": eval_outputs[9], + # "hidden1": eval_outputs[10], + } else: - natoms_real = natoms - - # reverse map of the outputs - force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap) - if atomic: - ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap[:natoms_real]) - av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap) - - energy = np.reshape(energy, [nframes, 1]) - force = np.reshape(force, [nframes, natoms, 3]) - virial = np.reshape(virial, [nframes, 9]) + eval_outputs = self.model( + eval_inputs["coord"], # [2880] paddle.float64 + eval_inputs["type"], # [960] paddle.int32 + eval_inputs["natoms_vec"], # [4] paddle.int32 + eval_inputs["box"], # [45] paddle.float64 + eval_inputs["default_mesh"], # [6] paddle.int32 + eval_inputs, + # eval_inputs.coord: [2880] paddle.float64 + # eval_inputs.type: [960] paddle.int32 + # eval_inputs.natoms_vec: [4] paddle.int32 + # eval_inputs.box: [45] paddle.float64 + # eval_inputs.default_mesh: [6] paddle.int32 + suffix="", + reuse=False, + ) + energy = eval_outputs["energy"].numpy() + force = eval_outputs["force"].numpy() + virial = eval_outputs["virial"].numpy() if atomic: - ae = np.reshape(ae, [nframes, natoms_real, 1]) - av = np.reshape(av, [nframes, natoms, 9]) + ae = eval_outputs["atom_ener"].numpy() + av = eval_outputs["atom_virial"].numpy() return energy, force, virial, ae, av else: return energy, force, virial diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py index 07c97b09bc..e1e7d89626 100644 --- a/deepmd/loss/ener.py +++ b/deepmd/loss/ener.py @@ -1,24 +1,16 @@ -from typing import ( - Optional, -) +from typing import Optional import numpy as np -from deepmd.common import ( - add_data_requirement, -) -from deepmd.env import ( - global_cvt_2_ener_float, - global_cvt_2_tf_float, - tf, -) -from deepmd.utils.sess import ( - run_sess, -) +from deepmd.common import add_data_requirement +from deepmd.env import global_cvt_2_ener_float +from deepmd.env import global_cvt_2_pd_float +from deepmd.env import global_cvt_2_tf_float +from deepmd.env import paddle +from deepmd.env import tf +from deepmd.utils.sess import run_sess -from .loss import ( - Loss, -) +from .loss import Loss class EnerStdLoss(Loss): @@ -82,11 +74,12 @@ def __init__( default=1.0, ) - def build(self, learning_rate, natoms, model_dict, label_dict, suffix): + def compute_loss(self, learning_rate, natoms, model_dict, label_dict, suffix): energy = model_dict["energy"] force = model_dict["force"] virial = model_dict["virial"] atom_ener = model_dict["atom_ener"] + energy_hat = label_dict["energy"] force_hat = label_dict["force"] virial_hat = label_dict["virial"] @@ -108,152 +101,187 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix): # E = - E(A) - E(B) + E(C) + E(D) # A, B, C, D could be 
put far away from each other atom_ener_coeff = label_dict["atom_ener_coeff"] - atom_ener_coeff = tf.reshape(atom_ener_coeff, tf.shape(atom_ener)) - energy = tf.reduce_sum(atom_ener_coeff * atom_ener, 1) + atom_ener_coeff = paddle.reshape(atom_ener_coeff, paddle.shape(atom_ener)) + energy = paddle.sum(atom_ener_coeff * atom_ener, axis=1) if self.has_e: - l2_ener_loss = tf.reduce_mean( - tf.square(energy - energy_hat), name="l2_" + suffix + l2_ener_loss = paddle.mean( + paddle.square(energy - energy_hat), name="l2_" + suffix ) if self.has_f or self.has_pf or self.relative_f: - force_reshape = tf.reshape(force, [-1]) - force_hat_reshape = tf.reshape(force_hat, [-1]) + force_reshape = paddle.reshape(force, [-1]) + force_hat_reshape = paddle.reshape(force_hat, [-1]) diff_f = force_hat_reshape - force_reshape if self.relative_f is not None: - force_hat_3 = tf.reshape(force_hat, [-1, 3]) - norm_f = tf.reshape(tf.norm(force_hat_3, axis=1), [-1, 1]) + self.relative_f - diff_f_3 = tf.reshape(diff_f, [-1, 3]) + force_hat_3 = paddle.reshape(force_hat, [-1, 3]) + norm_f = ( + paddle.reshape(paddle.linalg.norm(force_hat_3, axis=1), [-1, 1]) + + self.relative_f + ) + diff_f_3 = paddle.reshape(diff_f, [-1, 3]) diff_f_3 = diff_f_3 / norm_f - diff_f = tf.reshape(diff_f_3, [-1]) + diff_f = paddle.reshape(diff_f_3, [-1]) if self.has_f: - l2_force_loss = tf.reduce_mean(tf.square(diff_f), name="l2_force_" + suffix) + l2_force_loss = paddle.mean( + paddle.square(diff_f), name="l2_force_" + suffix + ) if self.has_pf: - atom_pref_reshape = tf.reshape(atom_pref, [-1]) - l2_pref_force_loss = tf.reduce_mean( - tf.multiply(tf.square(diff_f), atom_pref_reshape), + atom_pref_reshape = paddle.reshape(atom_pref, [-1]) + l2_pref_force_loss = paddle.mean( + paddle.multiply(paddle.square(diff_f), atom_pref_reshape), name="l2_pref_force_" + suffix, ) if self.has_v: - virial_reshape = tf.reshape(virial, [-1]) - virial_hat_reshape = tf.reshape(virial_hat, [-1]) - l2_virial_loss = tf.reduce_mean( - tf.square(virial_hat_reshape - virial_reshape), + virial_reshape = paddle.reshape(virial, [-1]) + virial_hat_reshape = paddle.reshape(virial_hat, [-1]) + l2_virial_loss = paddle.mean( + paddle.square(virial_hat_reshape - virial_reshape), name="l2_virial_" + suffix, ) if self.has_ae: - atom_ener_reshape = tf.reshape(atom_ener, [-1]) - atom_ener_hat_reshape = tf.reshape(atom_ener_hat, [-1]) - l2_atom_ener_loss = tf.reduce_mean( - tf.square(atom_ener_hat_reshape - atom_ener_reshape), + atom_ener_reshape = paddle.reshape(atom_ener, [-1]) + atom_ener_hat_reshape = paddle.reshape(atom_ener_hat, [-1]) + l2_atom_ener_loss = paddle.mean( + paddle.square(atom_ener_hat_reshape - atom_ener_reshape), name="l2_atom_ener_" + suffix, ) - atom_norm = 1.0 / global_cvt_2_tf_float(natoms[0]) - atom_norm_ener = 1.0 / global_cvt_2_ener_float(natoms[0]) - pref_e = global_cvt_2_ener_float( - find_energy - * ( - self.limit_pref_e - + (self.start_pref_e - self.limit_pref_e) - * learning_rate - / self.starter_learning_rate - ) - ) - pref_f = global_cvt_2_tf_float( - find_force - * ( - self.limit_pref_f - + (self.start_pref_f - self.limit_pref_f) - * learning_rate - / self.starter_learning_rate - ) - ) - pref_v = global_cvt_2_tf_float( - find_virial - * ( - self.limit_pref_v - + (self.start_pref_v - self.limit_pref_v) - * learning_rate - / self.starter_learning_rate - ) - ) - pref_ae = global_cvt_2_tf_float( - find_atom_ener - * ( - self.limit_pref_ae - + (self.start_pref_ae - self.limit_pref_ae) - * learning_rate - / self.starter_learning_rate - ) - ) - pref_pf 
= global_cvt_2_tf_float( - find_atom_pref - * ( - self.limit_pref_pf - + (self.start_pref_pf - self.limit_pref_pf) - * learning_rate - / self.starter_learning_rate - ) + atom_norm = 1.0 / (natoms[0]) + atom_norm_ener = 1.0 / (natoms[0]) + pref_e = find_energy * ( + self.limit_pref_e + + (self.start_pref_e - self.limit_pref_e) + * learning_rate + / self.starter_learning_rate + ) + pref_f = find_force * ( + self.limit_pref_f + + (self.start_pref_f - self.limit_pref_f) + * learning_rate + / self.starter_learning_rate + ) + pref_v = find_virial * ( + self.limit_pref_v + + (self.start_pref_v - self.limit_pref_v) + * learning_rate + / self.starter_learning_rate + ) + pref_ae = find_atom_ener * ( + self.limit_pref_ae + + (self.start_pref_ae - self.limit_pref_ae) + * learning_rate + / self.starter_learning_rate + ) + pref_pf = find_atom_pref * ( + self.limit_pref_pf + + (self.start_pref_pf - self.limit_pref_pf) + * learning_rate + / self.starter_learning_rate ) l2_loss = 0 more_loss = {} - if self.has_e: + # print(self.has_e) + # print(self.has_f) + # print(self.has_v) + # print(self.has_ae) + # print(self.has_pf) + if self.has_e: # true l2_loss += atom_norm_ener * (pref_e * l2_ener_loss) more_loss["l2_ener_loss"] = l2_ener_loss - if self.has_f: - l2_loss += global_cvt_2_ener_float(pref_f * l2_force_loss) + if self.has_f: # true + l2_loss += pref_f * l2_force_loss more_loss["l2_force_loss"] = l2_force_loss - if self.has_v: - l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss)) + if self.has_v: # false + l2_loss += atom_norm * (pref_v * l2_virial_loss) more_loss["l2_virial_loss"] = l2_virial_loss - if self.has_ae: - l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss) + if self.has_ae: # false + l2_loss += pref_ae * l2_atom_ener_loss more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss - if self.has_pf: - l2_loss += global_cvt_2_ener_float(pref_pf * l2_pref_force_loss) + if self.has_pf: # false + l2_loss += pref_pf * l2_pref_force_loss more_loss["l2_pref_force_loss"] = l2_pref_force_loss # only used when tensorboard was set as true - self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss)) - if self.has_e: - self.l2_loss_ener_summary = tf.summary.scalar( - "l2_ener_loss_" + suffix, - global_cvt_2_tf_float(tf.sqrt(l2_ener_loss)) - / global_cvt_2_tf_float(natoms[0]), - ) - if self.has_f: - self.l2_loss_force_summary = tf.summary.scalar( - "l2_force_loss_" + suffix, tf.sqrt(l2_force_loss) - ) - if self.has_v: - self.l2_loss_virial_summary = tf.summary.scalar( - "l2_virial_loss_" + suffix, - tf.sqrt(l2_virial_loss) / global_cvt_2_tf_float(natoms[0]), - ) + # self.l2_loss_summary = paddle.summary.scalar("l2_loss_" + suffix, paddle.sqrt(l2_loss)) + # if self.has_e: + # self.l2_loss_ener_summary = paddle.summary.scalar( + # "l2_ener_loss_" + suffix, + # global_cvt_2_tf_float(paddle.sqrt(l2_ener_loss)) + # / global_cvt_2_tf_float(natoms[0]), + # ) + # if self.has_f: + # self.l2_loss_force_summary = paddle.summary.scalar( + # "l2_force_loss_" + suffix, paddle.sqrt(l2_force_loss) + # ) + # if self.has_v: + # self.l2_loss_virial_summary = paddle.summary.scalar( + # "l2_virial_loss_" + suffix, + # paddle.sqrt(l2_virial_loss) / global_cvt_2_tf_float(natoms[0]), + # ) self.l2_l = l2_loss self.l2_more = more_loss return l2_loss, more_loss - def eval(self, sess, feed_dict, natoms): - placeholder = self.l2_l + def eval(self, model, batch_data, natoms): + # placeholder = self.l2_l + + model_inputs = {} + for kk in batch_data.keys(): + if kk == "find_type" or kk == 
"type": + continue + prec = "float64" + if "find_" in kk: + model_inputs[kk] = paddle.to_tensor(batch_data[kk], dtype="float64") + else: + model_inputs[kk] = paddle.to_tensor( + np.reshape(batch_data[kk], [-1]), dtype=prec + ) + + for ii in ["type"]: + model_inputs[ii] = paddle.to_tensor( + np.reshape(batch_data[ii], [-1]), dtype="int32" + ) + for ii in ["natoms_vec", "default_mesh"]: + model_inputs[ii] = paddle.to_tensor(batch_data[ii], dtype="int32") + model_inputs["is_training"] = paddle.to_tensor(False) + model_inputs["natoms_vec"] = paddle.to_tensor( + model_inputs["natoms_vec"], place="cpu" + ) + + model_pred = model( + model_inputs["coord"], + model_inputs["type"], + model_inputs["natoms_vec"], + model_inputs["box"], + model_inputs["default_mesh"], + model_inputs, + suffix="", + reuse=False, + ) + l2_l, l2_more = self.compute_loss( + # 0.0, natoms, model_dict, batch_data + 0.0, + model_inputs["natoms_vec"], + model_pred, + model_inputs, + suffix="test", + ) run_data = [ - self.l2_l, - self.l2_more["l2_ener_loss"] if self.has_e else placeholder, - self.l2_more["l2_force_loss"] if self.has_f else placeholder, - self.l2_more["l2_virial_loss"] if self.has_v else placeholder, - self.l2_more["l2_atom_ener_loss"] if self.has_ae else placeholder, - self.l2_more["l2_pref_force_loss"] if self.has_pf else placeholder, + (float(l2_l)), + (float(l2_more["l2_ener_loss"]) if self.has_e else 0.0), + (float(l2_more["l2_force_loss"]) if self.has_f else 0.0), + (float(l2_more["l2_virial_loss"]) if self.has_v else 0.0), + (float(l2_more["l2_atom_ener_loss"]) if self.has_ae else 0.0), + (float(l2_more["l2_pref_force_loss"]) if self.has_pf else 0.0), ] - error, error_e, error_f, error_v, error_ae, error_pf = run_sess( - sess, run_data, feed_dict=feed_dict - ) + error, error_e, error_f, error_v, error_ae, error_pf = run_data results = {"natoms": natoms[0], "rmse": np.sqrt(error)} if self.has_e: results["rmse_e"] = np.sqrt(error_e) / natoms[0] diff --git a/deepmd/loss/loss.py b/deepmd/loss/loss.py index f666445e6e..7a9d55e106 100644 --- a/deepmd/loss/loss.py +++ b/deepmd/loss/loss.py @@ -1,21 +1,15 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from typing import ( - Dict, - Tuple, -) +from abc import ABCMeta +from abc import abstractmethod +from typing import Dict +from typing import Tuple -from deepmd.env import ( - tf, -) +from deepmd.env import tf class Loss(metaclass=ABCMeta): """The abstract class for the loss function.""" - @abstractmethod + # @abstractmethod def build( self, learning_rate: tf.Tensor, @@ -46,6 +40,7 @@ def build( dict[str, tf.Tensor] A dictionary that maps loss keys to more loss tensors """ + pass @abstractmethod def eval( diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py index f9387c67fc..0c4e890e01 100644 --- a/deepmd/model/ener.py +++ b/deepmd/model/ener.py @@ -1,33 +1,22 @@ -from typing import ( - List, - Optional, -) +from typing import List +from typing import Optional import numpy as np -from deepmd.env import ( - MODEL_VERSION, - global_cvt_2_ener_float, - op_module, - tf, -) -from deepmd.utils.pair_tab import ( - PairTab, -) -from deepmd.utils.spin import ( - Spin, -) - -from .model import ( - Model, -) -from .model_stat import ( - make_stat_input, - merge_sys_stat, -) - - -class EnerModel(Model): +from deepmd.env import MODEL_VERSION +from deepmd.env import global_cvt_2_ener_float +from deepmd.env import op_module +from deepmd.env import paddle +from deepmd.env import tf +from deepmd.utils.pair_tab import PairTab +from deepmd.utils.spin import Spin + +from 
.model import Model
+from .model_stat import make_stat_input
+from .model_stat import merge_sys_stat
+
+
+class EnerModel(Model, paddle.nn.Layer):
     """Energy model.
 
     Parameters
@@ -69,6 +58,8 @@ def __init__(
         sw_rmax: Optional[float] = None,
         spin: Optional[Spin] = None,
     ) -> None:
+        super().__init__()
+        # super(EnerModel, self).__init__(name_scope="EnerModel")
         """Constructor."""
         # descriptor
         self.descrpt = descrpt
@@ -97,6 +88,11 @@ def __init__(
         else:
             self.srtab = None
 
+        # self.type_map = " ".join(self.type_map)
+        self.t_tmap = " ".join(self.type_map)
+        self.t_mt = self.model_type
+        self.t_ver = MODEL_VERSION
+
     def get_rcut(self):
         return self.rcut
 
@@ -144,7 +140,7 @@ def _compute_output_stat(self, all_stat, mixed_type=False):
         else:
             self.fitting.compute_output_stats(all_stat)
 
-    def build(
+    def forward(
         self,
         coord_,
         atype_,
@@ -157,172 +153,199 @@ def build(
         suffix="",
         reuse=None,
     ):
+        # print(__file__, coord_.shape)
+        # print(__file__, atype_.shape)
+        # print(__file__, natoms.shape)
+        # print(__file__, box.shape)
+        # print(__file__, mesh.shape)
+        # for k, v in input_dict.items():
+        #     print(f"{__file__} {k} {v.shape}")
+
         if input_dict is None:
             input_dict = {}
-        with tf.variable_scope("model_attr" + suffix, reuse=reuse):
-            t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string)
-            t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string)
-            t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string)
-
-        if self.srtab is not None:
-            tab_info, tab_data = self.srtab.get()
-            self.tab_info = tf.get_variable(
-                "t_tab_info",
-                tab_info.shape,
-                dtype=tf.float64,
-                trainable=False,
-                initializer=tf.constant_initializer(tab_info, dtype=tf.float64),
-            )
-            self.tab_data = tf.get_variable(
-                "t_tab_data",
-                tab_data.shape,
-                dtype=tf.float64,
-                trainable=False,
-                initializer=tf.constant_initializer(tab_data, dtype=tf.float64),
-            )
-
-        coord = tf.reshape(coord_, [-1, natoms[1] * 3])
-        atype = tf.reshape(atype_, [-1, natoms[1]])
-        input_dict["nframes"] = tf.shape(coord)[0]
+        # if self.srtab is not None:
+        #     tab_info, tab_data = self.srtab.get()
+        #     self.tab_info = tf.get_variable(
+        #         "t_tab_info",
+        #         tab_info.shape,
+        #         dtype=tf.float64,
+        #         trainable=False,
+        #         initializer=tf.constant_initializer(tab_info, dtype=tf.float64),
+        #     )
+        #     self.tab_data = tf.get_variable(
+        #         "t_tab_data",
+        #         tab_data.shape,
+        #         dtype=tf.float64,
+        #         trainable=False,
+        #         initializer=tf.constant_initializer(tab_data, dtype=tf.float64),
+        #     )
+
+        coord = paddle.reshape(coord_, [-1, natoms[1] * 3])
+        atype = paddle.reshape(atype_, [-1, natoms[1]])
+        # input_dict["nframes"] = paddle.shape(coord)[0]  # commented out when exporting the inference model, otherwise it raises an error
 
         # type embedding if any
-        if self.typeebd is not None:
-            type_embedding = self.typeebd.build(
-                self.ntypes,
-                reuse=reuse,
-                suffix=suffix,
-            )
-            input_dict["type_embedding"] = type_embedding
+        # if self.typeebd is not None:
+        #     type_embedding = self.typeebd.build(
+        #         self.ntypes,
+        #         reuse=reuse,
+        #         suffix=suffix,
+        #     )
+        #     input_dict["type_embedding"] = type_embedding
         # spin if any
-        if self.spin is not None:
-            type_spin = self.spin.build(
-                reuse=reuse,
-                suffix=suffix,
-            )
+        # if self.spin is not None:
+        #     type_spin = self.spin.build(
+        #         reuse=reuse,
+        #         suffix=suffix,
+        #     )
         input_dict["atype"] = atype_
 
-        dout = self.build_descrpt(
+        dout = self.descrpt(
             coord,
             atype,
             natoms,
             box,
             mesh,
            input_dict,
-            frz_model=frz_model,
-            ckpt_meta=ckpt_meta,
+            # frz_model=frz_model,
+            # ckpt_meta=ckpt_meta,
             suffix=suffix,
            reuse=reuse,
         )
+        # self.dout = dout
 
-        if
self.srtab is not None: - nlist, rij, sel_a, sel_r = self.descrpt.get_nlist() - nnei_a = np.cumsum(sel_a)[-1] - nnei_r = np.cumsum(sel_r)[-1] + # if self.srtab is not None: + # nlist, rij, sel_a, sel_r = self.descrpt.get_nlist() + # nnei_a = np.cumsum(sel_a)[-1] + # nnei_r = np.cumsum(sel_r)[-1] - atom_ener = self.fitting.build( - dout, natoms, input_dict, reuse=reuse, suffix=suffix - ) + atom_ener = self.fitting(dout, natoms, input_dict, reuse=reuse, suffix=suffix) self.atom_ener = atom_ener - if self.srtab is not None: - sw_lambda, sw_deriv = op_module.soft_min_switch( - atype, - rij, - nlist, - natoms, - sel_a=sel_a, - sel_r=sel_r, - alpha=self.smin_alpha, - rmin=self.sw_rmin, - rmax=self.sw_rmax, - ) - inv_sw_lambda = 1.0 - sw_lambda - # NOTICE: - # atom energy is not scaled, - # force and virial are scaled - tab_atom_ener, tab_force, tab_atom_virial = op_module.pair_tab( - self.tab_info, - self.tab_data, - atype, - rij, - nlist, - natoms, - sw_lambda, - sel_a=sel_a, - sel_r=sel_r, - ) - energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, natoms[0]]) - tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape( - tab_atom_ener, [-1] - ) - atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener - energy_raw = tab_atom_ener + atom_ener - else: - energy_raw = atom_ener + # if self.srtab is not None: + # sw_lambda, sw_deriv = op_module.soft_min_switch( + # atype, + # rij, + # nlist, + # natoms, + # sel_a=sel_a, + # sel_r=sel_r, + # alpha=self.smin_alpha, + # rmin=self.sw_rmin, + # rmax=self.sw_rmax, + # ) + # inv_sw_lambda = 1.0 - sw_lambda + # # NOTICE: + # # atom energy is not scaled, + # # force and virial are scaled + # tab_atom_ener, tab_force, tab_atom_virial = op_module.pair_tab( + # self.tab_info, + # self.tab_data, + # atype, + # rij, + # nlist, + # natoms, + # sw_lambda, + # sel_a=sel_a, + # sel_r=sel_r, + # ) + # energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, natoms[0]]) + # tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape( + # tab_atom_ener, [-1] + # ) + # atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener + # energy_raw = tab_atom_ener + atom_ener + # else: + energy_raw = atom_ener nloc_atom = ( natoms[0] if self.spin is None - else tf.reduce_sum(natoms[2 : 2 + len(self.spin.use_spin)]) + else paddle.sum(natoms[2 : 2 + len(self.spin.use_spin)]).item() ) - energy_raw = tf.reshape( + energy_raw = paddle.reshape( energy_raw, [-1, nloc_atom], name="o_atom_energy" + suffix ) - energy = tf.reduce_sum( - global_cvt_2_ener_float(energy_raw), axis=1, name="o_energy" + suffix - ) + energy = paddle.sum(energy_raw, axis=1, name="o_energy" + suffix) force, virial, atom_virial = self.descrpt.prod_force_virial(atom_ener, natoms) - if self.srtab is not None: - sw_force = op_module.soft_min_force( - energy_diff, sw_deriv, nlist, natoms, n_a_sel=nnei_a, n_r_sel=nnei_r - ) - force = force + sw_force + tab_force + # if self.srtab is not None: + # sw_force = op_module.soft_min_force( + # energy_diff, sw_deriv, nlist, natoms, n_a_sel=nnei_a, n_r_sel=nnei_r + # ) + # force = force + sw_force + tab_force - force = tf.reshape(force, [-1, 3 * natoms[1]]) + force = paddle.reshape(force, [-1, 3 * natoms[1]]) if self.spin is not None: # split and concatenate force to compute local atom force and magnetic force - judge = tf.equal(natoms[0], natoms[1]) - force = tf.cond( + judge = paddle.equal(natoms[0], natoms[1]) + force = paddle.where( judge, - lambda: self.natoms_match(force, natoms), - lambda: self.natoms_not_match(force, natoms, atype), + self.natoms_match(force, natoms), + 
self.natoms_not_match(force, natoms, atype), ) - force = tf.reshape(force, [-1, 3 * natoms[1]], name="o_force" + suffix) - - if self.srtab is not None: - sw_virial, sw_atom_virial = op_module.soft_min_virial( - energy_diff, - sw_deriv, - rij, - nlist, - natoms, - n_a_sel=nnei_a, - n_r_sel=nnei_r, - ) - atom_virial = atom_virial + sw_atom_virial + tab_atom_virial - virial = ( - virial - + sw_virial - + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis=1) - ) - - virial = tf.reshape(virial, [-1, 9], name="o_virial" + suffix) - atom_virial = tf.reshape( + force = paddle.reshape(force, [-1, 3 * natoms[1]], name="o_force" + suffix) + + # if self.srtab is not None: + # sw_virial, sw_atom_virial = op_module.soft_min_virial( + # energy_diff, + # sw_deriv, + # rij, + # nlist, + # natoms, + # n_a_sel=nnei_a, + # n_r_sel=nnei_r, + # ) + # atom_virial = atom_virial + sw_atom_virial + tab_atom_virial + # virial = ( + # virial + # + sw_virial + # + tf.sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis=1) + # ) + + virial = paddle.reshape(virial, [-1, 9], name="o_virial" + suffix) + atom_virial = paddle.reshape( atom_virial, [-1, 9 * natoms[1]], name="o_atom_virial" + suffix ) model_dict = {} - model_dict["energy"] = energy - model_dict["force"] = force - model_dict["virial"] = virial - model_dict["atom_ener"] = energy_raw - model_dict["atom_virial"] = atom_virial - model_dict["coord"] = coord - model_dict["atype"] = atype - + model_dict["energy"] = energy # [5] + model_dict["force"] = force # [5, 576] + model_dict["virial"] = virial # [5, 9] + model_dict["atom_ener"] = energy_raw # [5, 192] + model_dict["atom_virial"] = atom_virial # [5, 1728] + model_dict["coord"] = coord # [5, 576] + model_dict["atype"] = atype # [5, 192] + + # model_dict["zdebug1"] = self.descrpt.descrpt + # model_dict["zdebug2"] = self.descrpt.descrpt_deriv + # model_dict["zdebug3"] = self.descrpt.rij + # model_dict["zdebug4"] = self.descrpt.nlist + # model_dict["zdebug5"] = self.descrpt.dout + # model_dict["zdebug6"] = self.descrpt.qmat + # model_dict["zdebug7"] = self.descrpt.xyz_scatter_input + # model_dict["zdebug8"] = self.descrpt.xyz_scatter_output + + # model_dict["zdebug9"] = self.descrpt.debug_inputs + # model_dict["zdebug99"] = self.descrpt.debug_inputs_i + # model_dict["zdebug999"] = self.descrpt.debug_inputs_reshape + # model_dict["zdebug9999"] = self.descrpt.debug_xyz_scatter + # model_dict["zdebug99999"] = self.descrpt.debug_xyz_scatter_input + # model_dict["zdebug999999"] = self.descrpt.debug_xyz_scatter_output + + # model_dict["z00_hidden1"] = self.descrpt.embedding_nets[0][0].hidden1 + # model_dict["z00_hidden2"] = self.descrpt.embedding_nets[0][0].hidden2 + # model_dict["z00_hidden3"] = self.descrpt.embedding_nets[0][0].hidden3 + # model_dict["z00_xx1"] = self.descrpt.embedding_nets[0][0].xx1 + # model_dict["z00_xx2"] = self.descrpt.embedding_nets[0][0].xx2 + # model_dict["z00_xx3"] = self.descrpt.embedding_nets[0][0].xx3 + # model_dict["z00_xx4"] = self.descrpt.embedding_nets[0][0].xx4 + # model_dict["z00_0"] = self.descrpt.embedding_nets[0][0].weight[0] + # model_dict["z00_1"] = self.descrpt.embedding_nets[0][0].bias[0] + # model_dict["z00_2"] = self.descrpt.embedding_nets[0][0].xx1 + # model_dict["z00_3"] = self.descrpt.embedding_nets[0][0].hidden1 return model_dict def init_variables( diff --git a/deepmd/model/model.py b/deepmd/model/model.py index 8e6ffad910..660e30dbce 100644 --- a/deepmd/model/model.py +++ b/deepmd/model/model.py @@ -1,27 +1,17 @@ -from abc import ( - ABC, - 
abstractmethod, -) -from enum import ( - Enum, -) -from typing import ( - List, - Optional, - Union, -) +from abc import ABC +from abc import abstractmethod +from enum import Enum +from typing import List +from typing import Optional +from typing import Union -from deepmd.env import ( - GLOBAL_TF_FLOAT_PRECISION, - tf, -) -from deepmd.utils.graph import ( - load_graph_def, -) +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import tf +from deepmd.utils.graph import load_graph_def class Model(ABC): - @abstractmethod + # @abstractmethod def build( self, coord_: tf.Tensor, diff --git a/deepmd/paddle_ops.egg-info/PKG-INFO b/deepmd/paddle_ops.egg-info/PKG-INFO new file mode 100644 index 0000000000..08ad719487 --- /dev/null +++ b/deepmd/paddle_ops.egg-info/PKG-INFO @@ -0,0 +1,3 @@ +Metadata-Version: 2.1 +Name: paddle-ops +Version: 0.0.0 diff --git a/deepmd/paddle_ops.egg-info/SOURCES.txt b/deepmd/paddle_ops.egg-info/SOURCES.txt new file mode 100644 index 0000000000..8933b93cb1 --- /dev/null +++ b/deepmd/paddle_ops.egg-info/SOURCES.txt @@ -0,0 +1,12 @@ +load_paddle_op.py +../source/op/paddle_ops/srcs/pd_prod_env_mat_multi_devices_cpu.cc +../source/op/paddle_ops/srcs/pd_prod_env_mat_multi_devices_cuda.cc +../source/op/paddle_ops/srcs/pd_prod_force_se_a_multi_devices_cpu.cc +../source/op/paddle_ops/srcs/pd_prod_force_se_a_multi_devices_cuda.cc +../source/op/paddle_ops/srcs/pd_prod_virial_se_a_multi_devices_cpu.cc +../source/op/paddle_ops/srcs/pd_prod_virial_se_a_multi_devices_cuda.cc +paddle_ops.egg-info/PKG-INFO +paddle_ops.egg-info/SOURCES.txt +paddle_ops.egg-info/dependency_links.txt +paddle_ops.egg-info/not-zip-safe +paddle_ops.egg-info/top_level.txt \ No newline at end of file diff --git a/deepmd/paddle_ops.egg-info/dependency_links.txt b/deepmd/paddle_ops.egg-info/dependency_links.txt new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/deepmd/paddle_ops.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/deepmd/paddle_ops.egg-info/not-zip-safe b/deepmd/paddle_ops.egg-info/not-zip-safe new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/deepmd/paddle_ops.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/deepmd/paddle_ops.egg-info/top_level.txt b/deepmd/paddle_ops.egg-info/top_level.txt new file mode 100644 index 0000000000..7a1d7479ce --- /dev/null +++ b/deepmd/paddle_ops.egg-info/top_level.txt @@ -0,0 +1 @@ +paddle_ops diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 580d434533..04e159d55e 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -5,89 +5,56 @@ import platform import shutil import time -from typing import ( - Dict, - List, -) +from typing import Dict +from typing import List import google.protobuf.message import numpy as np -from packaging.version import ( - Version, -) -from tensorflow.python.client import ( - timeline, -) +from packaging.version import Version +from tensorflow.python.client import timeline # load grad of force module import deepmd.op # noqa: F401 -from deepmd.common import ( - data_requirement, - get_precision, - j_must_have, -) -from deepmd.descriptor.descriptor import ( - Descriptor, -) -from deepmd.env import ( - GLOBAL_ENER_FLOAT_PRECISION, - GLOBAL_TF_FLOAT_PRECISION, - TF_VERSION, - get_tf_session_config, - tf, - tfv2, -) -from deepmd.fit import ( - Fitting, -) -from deepmd.loss import ( - DOSLoss, - EnerDipoleLoss, - EnerSpinLoss, - EnerStdLoss, - TensorLoss, -) -from deepmd.model import ( - DipoleModel, - DOSModel, - EnerModel, - MultiModel, - PolarModel, 
-) +from deepmd.common import data_requirement +from deepmd.common import get_precision +from deepmd.common import j_must_have +from deepmd.descriptor.descriptor import Descriptor +from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION +from deepmd.env import GLOBAL_PD_FLOAT_PRECISION +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import TF_VERSION +from deepmd.env import get_tf_session_config +from deepmd.env import paddle +from deepmd.env import tf +from deepmd.env import tfv2 +from deepmd.fit import Fitting +from deepmd.fit import ener +from deepmd.loss import DOSLoss +from deepmd.loss import EnerDipoleLoss +from deepmd.loss import EnerSpinLoss +from deepmd.loss import EnerStdLoss +from deepmd.loss import TensorLoss +from deepmd.model import DipoleModel +from deepmd.model import DOSModel +from deepmd.model import EnerModel +from deepmd.model import MultiModel +from deepmd.model import PolarModel from deepmd.utils import random as dp_random -from deepmd.utils.argcheck import ( - type_embedding_args, -) -from deepmd.utils.data_system import ( - DeepmdDataSystem, -) -from deepmd.utils.errors import ( - GraphTooLargeError, - GraphWithoutTensorError, -) -from deepmd.utils.graph import ( - get_tensor_by_name_from_graph, - load_graph_def, -) -from deepmd.utils.learning_rate import ( - LearningRateExp, -) -from deepmd.utils.sess import ( - run_sess, -) -from deepmd.utils.spin import ( - Spin, -) -from deepmd.utils.type_embed import ( - TypeEmbedNet, -) +from deepmd.utils.argcheck import type_embedding_args +from deepmd.utils.data_system import DeepmdDataSystem +from deepmd.utils.errors import GraphTooLargeError +from deepmd.utils.errors import GraphWithoutTensorError +from deepmd.utils.graph import get_tensor_by_name_from_graph +from deepmd.utils.graph import load_graph_def +from deepmd.utils.learning_rate import LearningRateExp +from deepmd.utils.sess import run_sess +from deepmd.utils.spin import Spin +from deepmd.utils.type_embed import TypeEmbedNet log = logging.getLogger(__name__) # nvnmd -from deepmd.nvnmd.utils.config import ( - nvnmd_cfg, -) +from deepmd.nvnmd.utils.config import nvnmd_cfg def _is_subdir(path, directory): @@ -158,7 +125,8 @@ def _init_param(self, jdata): descrpt_param["multi_task"] = True if descrpt_param["type"] in ["se_e2_a", "se_a", "se_e2_r", "se_r", "hybrid"]: descrpt_param["spin"] = self.spin - self.descrpt = Descriptor(**descrpt_param) + descrpt_param.pop("type") + self.descrpt = deepmd.descriptor.se_a.DescrptSeA(**descrpt_param) # fitting net if not self.multi_task_mode: @@ -167,7 +135,8 @@ def _init_param(self, jdata): fitting_param["descrpt"] = self.descrpt if fitting_type == "ener": fitting_param["spin"] = self.spin - self.fitting = Fitting(**fitting_param) + fitting_param.pop("type") + self.fitting = ener.EnerFitting(**fitting_param) else: self.fitting_dict = {} self.fitting_type_dict = {} @@ -316,7 +285,7 @@ def get_lr_and_coef(lr_param): # loss # infer loss type by fitting_type - def loss_init(_loss_param, _fitting_type, _fitting, _lr): + def loss_init(_loss_param, _fitting_type, _fitting, _lr) -> EnerStdLoss: _loss_type = _loss_param.get("type", "ener") if _fitting_type == "ener": _loss_param.pop("type", None) @@ -576,10 +545,10 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""): # for fparam or aparam settings in 'ener' type fitting net self.fitting.init_variables(graph, graph_def) - if self.is_compress or self.model_type == "compressed_model": - tf.constant("compressed_model", name="model_type", dtype=tf.string) 
- else: - tf.constant("original_model", name="model_type", dtype=tf.string) + # if self.is_compress or self.model_type == "compressed_model": + # tf.constant("compressed_model", name="model_type", dtype=tf.string) + # else: + # tf.constant("original_model", name="model_type", dtype=tf.string) if self.mixed_prec is not None: self.descrpt.enable_mixed_precision(self.mixed_prec) @@ -593,17 +562,17 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""): self._build_lr() self._build_network(data, suffix) - self._build_training() + # self._build_training() def _build_lr(self): - self._extra_train_ops = [] - self.global_step = tf.train.get_or_create_global_step() + # self._extra_train_ops = [] + self.global_step = 0 if not self.multi_task_mode: self.learning_rate = self.lr.build(self.global_step, self.stop_batch) else: self.learning_rate_dict = {} for fitting_key in self.fitting_type_dict: - self.learning_rate_dict[fitting_key] = self.lr_dict[fitting_key].build( + self.lr_scheduler[fitting_key] = self.lr.build( self.global_step, self.stop_batch ) @@ -678,7 +647,7 @@ def _build_network(self, data, suffix=""): reuse=False, ) - self.l2_l, self.l2_more = self._build_loss() + # self.l2_l, self.l2_more = self._build_loss() log.info("built network") @@ -813,12 +782,12 @@ def _init_session(self): log.info("receive global variables from task#0") run_sess(self.sess, bcast_op) - def train(self, train_data=None, valid_data=None): + def train(self, train_data=None, valid_data=None, stop_batch: int = 10): # if valid_data is None: # no validation set specified. # valid_data = train_data # using training set as validation set. - stop_batch = self.stop_batch - self._init_session() + # stop_batch = self.stop_batch + # self._init_session() # Before data shard is enabled, only cheif do evaluation and record it # self.print_head() @@ -826,15 +795,18 @@ def train(self, train_data=None, valid_data=None): if self.run_opt.is_chief: fp = open(self.disp_file, "a") - cur_batch = run_sess(self.sess, self.global_step) + cur_batch = self.global_step is_first_step = True self.cur_batch = cur_batch + self.optimizer = paddle.optimizer.Adam( + learning_rate=self.learning_rate, parameters=self.model.parameters() + ) if not self.multi_task_mode: log.info( "start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" % ( - run_sess(self.sess, self.learning_rate), - self.lr.value(cur_batch), + self.learning_rate.get_lr(), + self.learning_rate.get_lr(), self.lr.decay_steps_, self.lr.decay_rate_, self.lr.value(stop_batch), @@ -846,56 +818,51 @@ def train(self, train_data=None, valid_data=None): "%s: start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" % ( fitting_key, - run_sess(self.sess, self.learning_rate_dict[fitting_key]), - self.lr_dict[fitting_key].value(cur_batch), + self.learning_rate[fitting_key].base_lr, + self.lr_dict[fitting_key].get_lr(), self.lr_dict[fitting_key].decay_steps_, self.lr_dict[fitting_key].decay_rate_, self.lr_dict[fitting_key].value(stop_batch), ) ) - prf_options = None - prf_run_metadata = None - if self.profiling: - prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - prf_run_metadata = tf.RunMetadata() + # prf_options = None + # prf_run_metadata = None + # if self.profiling: + # prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + # prf_run_metadata = tf.RunMetadata() # set tensorboard execution environment - if self.tensorboard: - summary_merged_op = tf.summary.merge_all() - # Remove TB 
old logging directory from previous run - try: - shutil.rmtree(self.tensorboard_log_dir) - except FileNotFoundError: - pass # directory does not exist, this is OK - except Exception as e: - # general error when removing directory, warn user - log.exception( - f"Could not remove old tensorboard logging directory: " - f"{self.tensorboard_log_dir}. Error: {e}" - ) - else: - log.debug("Removing old tensorboard log directory.") - tb_train_writer = tf.summary.FileWriter( - self.tensorboard_log_dir + "/train", self.sess.graph - ) - tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + "/test") - else: - tb_train_writer = None - tb_valid_writer = None - if self.enable_profiler: - # https://www.tensorflow.org/guide/profiler - tfv2.profiler.experimental.start(self.tensorboard_log_dir) + # if self.tensorboard: + # summary_merged_op = tf.summary.merge_all() + # # Remove TB old logging directory from previous run + # try: + # shutil.rmtree(self.tensorboard_log_dir) + # except FileNotFoundError: + # pass # directory does not exist, this is OK + # except Exception as e: + # # general error when removing directory, warn user + # log.exception( + # f"Could not remove old tensorboard logging directory: " + # f"{self.tensorboard_log_dir}. Error: {e}" + # ) + # else: + # log.debug("Removing old tensorboard log directory.") + # tb_train_writer = tf.summary.FileWriter( + # self.tensorboard_log_dir + "/train", self.sess.graph + # ) + # tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + "/test") + # else: + # tb_train_writer = None + # tb_valid_writer = None + # if self.enable_profiler: + # # https://www.tensorflow.org/guide/profiler + # tfv2.profiler.experimental.start(self.tensorboard_log_dir) train_time = 0 total_train_time = 0.0 wall_time_tic = time.time() - next_batch_train_op = None - next_fitting_key = None - next_train_batch_list = None - next_datasetloader = None - # dataset loader op if not self.multi_task_mode: datasetloader = DatasetLoader(train_data) @@ -908,35 +875,36 @@ def train(self, train_data=None, valid_data=None): data_op[fitting_key] = datasetloader[fitting_key].build() while cur_batch < stop_batch: + train_batch = datasetloader.get_data_dict() # first round validation: - if is_first_step: - if not self.multi_task_mode: - train_batch = train_data.get_batch() - batch_train_op = self.train_op - else: - fitting_idx = dp_random.choice( - np.arange(self.nfitting), p=np.array(self.fitting_prob) - ) - fitting_key = self.fitting_key_list[fitting_idx] - train_batch = train_data[fitting_key].get_batch() - batch_train_op = self.train_op[fitting_key] - else: - train_batch = next_datasetloader.get_data_dict(next_train_batch_list) - batch_train_op = next_batch_train_op - fitting_key = next_fitting_key + # if is_first_step: + # if not self.multi_task_mode: + # train_batch = train_data.get_batch() + # # batch_train_op = self.train_op + # else: + # fitting_idx = dp_random.choice( + # np.arange(self.nfitting), p=np.array(self.fitting_prob) + # ) + # fitting_key = self.fitting_key_list[fitting_idx] + # train_batch = train_data[fitting_key].get_batch() + # # batch_train_op = self.train_op[fitting_key] + # else: + # train_batch = next_datasetloader.get_data_dict(next_train_batch_list) + # # batch_train_op = next_batch_train_op + # fitting_key = next_fitting_key # for next round - if not self.multi_task_mode: - next_datasetloader = datasetloader - next_batch_train_op = self.train_op - next_train_batch_op = data_op - else: - fitting_idx = dp_random.choice( - np.arange(self.nfitting), 
p=np.array(self.fitting_prob) - ) - next_fitting_key = self.fitting_key_list[fitting_idx] - next_datasetloader = datasetloader[next_fitting_key] - next_batch_train_op = self.train_op[fitting_key] - next_train_batch_op = data_op[fitting_key] + # if not self.multi_task_mode: + # next_datasetloader = datasetloader + # next_batch_train_op = self.train_op + # next_train_batch_op = data_op + # else: + # fitting_idx = dp_random.choice( + # np.arange(self.nfitting), p=np.array(self.fitting_prob) + # ) + # next_fitting_key = self.fitting_key_list[fitting_idx] + # next_datasetloader = datasetloader[next_fitting_key] + # next_batch_train_op = self.train_op[fitting_key] + # next_train_batch_op = data_op[fitting_key] if self.display_in_training and is_first_step: if self.run_opt.is_chief: @@ -982,32 +950,180 @@ def train(self, train_data=None, valid_data=None): if self.timing_in_training: tic = time.time() - train_feed_dict = self.get_feed_dict(train_batch, is_training=True) + # train_feed_dict = self.get_feed_dict(train_batch, is_training=True) # use tensorboard to visualize the training of deepmd-kit # it will takes some extra execution time to generate the tensorboard data if self.tensorboard and (cur_batch % self.tensorboard_freq == 0): - summary, _, next_train_batch_list = run_sess( - self.sess, - [summary_merged_op, batch_train_op, next_train_batch_op], - feed_dict=train_feed_dict, - options=prf_options, - run_metadata=prf_run_metadata, + # summary, _, next_train_batch_list = run_sess( + # self.sess, + # [summary_merged_op, batch_train_op, next_train_batch_op], + # feed_dict=train_feed_dict, + # options=prf_options, + # run_metadata=prf_run_metadata, + # ) + # tb_train_writer.add_summary(summary, cur_batch) + model_pred = self.model( + paddle.to_tensor(train_batch["coord"], "float32"), + paddle.to_tensor(train_batch["type"], "int32"), + paddle.to_tensor(train_batch["natoms_vec"], "int32", "cpu"), + paddle.to_tensor(train_batch["box"], "float32"), + paddle.to_tensor(train_batch["default_mesh"], "int32"), + train_batch, + suffix="", + reuse=False, ) - tb_train_writer.add_summary(summary, cur_batch) else: - _, next_train_batch_list = run_sess( - self.sess, - [batch_train_op, next_train_batch_op], - feed_dict=train_feed_dict, - options=prf_options, - run_metadata=prf_run_metadata, + # for k, v in train_feed_dict.items(): + # print(f"{k} {v.shape if hasattr(v, 'shape') else v}") + """ + find_box:0", dtype=float32) () + find_coord:0", dtype=float32) () + find_numb_copy:0", dtype=float32) () + find_energy:0", dtype=float32) () + find_force:0", dtype=float32) () + find_virial:0", dtype=float32) () + find_atom_ener:0", dtype=float32) () + find_atom_pref:0", dtype=float32) () + box:0", shape=(?,), dtype=float64) (9,) + coord:0", shape=(?,), dtype=float64) (576,) + numb_copy:0", shape=(?,), dtype=float64) (1,) + energy:0", shape=(?,), dtype=float64) (1,) + force:0", shape=(?,), dtype=float64) (576,) + virial:0", shape=(?,), dtype=float64) (9,) + atom_ener:0", shape=(?,), dtype=float64) (192,) + atom_pref:0", shape=(?,), dtype=float64) (576,) + natoms:0", shape=(4,), dtype=int32) (4,) + mesh:0", shape=(?,), dtype=int32) (6,) + type:0", shape=(?,), dtype=int32) (192,) + aceholder:0", dtype=bool) True + """ + model_inputs = {} + for kk in train_batch.keys(): + if kk == "find_type" or kk == "type": + continue + prec = "float64" + if "find_" in kk: + model_inputs[kk] = paddle.to_tensor( + train_batch[kk], dtype="float64" + ) + else: + model_inputs[kk] = paddle.to_tensor( + np.reshape(train_batch[kk], [-1]), 
dtype=prec + ) + + for ii in ["type"]: + model_inputs[ii] = paddle.to_tensor( + np.reshape(train_batch[ii], [-1]), dtype="int32" + ) + for ii in ["natoms_vec", "default_mesh"]: + model_inputs[ii] = paddle.to_tensor(train_batch[ii], dtype="int32") + model_inputs["is_training"] = paddle.to_tensor(True) + model_inputs["natoms_vec"] = paddle.to_tensor( + model_inputs["natoms_vec"], place="cpu" + ) + # for k, v in model_inputs.items(): + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # f"deepmd-kit/examples/water/se_e2_a/align_input/{k}", + # v, + # ) + # exit() + # { + # find_box: [] + # box: [9] + # find_coord: [] + # coord: [576] + # find_numb_copy: [] + # numb_copy: [1] + # find_energy: [] + # energy: [1] + # find_force: [] + # force: [576] + # find_virial: [] + # virial: [9] + # find_atom_ener: [] + # atom_ener: [192] + # find_atom_pref: [] + # atom_pref: [576] + # natoms_vec: [4] + # default_mesh: [6] + # type: [192] + # is_training: [] + # } + model_pred = self.model( + model_inputs["coord"], + model_inputs["type"], + model_inputs["natoms_vec"], + model_inputs["box"], + model_inputs["default_mesh"], + model_inputs, + suffix="", + reuse=False, + ) + # for k, v in model_pred.items(): + # np.save( + # "/workspace/hesensen/deepmd_backend/" + # f"deepmd-kit/examples/water/se_e2_a/align_input/pred_{k}", + # v, + # ) + # exit() + + # loss = ( + # model_pred["force"].sum() + # + model_pred["virial"].sum() + # + model_pred["energy"].sum() + # + model_pred["atom_ener"].sum() + # ) + # print(f"{self.cur_batch} {self.learning_rate.get_lr():.10f}") + l2_l, l2_more = self.loss.compute_loss( + self.learning_rate.get_lr(), + model_inputs["natoms_vec"], + model_pred, + model_inputs, + suffix="train", ) + + self.optimizer.clear_grad() + l2_l.backward() + self.optimizer.step() + self.global_step += 1 + + # _, next_train_batch_list = run_sess( + # self.sess, + # [batch_train_op, next_train_batch_op], + # feed_dict=train_feed_dict, + # options=prf_options, + # run_metadata=prf_run_metadata, + # ) + """next_train_batch_list + find_box (): none + box (1, 9): (1, 9) + find_coord (): none + coord (1, 576): (1, 576) + find_numb_copy (): none + numb_copy (1, 1): (1, 1) + find_energy (): none + energy (1, 1): (1, 1) + find_force (): none + force (1, 576): (1, 576) + find_virial (): none + virial (1, 9): (1, 9) + find_atom_ener (): none + atom_ener (1, 192): (1, 192) + find_atom_pref (): none + atom_pref (1, 576): (1, 576) + type (1, 192): (1, 192) + natoms_vec (4,): (4,) + default_mesh (6,): (6,) + """ if self.timing_in_training: toc = time.time() if self.timing_in_training: train_time += toc - tic - cur_batch = run_sess(self.sess, self.global_step) + cur_batch = self.global_step self.cur_batch = cur_batch + if (cur_batch % self.lr.decay_steps_) == 0: + self.learning_rate.step() # on-the-fly validation if self.display_in_training and (cur_batch % self.disp_freq == 0): @@ -1060,12 +1176,10 @@ def train(self, train_data=None, valid_data=None): if ( self.save_freq > 0 and cur_batch % self.save_freq == 0 - and self.saver is not None + # and self.saver is not None ): self.save_checkpoint(cur_batch) - if ( - self.save_freq == 0 or cur_batch == 0 or cur_batch % self.save_freq != 0 - ) and self.saver is not None: + if self.save_freq == 0 or cur_batch == 0 or cur_batch % self.save_freq != 0: self.save_checkpoint(cur_batch) if self.run_opt.is_chief: fp.close() @@ -1083,42 +1197,44 @@ def train(self, train_data=None, valid_data=None): total_train_time / (stop_batch // self.disp_freq * self.disp_freq), ) - if 
self.profiling and self.run_opt.is_chief: - fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats) - chrome_trace = fetched_timeline.generate_chrome_trace_format() - with open(self.profiling_file, "w") as f: - f.write(chrome_trace) - if self.enable_profiler and self.run_opt.is_chief: - tfv2.profiler.experimental.stop() + # if self.profiling and self.run_opt.is_chief: + # fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats) + # chrome_trace = fetched_timeline.generate_chrome_trace_format() + # with open(self.profiling_file, "w") as f: + # f.write(chrome_trace) + # if self.enable_profiler and self.run_opt.is_chief: + # tfv2.profiler.experimental.stop() def save_checkpoint(self, cur_batch: int): - try: - ckpt_prefix = self.saver.save( - self.sess, - os.path.join(os.getcwd(), self.save_ckpt), - global_step=cur_batch, - ) - except google.protobuf.message.DecodeError as e: - raise GraphTooLargeError( - "The graph size exceeds 2 GB, the hard limitation of protobuf." - " Then a DecodeError was raised by protobuf. You should " - "reduce the size of your model." - ) from e - # make symlinks from prefix with step to that without step to break nothing - # get all checkpoint files - original_files = glob.glob(ckpt_prefix + ".*") - for ori_ff in original_files: - new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix) :] - try: - # remove old one - os.remove(new_ff) - except OSError: - pass - if platform.system() != "Windows": - # by default one does not have access to create symlink on Windows - os.symlink(ori_ff, new_ff) - else: - shutil.copyfile(ori_ff, new_ff) + # try: + # ckpt_prefix = self.saver.save( + # self.sess, + # os.path.join(os.getcwd(), self.save_ckpt), + # global_step=cur_batch, + # ) + # except google.protobuf.message.DecodeError as e: + # raise GraphTooLargeError( + # "The graph size exceeds 2 GB, the hard limitation of protobuf." + # " Then a DecodeError was raised by protobuf. You should " + # "reduce the size of your model." 
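Editor's note: `save_checkpoint` below drops the TF `Saver` (and with it the protobuf 2 GB graph limit) in favor of `paddle.save` on plain state dicts. A hedged sketch of the full round trip; the file names are illustrative, not the ones the patch writes, and the symlink bookkeeping of the old code has no equivalent here:

```python
import paddle

def save_ckpt(model: paddle.nn.Layer, opt: paddle.optimizer.Optimizer, step: int):
    # one file per state_dict, mirroring the two paddle.save calls below
    paddle.save(model.state_dict(), f"model_{step}.pdparams")
    paddle.save(opt.state_dict(), f"opt_{step}.pdopt")

def load_ckpt(model: paddle.nn.Layer, opt: paddle.optimizer.Optimizer, step: int):
    # set_state_dict restores parameters / optimizer moments in place
    model.set_state_dict(paddle.load(f"model_{step}.pdparams"))
    opt.set_state_dict(paddle.load(f"opt_{step}.pdopt"))
```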
+        #     ) from e
+        # # make symlinks from prefix with step to that without step to break nothing
+        # # get all checkpoint files
+        # original_files = glob.glob(ckpt_prefix + ".*")
+        # for ori_ff in original_files:
+        #     new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix) :]
+        #     try:
+        #         # remove old one
+        #         os.remove(new_ff)
+        #     except OSError:
+        #         pass
+        #     if platform.system() != "Windows":
+        #         # by default one does not have access to create symlink on Windows
+        #         os.symlink(ori_ff, new_ff)
+        #     else:
+        #         shutil.copyfile(ori_ff, new_ff)
+        paddle.save(self.model.state_dict(), f"Model_{cur_batch}.pdparams")
+        paddle.save(self.optimizer.state_dict(), f"Optimizer_{cur_batch}.pdopt")
         log.info("saved checkpoint %s" % self.save_ckpt)

     def get_feed_dict(self, batch, is_training):
@@ -1127,18 +1243,18 @@ def get_feed_dict(self, batch, is_training):
             if kk == "find_type" or kk == "type" or kk == "real_natoms_vec":
                 continue
             if "find_" in kk:
-                feed_dict[self.place_holders[kk]] = batch[kk]
+                feed_dict[kk] = batch[kk]
             else:
-                feed_dict[self.place_holders[kk]] = np.reshape(batch[kk], [-1])
+                feed_dict[kk] = np.reshape(batch[kk], [-1])
         for ii in ["type"]:
-            feed_dict[self.place_holders[ii]] = np.reshape(batch[ii], [-1])
+            feed_dict[ii] = np.reshape(batch[ii], [-1])
         for ii in ["natoms_vec", "default_mesh"]:
-            feed_dict[self.place_holders[ii]] = batch[ii]
-        feed_dict[self.place_holders["is_training"]] = is_training
+            feed_dict[ii] = batch[ii]
+        feed_dict["is_training"] = is_training
         return feed_dict

     def get_global_step(self):
-        return run_sess(self.sess, self.global_step)
+        return self.global_step

     # def print_head (self) :  # deprecated
     #     if self.run_opt.is_chief:
@@ -1157,7 +1273,7 @@ def valid_on_the_fly(
         cur_batch = self.cur_batch
         if not self.multi_task_mode:
-            current_lr = run_sess(self.sess, self.learning_rate)
+            current_lr = self.learning_rate.get_lr()
         else:
             assert (
                 fitting_key is not None
@@ -1263,8 +1379,8 @@ def print_on_training(
             fp.write(print_str)
             fp.flush()

-    @staticmethod
-    def eval_single_list(single_batch_list, loss, sess, get_feed_dict_func, prefix=""):
+    # @staticmethod
+    def eval_single_list(self, single_batch_list, loss, prefix=""):
         if single_batch_list is None:
             return None
         numb_batch = len(single_batch_list)
@@ -1273,8 +1389,8 @@ def eval_single_list(single_batch_list, loss, sess, get_feed_dict_func, prefix="
         for i in range(numb_batch):
             batch = single_batch_list[i]
             natoms = batch["natoms_vec"]
-            feed_dict = get_feed_dict_func(batch, is_training=False)
-            results = loss.eval(sess, feed_dict, natoms)
+            # feed_dict = get_feed_dict_func(batch, is_training=False)
+            results = loss.eval(self.model, batch, natoms)

             for k, v in results.items():
                 if k == "natoms":
@@ -1290,9 +1406,7 @@ def eval_single_list(single_batch_list, loss, sess, get_feed_dict_func, prefix="
     def get_evaluation_results(self, batch_list):
         if not self.multi_task_mode:
-            avg_results = self.eval_single_list(
-                batch_list, self.loss, self.sess, self.get_feed_dict
-            )
+            avg_results = self.eval_single_list(batch_list, self.loss)
         else:
             avg_results = {}
             for fitting_key in batch_list:
@@ -1474,9 +1588,11 @@ def get_train_batch() -> List[np.ndarray]:
                 batch_data = tuple([batch_data[kk] for kk in self.data_keys])
                 return batch_data

-        return tf.py_func(get_train_batch, [], self.data_types, name="train_data")
+        return get_train_batch

-    def get_data_dict(self, batch_list: List[np.ndarray]) -> Dict[str, np.ndarray]:
+    def get_data_dict(
+        self, batch_list: List[np.ndarray] = None
+    ) -> Dict[str, np.ndarray]:
         """Generate a dict of the loaded data.
Parameters @@ -1489,4 +1605,8 @@ def get_data_dict(self, batch_list: List[np.ndarray]) -> Dict[str, np.ndarray]: Dict[str, np.ndarray] The dict of the loaded data. """ - return {kk: vv for kk, vv in zip(self.data_keys, batch_list)} + batch_data = self.train_data.get_batch() + # convert dict to list of arryas + batch_data = tuple([batch_data[kk] for kk in self.data_keys]) + return {kk: vv for kk, vv in zip(self.data_keys, batch_data)} + # return {kk: vv for kk, vv in zip(self.data_keys, batch_list)} diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index f393618cb1..5b55261d06 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -1,18 +1,12 @@ import logging import os -from typing import ( - Callable, - Tuple, -) +from typing import Callable +from typing import Tuple import numpy as np -from deepmd.env import ( - tf, -) -from deepmd.utils.errors import ( - OutOfMemoryError, -) +from deepmd.env import tf +from deepmd.utils.errors import OutOfMemoryError log = logging.getLogger(__name__) @@ -100,9 +94,11 @@ def execute( OOM when batch size is 1 """ try: + # print(__file__, self.current_batch_size, natoms) n_batch, result = callable( max(self.current_batch_size // natoms, 1), start_index ) + # print(__file__, n_batch) except OutOfMemoryError as e: # TODO: it's very slow to catch OOM error; I don't know what TF is doing here # but luckily we only need to catch once @@ -196,6 +192,7 @@ def execute_with_batch_size( for rr in result: rr.reshape((n_batch, -1)) results.append(result) + # print(__file__, "here") r = tuple([np.concatenate(r, axis=0) for r in zip(*results)]) if len(r) == 1: diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 16fcbfc7c5..8ffb7bc58a 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -1,21 +1,15 @@ #!/usr/bin/env python3 import logging -from typing import ( - List, - Optional, -) +from typing import List +from typing import Optional import numpy as np -from deepmd.env import ( - GLOBAL_ENER_FLOAT_PRECISION, - GLOBAL_NP_FLOAT_PRECISION, -) +from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION +from deepmd.env import GLOBAL_NP_FLOAT_PRECISION from deepmd.utils import random as dp_random -from deepmd.utils.path import ( - DPPath, -) +from deepmd.utils.path import DPPath log = logging.getLogger(__name__) @@ -458,7 +452,7 @@ def _load_set(self, set_name: DPPath): self.data_dict[kk]["ndof"], atomic=self.data_dict[kk]["atomic"], high_prec=self.data_dict[kk]["high_prec"], - must=self.data_dict[kk]["must"], + must=False, type_sel=self.data_dict[kk]["type_sel"], repeat=self.data_dict[kk]["repeat"], default=self.data_dict[kk]["default"], diff --git a/deepmd/utils/learning_rate.py b/deepmd/utils/learning_rate.py index 324f4f7fff..315138010e 100644 --- a/deepmd/utils/learning_rate.py +++ b/deepmd/utils/learning_rate.py @@ -1,12 +1,10 @@ -from typing import ( - Optional, -) +from typing import Optional import numpy as np +from paddle.optimizer import lr -from deepmd.env import ( - tf, -) +from deepmd.env import paddle +from deepmd.env import tf class LearningRateExp: @@ -89,13 +87,16 @@ def build( np.log(self.stop_lr_ / self.start_lr_) / (stop_step / self.decay_steps_) ) - return tf.train.exponential_decay( + # print("decay_steps_ = ", self.decay_steps_) + return lr.ExponentialDecay( self.start_lr_, - global_step, - self.decay_steps_, - self.decay_rate_, - staircase=True, + gamma=self.decay_rate_, ) + # return paddle.optimizer.lr.ExponentialDecay( + # learning_rate=self.start_lr_, + # gamma=self.decay_rate_ ** (1 / 
self.decay_steps_),
+        #     # verbose=True,
+        # )

     def start_lr(self) -> float:
         """Get the start lr."""
diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py
index 9b23bc9d76..12ef281ef7 100644
--- a/deepmd/utils/neighbor_stat.py
+++ b/deepmd/utils/neighbor_stat.py
@@ -1,24 +1,25 @@
 import logging
 import math
-from typing import (
-    List,
-    Tuple,
-)
+from typing import List
+from typing import Tuple

 import numpy as np

-from deepmd.env import (
-    GLOBAL_NP_FLOAT_PRECISION,
-    default_tf_session_config,
-    op_module,
-    tf,
-)
-from deepmd.utils.data_system import (
-    DeepmdDataSystem,
-)
-from deepmd.utils.parallel_op import (
-    ParallelOp,
-)
+from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
+from deepmd.env import default_tf_session_config
+from deepmd.env import op_module
+from deepmd.env import tf
+
+# from paddle.utils import cpp_extension
+# op_module = cpp_extension.load(
+#     name="custom_op_paddle2",
+#     sources=["/workspace/hesensen/deepmd-kit/source/op/paddle/neighbor_stat.cc"],
+#     extra_include_paths=["/workspace/hesensen/deepmd-kit/source/lib/include/","/usr/local/cuda/targets/x86_64-linux/include/", "/workspace/hesensen/deepmd-kit/source/op"],
+#     # extra_library_paths=["../build/lib/", "/usr/local/cuda/lib64"],
+#     verbose=True,
+# )
+from deepmd.utils.data_system import DeepmdDataSystem
+from deepmd.utils.parallel_op import ParallelOp

 log = logging.getLogger(__name__)
@@ -48,45 +49,44 @@ def __init__(
         self.rcut = rcut
         self.ntypes = ntypes
         self.one_type = one_type
-        sub_graph = tf.Graph()
-
-        def builder():
-            place_holders = {}
-            for ii in ["coord", "box"]:
-                place_holders[ii] = tf.placeholder(
-                    GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii
-                )
-            place_holders["type"] = tf.placeholder(
-                tf.int32, [None, None], name="t_type"
-            )
-            place_holders["natoms_vec"] = tf.placeholder(
-                tf.int32, [self.ntypes + 2], name="t_natoms"
-            )
-            place_holders["default_mesh"] = tf.placeholder(
-                tf.int32, [None], name="t_mesh"
-            )
-            t_type = place_holders["type"]
-            t_natoms = place_holders["natoms_vec"]
-            if self.one_type:
-                # all types = 0, natoms_vec = [natoms, natoms, natoms]
-                t_type = tf.clip_by_value(t_type, -1, 0)
-                t_natoms = tf.tile(t_natoms[0:1], [3])
-
-            _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat(
-                place_holders["coord"],
-                t_type,
-                t_natoms,
-                place_holders["box"],
-                place_holders["default_mesh"],
-                rcut=self.rcut,
-            )
-            place_holders["dir"] = tf.placeholder(tf.string)
-            return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders["dir"])
-
-        with sub_graph.as_default():
-            self.p = ParallelOp(builder, config=default_tf_session_config)
-
-        self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config)
+        # sub_graph = tf.Graph()
+
+        # def builder():
+        #     place_holders = {}
+        #     for ii in ["coord", "box"]:
+        #         place_holders[ii] = tf.placeholder(
+        #             GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii
+        #         )
+        #     place_holders["type"] = tf.placeholder(
+        #         tf.int32, [None, None], name="t_type"
+        #     )
+        #     place_holders["natoms_vec"] = tf.placeholder(
+        #         tf.int32, [self.ntypes + 2], name="t_natoms"
+        #     )
+        #     place_holders["default_mesh"] = tf.placeholder(
+        #         tf.int32, [None], name="t_mesh"
+        #     )
+        #     t_type = place_holders["type"]
+        #     t_natoms = place_holders["natoms_vec"]
+        #     if self.one_type:
+        #         # all types = 0, natoms_vec = [natoms, natoms, natoms]
+        #         t_type = tf.clip_by_value(t_type, -1, 0)
+        #         t_natoms = tf.tile(t_natoms[0:1], [3])
+        #     _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat(  # computed only once here
+        #
place_holders["coord"], + # t_type, + # t_natoms, + # place_holders["box"], + # place_holders["default_mesh"], + # rcut=self.rcut, + # ) + # place_holders["dir"] = tf.placeholder(tf.string) + # return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders["dir"]) + + # with sub_graph.as_default(): + # self.p = ParallelOp(builder, config=default_tf_session_config) + + # self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: """Get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms. @@ -108,44 +108,120 @@ def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: if not self.one_type: self.max_nbor_size *= self.ntypes - def feed(): - for ii in range(len(data.system_dirs)): - for jj in data.data_systems[ii].dirs: - data_set = data.data_systems[ii]._load_set(jj) - for kk in range(np.array(data_set["type"]).shape[0]): - yield { - "coord": np.array(data_set["coord"])[kk].reshape( - [-1, data.natoms[ii] * 3] - ), - "type": np.array(data_set["type"])[kk].reshape( - [-1, data.natoms[ii]] - ), - "natoms_vec": np.array(data.natoms_vec[ii]), - "box": np.array(data_set["box"])[kk].reshape([-1, 9]), - "default_mesh": np.array(data.default_mesh[ii]), - "dir": str(jj), - } - - for mn, dt, jj in self.p.generate(self.sub_sess, feed()): - if dt.size != 0: - dt = np.min(dt) - else: - dt = self.rcut - log.warning( - "Atoms with no neighbors found in %s. Please make sure it's what you expected." - % jj - ) - if dt < self.min_nbor_dist: - if math.isclose(dt, 0.0, rel_tol=1e-6): - # it's unexpected that the distance between two atoms is zero - # zero distance will cause nan (#874) - raise RuntimeError( - "Some atoms are overlapping in %s. Please check your" - " training data to remove duplicated atoms." 
% jj + # def feed(): + # for ii in range(len(data.system_dirs)): + # for jj in data.data_systems[ii].dirs: + # data_set = data.data_systems[ii]._load_set(jj) + # for kk in range(np.array(data_set["type"]).shape[0]): + # ret = { + # "coord": np.array(data_set["coord"])[kk].reshape( + # [-1, data.natoms[ii] * 3] + # ), # (1, 576) + # "type": np.array(data_set["type"])[kk].reshape( + # [-1, data.natoms[ii]] + # ), # (1, 192) + # "natoms_vec": np.array(data.natoms_vec[ii]), # (4,) + # "box": np.array(data_set["box"])[kk].reshape([-1, 9]), # (1, 9) + # "default_mesh": np.array(data.default_mesh[ii]), # (6,) + # "dir": str(jj), # ../data/data_0/set.xxx + # } + # print(str(jj)) + # print("coord", ret["coord"].shape, ret["coord"].dtype) + # print("type", ret["type"].shape, ret["type"].dtype) + # print("natoms_vec", ret["natoms_vec"].shape, ret["natoms_vec"].dtype) + # print("box", ret["box"].shape, ret["box"].dtype) + # print("default_mesh", ret["default_mesh"].shape, ret["default_mesh"].dtype) + # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/coord.npy", ret["coord"]) + # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/type.npy", ret["type"]) + # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/natoms_vec.npy", ret["natoms_vec"]) + # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/box.npy", ret["box"]) + # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/default_mesh.npy", ret["default_mesh"]) + # yield ret + import paddle + + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii]._load_set(jj) + for kk in range(np.array(data_set["type"]).shape[0]): + coord = np.array(data_set["coord"])[kk].reshape( + [-1, data.natoms[ii] * 3] ) - self.min_nbor_dist = dt - var = np.max(mn, axis=0) - self.max_nbor_size = np.maximum(var, self.max_nbor_size) + coord = paddle.to_tensor( + coord, dtype="float32", place="cpu" + ) # [1, 576] + + _type = np.array(data_set["type"])[kk].reshape( + [-1, data.natoms[ii]] + ) + _type = paddle.to_tensor( + _type, dtype="int32", place="cpu" + ) # [1, 192] + + natoms_vec = np.array(data.natoms_vec[ii]) + natoms_vec = paddle.to_tensor( + natoms_vec, dtype="int64", place="cpu" + ) # [4] + + box = np.array(data_set["box"])[kk].reshape([-1, 9]) + box = paddle.to_tensor(box, dtype="float32", place="cpu") # [1, 9] + + default_mesh = np.array(data.default_mesh[ii]) + default_mesh = paddle.to_tensor( + default_mesh, dtype="int32", place="cpu" + ) # [6] + + rcut = self.rcut + mn, dt = op_module.neighbor_stat( + coord, + _type, + natoms_vec, + box, + default_mesh, + rcut, + ) + if dt.size != 0: + dt = paddle.min(dt).item() + else: + dt = self.rcut + log.warning( + "Atoms with no neighbors found in %s. Please make sure it's what you expected." + % jj + ) + if dt < self.min_nbor_dist: + if math.isclose(dt, 0.0, rel_tol=1e-6): + # it's unexpected that the distance between two atoms is zero + # zero distance will cause nan (#874) + raise RuntimeError( + "Some atoms are overlapping in %s. Please check your" + " training data to remove duplicated atoms." 
% jj + ) + self.min_nbor_dist = dt + var = paddle.max(mn, axis=0).numpy() + self.max_nbor_size = np.maximum(var, self.max_nbor_size) + + # for mn, dt, jj in self.p.generate(self.sub_sess, feed()): # _max_nbor_size, _min_nbor_dist, dir + # # print(mn.shape, dt.shape, jj) + # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/max_nbor_size.npy", mn) + # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/min_nbor_dist.npy", dt) + # if dt.size != 0: + # dt = np.min(dt) + # else: + # dt = self.rcut + # log.warning( + # "Atoms with no neighbors found in %s. Please make sure it's what you expected." + # % jj + # ) + # if dt < self.min_nbor_dist: + # if math.isclose(dt, 0.0, rel_tol=1e-6): + # # it's unexpected that the distance between two atoms is zero + # # zero distance will cause nan (#874) + # raise RuntimeError( + # "Some atoms are overlapping in %s. Please check your" + # " training data to remove duplicated atoms." % jj + # ) + # self.min_nbor_dist = dt + # var = np.max(mn, axis=0) + # self.max_nbor_size = np.maximum(var, self.max_nbor_size) log.info("training data with min nbor dist: " + str(self.min_nbor_dist)) log.info("training data with max nbor size: " + str(self.max_nbor_size)) diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py index a718da0b26..8471964000 100644 --- a/deepmd/utils/network.py +++ b/deepmd/utils/network.py @@ -1,12 +1,11 @@ import numpy as np +from paddle import nn -from deepmd.common import ( - get_precision, -) -from deepmd.env import ( - GLOBAL_TF_FLOAT_PRECISION, - tf, -) +from deepmd.common import get_precision +from deepmd.env import GLOBAL_PD_FLOAT_PRECISION +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import paddle +from deepmd.env import tf def one_layer_rand_seed_shift(): @@ -296,3 +295,256 @@ def variable_summaries(var: tf.Variable, name: str): tf.summary.scalar("max", tf.reduce_max(var)) tf.summary.scalar("min", tf.reduce_min(var)) tf.summary.histogram("histogram", var) + + +class OneLayer(paddle.nn.Layer): + def __init__( + self, + in_features, + out_features, + activation_fn=paddle.nn.functional.tanh, + precision=GLOBAL_PD_FLOAT_PRECISION, + stddev=1.0, + bavg=0.0, + name="linear", + seed=None, + use_timestep=False, + trainable=True, + useBN=False, + ): + super(OneLayer, self).__init__(name) + self.out_features = out_features + self.activation_fn = activation_fn + self.use_timestep = use_timestep + self.useBN = useBN + self.seed = seed + paddle.seed(seed) + + self.weight = self.create_parameter( + shape=[in_features, out_features], + dtype=precision, + is_bias=False, + attr=paddle.ParamAttr(trainable=trainable), + default_initializer=paddle.nn.initializer.Normal( + std=stddev / np.sqrt(in_features + out_features) + ), + ) + # print(bavg, stddev) + self.bias = self.create_parameter( + shape=[out_features], + dtype=precision, + is_bias=True, + attr=paddle.ParamAttr(trainable=trainable), + default_initializer=paddle.nn.initializer.Normal( + mean=bavg if isinstance(bavg, float) else bavg[0], std=stddev + ), + ) + if self.activation_fn is not None and self.use_timestep: + self.idt = self.create_parameter( + shape=[out_features], + dtype=precision, + attr=paddle.ParamAttr(trainable=trainable), + default_initializer=paddle.nn.initializer.Normal(mean=0.1, std=0.001), + ) + + def forward(self, input): + hidden = paddle.matmul(input, self.weight) + self.bias + if self.activation_fn is not None: + if self.useBN: + None + # hidden_bn = self._batch_norm(hidden, name=name+'_normalization', reuse=reuse) + # return 
activation_fn(hidden_bn)
+            else:
+                if self.use_timestep:
+                    hidden = (
+                        paddle.reshape(
+                            self.activation_fn(hidden), [-1, self.out_features]
+                        )
+                        * self.idt
+                    )
+                else:
+                    hidden = paddle.reshape(
+                        self.activation_fn(hidden), [-1, self.out_features]
+                    )
+        return hidden
+
+
+class EmbeddingNet(paddle.nn.Layer):
+    """Parameters
+    ----------
+    xx : Tensor
+        Input tensor of shape [-1,1]
+    network_size: list of int
+        Size of the embedding network. For example [16,32,64]
+    precision:
+        Precision of network weights. For example, tf.float64
+    activation_fn:
+        Activation function
+    resnet_dt: boolean
+        Using time-step in the ResNet construction
+    name_suffix: str
+        The name suffix appended to each variable.
+    stddev: float
+        Standard deviation of initializing network parameters
+    bavg: float
+        Mean of network initial bias
+    seed: int
+        Random seed for initializing network parameters
+    trainable: boolean
+        If the network is trainable
+    """
+
+    def __init__(
+        self,
+        network_size,
+        precision,
+        activation_fn=paddle.nn.functional.tanh,
+        resnet_dt=False,
+        stddev=1.0,
+        bavg=0.0,
+        seed=42,
+        trainable=True,
+        name="",
+    ):
+        super().__init__(name)
+        self.name = name
+        self.outputs_size = [1] + network_size
+        self.activation_fn = activation_fn
+        self.resnet_dt = resnet_dt
+        self.seed = seed
+        paddle.seed(seed)
+
+        outputs_size = self.outputs_size
+        weight = []
+        bias = []
+        idt = []
+        for ii in range(1, len(outputs_size)):
+            weight.append(
+                self.create_parameter(
+                    shape=[outputs_size[ii - 1], outputs_size[ii]],
+                    dtype=precision,
+                    is_bias=False,
+                    attr=paddle.ParamAttr(trainable=trainable),
+                    default_initializer=paddle.nn.initializer.Normal(
+                        std=stddev / np.sqrt(outputs_size[ii] + outputs_size[ii - 1])
+                    ),
+                )
+            )
+            # print(outputs_size[ii-1], precision, False, trainable, outputs_size[ii]+outputs_size[ii-1])
+            # exit()
+            bias.append(
+                self.create_parameter(
+                    shape=[1, outputs_size[ii]],
+                    dtype=precision,
+                    is_bias=True,
+                    attr=paddle.ParamAttr(trainable=trainable),
+                    default_initializer=paddle.nn.initializer.Normal(
+                        mean=bavg, std=stddev
+                    ),
+                )
+            )
+            if resnet_dt:
+                idt.append(
+                    self.create_parameter(
+                        shape=[1, outputs_size[ii]],
+                        dtype=precision,
+                        attr=paddle.ParamAttr(trainable=trainable),
+                        default_initializer=paddle.nn.initializer.Normal(
+                            mean=0.1, std=0.001
+                        ),
+                    )
+                )
+
+        self.weight = paddle.nn.ParameterList(weight)
+        self.bias = paddle.nn.ParameterList(bias)
+        self.idt = paddle.nn.ParameterList(idt)
+
+    def forward(self, xx):
+        # outputs_size = self.outputs_size
+        # print(self.outputs_size)
+        # for ii in range(1, len(outputs_size)):
+        #     # if self.activation_fn is not None:
+        #     hidden = paddle.reshape(
+        #         self.activation_fn(paddle.matmul(xx, self.weight[ii-1]) + self.bias[ii-1]),
+        #         [-1, outputs_size[ii]]
+        #     )
+        #     # print(__file__, 1)
+        #     # else:
+        #     #     hidden = paddle.reshape(
+        #     #         paddle.matmul(xx, self.weight[ii-1]) + self.bias[ii-1],
+        #     #         [-1, outputs_size[ii]]
+        #     #     )
+        #     #     print(__file__, 2)
+
+        #     if outputs_size[ii] == outputs_size[ii - 1]:
+        #         if self.resnet_dt:
+        #             xx += hidden * self.idt[ii]
+        #             # print(__file__, 3)
+        #         else:
+        #             xx += hidden
+        #             # print(__file__, 4)
+        #     elif outputs_size[ii] == outputs_size[ii-1] * 2:
+        #         if self.resnet_dt:
+        #             xx = paddle.concat([xx,xx], axis=1) + hidden * self.idt[ii]
+        #             # print(__file__, 5)
+        #         else:
+        #             xx = paddle.concat([xx,xx], axis=1) + hidden
+        #             # print(__file__, 6)
+        #     else:
+        #         # print(__file__, 7)
+        #         xx = hidden
+        #     # exit()
+
+        # return xx
+        # if not hasattr(self, "xx1"):
+        #     self.xx1 = xx
+        #
paddle.save(self.xx1.numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_xx1.npy") + # paddle.save(self.weight[0].numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_weight_0.npy") + # paddle.save(self.bias[0].numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_bias_0.npy") + + hidden = nn.functional.tanh( + nn.functional.linear(xx, self.weight[0], self.bias[0]) + ).reshape( + [-1, 25] + ) # 1 + xx = hidden # 7 + + # if not hasattr(self, "hidden1"): + # self.hidden1 = hidden + # paddle.save(self.hidden1.numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_hidden1.npy") + + # if not hasattr(self, "xx2"): + # self.xx2 = xx + # paddle.save(self.xx2.numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_xx2.npy") + + hidden = nn.functional.tanh( + nn.functional.linear(xx, self.weight[1], self.bias[1]) + ).reshape( + [-1, 50] + ) # 1 + xx = paddle.concat([xx, xx], axis=1) + hidden # 6 + + # if not hasattr(self, "hidden2"): + # self.hidden2 = hidden + # paddle.save(self.hidden2.numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_hidden2.npy") + + # if not hasattr(self, "xx3"): + # self.xx3 = xx + # paddle.save(self.xx3.numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_xx3.npy") + + hidden = nn.functional.tanh( + nn.functional.linear(xx, self.weight[2], self.bias[2]) + ).reshape( + [-1, 100] + ) # 1 + xx = paddle.concat([xx, xx], axis=1) + hidden # 6 + + # if not hasattr(self, "hidden3"): + # self.hidden3 = hidden + # paddle.save(self.hidden3.numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_hidden3.npy") + + # if not hasattr(self, "xx4"): + # self.xx4 = xx + # paddle.save(self.xx4.numpy(), f"/workspace/hesensen/deepmd_backend/debug_emb/{self.name}_xx4.npy") + + return xx diff --git a/deepmd/utils/type_embed.py b/deepmd/utils/type_embed.py index 7a3e0925b8..807c3db431 100644 --- a/deepmd/utils/type_embed.py +++ b/deepmd/utils/type_embed.py @@ -1,22 +1,13 @@ -from typing import ( - List, - Optional, - Union, -) +from typing import List +from typing import Optional +from typing import Union -from deepmd.common import ( - get_activation_func, - get_precision, -) -from deepmd.env import ( - tf, -) -from deepmd.utils.graph import ( - get_type_embedding_net_variables_from_graph_def, -) -from deepmd.utils.network import ( - embedding_net, -) +from deepmd.common import get_activation_func +from deepmd.common import get_precision +from deepmd.env import paddle +from deepmd.env import tf +from deepmd.utils.graph import get_type_embedding_net_variables_from_graph_def +from deepmd.utils.network import embedding_net def embed_atom_type( @@ -47,15 +38,15 @@ def embed_atom_type( The embedded type of each atom. 
It has the shape of [numb_atoms, embedding_dim] """ - te_out_dim = type_embedding.get_shape().as_list()[-1] + te_out_dim = type_embedding.shape[-1] atype = [] for ii in range(ntypes): - atype.append(tf.tile([ii], [natoms[2 + ii]])) - atype = tf.concat(atype, axis=0) - atm_embed = tf.nn.embedding_lookup( - type_embedding, tf.cast(atype, dtype=tf.int32) + atype.append(paddle.tile([ii], [natoms[2 + ii]])) + atype = paddle.concat(atype, axis=0) + atm_embed = paddle.nn.functional.embedding( + paddle.cast(atype, dtype=paddle.int32), type_embedding ) # (nf*natom)*nchnl - atm_embed = tf.reshape(atm_embed, [-1, te_out_dim]) + atm_embed = paddle.reshape(atm_embed, [-1, te_out_dim]) return atm_embed diff --git a/source/lib/paddle_src/neighbor_stat.cu b/source/lib/paddle_src/neighbor_stat.cu new file mode 100644 index 0000000000..6754f3efc9 --- /dev/null +++ b/source/lib/paddle_src/neighbor_stat.cu @@ -0,0 +1,217 @@ +// #include +// #include +// #include +#include +#include +#include +#include "paddle/extension.h" + +#include "device.h" +#include "prod_virial.h" +#include "gpu_cuda.h" + +#include "paddle/extension.h" +#include "errors.h" +#include "neighbor_list.h" +#include "device.h" + +#undef PADDLE_WITH_CUDA +// #define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") +#define CHECK_INPUT_CPU(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") +#define CHECK_INPUT_DIM(x, value) PD_CHECK(x.shape().size() == value, #x "'s dim should be " #value ".") +// #define CHECK_INPUT_READY(x) PD_CHECK(x.IsInitialized(), #x " must be initialized before usage.") +typedef double boxtensor_t; +typedef double compute_t; + +std::vector NeighborStatOpCPUForward( + const paddle::Tensor& coord_tensor, + const paddle::Tensor& type_tensor, + const paddle::Tensor& natoms_tensor, + const paddle::Tensor& box_tensor, + const paddle::Tensor& mesh_tensor, + float rcut +) { + CHECK_INPUT_CPU(coord_tensor); + CHECK_INPUT_CPU(type_tensor); + CHECK_INPUT_CPU(natoms_tensor); + CHECK_INPUT_CPU(box_tensor); + CHECK_INPUT_CPU(mesh_tensor); + + CHECK_INPUT_DIM(coord_tensor, 2); + CHECK_INPUT_DIM(type_tensor, 2); + CHECK_INPUT_DIM(natoms_tensor, 1); + CHECK_INPUT_DIM(box_tensor, 2); + CHECK_INPUT_DIM(mesh_tensor, 1); + PD_CHECK(natoms_tensor.shape()[0] >= 3, "number of atoms should be larger than (or equal to) 3"); + + const int64_t* natoms = natoms_tensor.data(); + int64_t nloc = natoms[0]; + int64_t nall = natoms[1]; + int64_t nsamples = coord_tensor.shape()[0]; + int64_t ntypes = natoms_tensor.shape()[0] - 2; + + PD_CHECK(nsamples == type_tensor.shape()[0], "number of samples should match"); + PD_CHECK(nsamples == box_tensor.shape()[0], "number of samples should match"); + PD_CHECK(nall * 3 == coord_tensor.shape()[1], "number of atoms should match"); + PD_CHECK(nall == type_tensor.shape()[1], "number of atoms should match"); + PD_CHECK(9 == box_tensor.shape()[1], "number of box should be 9"); + + // std::cout << "1" << std::endl; + int nei_mode = 0; + if (mesh_tensor.shape()[0] == 6) { + // manual copied pbc + assert(nloc == nall); + nei_mode = 1; + } else if (mesh_tensor.shape()[0] == 0) { + // no pbc + nei_mode = -1; + } else { + throw deepmd::deepmd_exception("invalid mesh tensor"); + } + // if region is given extended, do not use pbc + bool b_pbc = (nei_mode >= 1 || nei_mode == -1) ? false : true; + bool b_norm_atom = (nei_mode == 1) ? 
true : false; + + std::vector max_nbor_size_shape = {nloc, ntypes}; + paddle::Tensor max_nbor_size_tensor = paddle::zeros( + max_nbor_size_shape, + type_tensor.dtype(), + type_tensor.place() + ); + // std::cout << "2" << std::endl; + + const auto* coord = coord_tensor.data(); + // std::cout << "3" << std::endl; + const auto* type = type_tensor.data(); + // std::cout << "4" << std::endl; + const auto* box = box_tensor.data(); + // std::cout << "5" << std::endl; + const auto* mesh = mesh_tensor.data(); + // std::cout << "6" << std::endl; + auto *max_nbor_size = max_nbor_size_tensor.mutable_data(); + // std::cout << "7" << std::endl; + + boxtensor_t boxt[9] = {0}; + for (int dd = 0; dd < 9; ++dd) { + boxt[dd] = box[dd]; + } + SimulationRegion region; + region.reinitBox(boxt); + // set & normalize coord + std::vector d_coord3(nall * 3); + for (int ii = 0; ii < nall; ++ii) { + for (int dd = 0; dd < 3; ++dd) { + d_coord3[ii * 3 + dd] = coord[ii * 3 + dd]; + } + if (b_norm_atom) { + compute_t inter[3]; + region.phys2Inter(inter, &d_coord3[3 * ii]); + for (int dd = 0; dd < 3; ++dd) { + if (inter[dd] < 0) + inter[dd] += 1.; + else if (inter[dd] >= 1) + inter[dd] -= 1.; + } + region.inter2Phys(&d_coord3[3 * ii], inter); + } + } + + // set type + std::vector d_type(nall); + for (int ii = 0; ii < nall; ++ii) d_type[ii] = type[ii]; + + // build nlist + std::vector > d_nlist_a; + std::vector > d_nlist_r; + std::vector nlist_map; + bool b_nlist_map = false; + + if (nei_mode == 1) { + // std::cout << "I'm in nei_mode 1" << std::endl; + std::vector bk_d_coord3 = d_coord3; + std::vector bk_d_type = d_type; + std::vector ncell, ngcell; + copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, + bk_d_type, rcut, region); + b_nlist_map = true; + std::vector nat_stt(3, 0); + std::vector ext_stt(3), ext_end(3); + for (int dd = 0; dd < 3; ++dd) { + ext_stt[dd] = -ngcell[dd]; + ext_end[dd] = ncell[dd] + ngcell[dd]; + } + ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, -1, rcut, nat_stt, + ncell, ext_stt, ext_end, region, ncell); + } else if (nei_mode == -1) { + ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, -1, rcut, NULL); + } else { + throw deepmd::deepmd_exception("unknow neighbor mode"); + } + + int MAX_NNEI = 0; + for (int ii = 0; ii < nloc; ii++) { + MAX_NNEI = MAX_NNEI < d_nlist_r[ii].size() ? 
d_nlist_r[ii].size() : MAX_NNEI; + } + + // allocate output tensor for deepmd-kit + std::vector min_nbor_dist_shape = {nloc * MAX_NNEI}; + paddle::Tensor min_nbor_dist_tensor = paddle::full( + min_nbor_dist_shape, + 10000.0, + coord_tensor.dtype(), + coord_tensor.place() + ); + auto* min_nbor_dist = min_nbor_dist_tensor.mutable_data(); + +#pragma omp parallel for + for (int ii = 0; ii < nloc; ii++) { + if (d_type[ii] < 0) continue; // virtual atom + for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) { + int type = d_type[d_nlist_r[ii][jj]]; + if (type < 0) continue; // virtual atom + max_nbor_size[ii * ntypes + type] += 1; + compute_t rij[3] = { + d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], + d_coord3[d_nlist_r[ii][jj] * 3 + 1] - d_coord3[ii * 3 + 1], + d_coord3[d_nlist_r[ii][jj] * 3 + 2] - d_coord3[ii * 3 + 2]}; + min_nbor_dist[ii * MAX_NNEI + jj] = + sqrt(rij[0] * rij[0] + rij[1] * rij[1] + rij[2] * rij[2]); + } + } + return {max_nbor_size_tensor, min_nbor_dist_tensor}; +} + + +std::vector NeighborStatForward( + const paddle::Tensor& coord_tensor, /*float32*/ + const paddle::Tensor& type_tensor, /*int32*/ + const paddle::Tensor& natoms_tensor, /*int64*/ + const paddle::Tensor& box_tensor, /*float32*/ + const paddle::Tensor& mesh_tensor, /*int32*/ + float rcut +) { + if (coord_tensor.is_cpu()) { + // std::cout << coord_tensor.dtype() << std::endl; + // std::cout << type_tensor.dtype() << std::endl; + // std::cout << natoms_tensor.dtype() << std::endl; + // std::cout << box_tensor.dtype() << std::endl; + // std::cout << mesh_tensor.dtype() << std::endl; + return NeighborStatOpCPUForward( + coord_tensor, + type_tensor, + natoms_tensor, + box_tensor, + mesh_tensor, + rcut + ); + } else { + PD_THROW("Unsupported device type for forward function of custom relu operator."); + } +} + + +PD_BUILD_OP(neighbor_stat) + .Inputs({"coord", "type", "natoms", "box", "mesh"}) + .Outputs({"max_nbor_size", "min_nbor_dist"}) + .Attrs({"rcut: float"}) + .SetKernelFn(PD_KERNEL(NeighborStatForward)); diff --git a/source/lib/paddle_src/prod_env_mat.cc b/source/lib/paddle_src/prod_env_mat.cc new file mode 100644 index 0000000000..7ebfd6cdc7 --- /dev/null +++ b/source/lib/paddle_src/prod_env_mat.cc @@ -0,0 +1,321 @@ +#include "prod_env_mat.h" + +#include + +#include +#include + +#include "env_mat.h" +#include "fmt_nlist.h" + +using namespace deepmd; + +template +void deepmd::prod_env_mat_a_cpu(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec, + const int *f_type) { + if (f_type == NULL) { + f_type = type; + } + const int nnei = sec.back(); + const int nem = nnei * 4; + + // set & normalize coord + std::vector d_coord3(nall * 3); + for (int ii = 0; ii < nall; ++ii) { + for (int dd = 0; dd < 3; ++dd) { + d_coord3[ii * 3 + dd] = coord[ii * 3 + dd]; + } + } + + // set type + std::vector d_f_type(nall); + for (int ii = 0; ii < nall; ++ii) { + d_f_type[ii] = f_type[ii]; + } + + // build nlist + std::vector > d_nlist_a(nloc); + + assert(nloc == inlist.inum); + for (unsigned ii = 0; ii < nloc; ++ii) { + d_nlist_a[ii].reserve(max_nbor_size); + } + for (unsigned ii = 0; ii < nloc; ++ii) { + int i_idx = inlist.ilist[ii]; + for (unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj) { + int j_idx = inlist.firstneigh[ii][jj]; + d_nlist_a[i_idx].push_back(j_idx); + } 
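Editor's note: the `#pragma omp` loop that follows standardizes every environment-matrix entry with per-type statistics. A hedged NumPy rendering of that normalization (shapes and values invented; `avg`/`std` stand for the davg/dstd tables passed into `prod_env_mat_a_cpu`):

```python
import numpy as np

nloc, nnei = 2, 3
nem = nnei * 4                           # 4 components per neighbor slot in se_a
raw_em = np.random.rand(nloc, nem)       # d_em_a computed per local atom
atype = np.array([0, 1])                 # type[ii] for each local atom
avg = np.zeros((2, nem))                 # one row of statistics per atom type
std = np.ones((2, nem))
em = (raw_em - avg[atype]) / std[atype]  # == the em[ii * nem + jj] assignment
```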
+ } + +#pragma omp parallel for + for (int ii = 0; ii < nloc; ++ii) { + std::vector fmt_nlist_a; + int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_f_type, ii, + d_nlist_a[ii], rcut, sec); + std::vector d_em_a; + std::vector d_em_a_deriv; + std::vector d_em_r; + std::vector d_em_r_deriv; + std::vector d_rij_a; + env_mat_a_cpu(d_em_a, d_em_a_deriv, d_rij_a, d_coord3, d_f_type, ii, + fmt_nlist_a, sec, rcut_smth, rcut); + + // check sizes + assert(d_em_a.size() == nem); + assert(d_em_a_deriv.size() == nem * 3); + assert(d_rij_a.size() == nnei * 3); + assert(fmt_nlist_a.size() == nnei); + // record outputs + for (int jj = 0; jj < nem; ++jj) { + if (type[ii] >= 0) { + em[ii * nem + jj] = + (d_em_a[jj] - avg[type[ii] * nem + jj]) / std[type[ii] * nem + jj]; + } else { + em[ii * nem + jj] = 0; + } + } + for (int jj = 0; jj < nem * 3; ++jj) { + if (type[ii] >= 0) { + em_deriv[ii * nem * 3 + jj] = + d_em_a_deriv[jj] / std[type[ii] * nem + jj / 3]; + } else { + em_deriv[ii * nem * 3 + jj] = 0; + } + } + for (int jj = 0; jj < nnei * 3; ++jj) { + rij[ii * nnei * 3 + jj] = d_rij_a[jj]; + } + for (int jj = 0; jj < nnei; ++jj) { + nlist[ii * nnei + jj] = fmt_nlist_a[jj]; + } + } +} + +template +void deepmd::prod_env_mat_r_cpu(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec) { + const int nnei = sec.back(); + const int nem = nnei * 1; + + // set & normalize coord + std::vector d_coord3(nall * 3); + for (int ii = 0; ii < nall; ++ii) { + for (int dd = 0; dd < 3; ++dd) { + d_coord3[ii * 3 + dd] = coord[ii * 3 + dd]; + } + } + + // set type + std::vector d_type(nall); + for (int ii = 0; ii < nall; ++ii) { + d_type[ii] = type[ii]; + } + + // build nlist + std::vector > d_nlist_a(nloc); + + assert(nloc == inlist.inum); + for (unsigned ii = 0; ii < nloc; ++ii) { + d_nlist_a[ii].reserve(max_nbor_size); + } + for (unsigned ii = 0; ii < nloc; ++ii) { + int i_idx = inlist.ilist[ii]; + for (unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj) { + int j_idx = inlist.firstneigh[ii][jj]; + d_nlist_a[i_idx].push_back(j_idx); + } + } + +#pragma omp parallel for + for (int ii = 0; ii < nloc; ++ii) { + std::vector fmt_nlist_a; + int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_type, ii, + d_nlist_a[ii], rcut, sec); + std::vector d_em_a; + std::vector d_em_a_deriv; + std::vector d_em_r; + std::vector d_em_r_deriv; + std::vector d_rij_a; + env_mat_r_cpu(d_em_a, d_em_a_deriv, d_rij_a, d_coord3, d_type, ii, + fmt_nlist_a, sec, rcut_smth, rcut); + + // check sizes + assert(d_em_a.size() == nem); + assert(d_em_a_deriv.size() == nem * 3); + assert(d_rij_a.size() == nnei * 3); + assert(fmt_nlist_a.size() == nnei); + // record outputs + for (int jj = 0; jj < nem; ++jj) { + em[ii * nem + jj] = (d_em_a[jj] - avg[d_type[ii] * nem + jj]) / + std[d_type[ii] * nem + jj]; + } + for (int jj = 0; jj < nem * 3; ++jj) { + em_deriv[ii * nem * 3 + jj] = + d_em_a_deriv[jj] / std[d_type[ii] * nem + jj / 3]; + } + for (int jj = 0; jj < nnei * 3; ++jj) { + rij[ii * nnei * 3 + jj] = d_rij_a[jj]; + } + for (int jj = 0; jj < nnei; ++jj) { + nlist[ii * nnei + jj] = fmt_nlist_a[jj]; + } + } +} + +template void deepmd::prod_env_mat_a_cpu(double *em, + double *em_deriv, + double *rij, + int *nlist, + const double *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + 
const double *avg, + const double *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec, + const int *f_type); + +template void deepmd::prod_env_mat_a_cpu(float *em, + float *em_deriv, + float *rij, + int *nlist, + const float *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + const float *avg, + const float *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec, + const int *f_type); + +template void deepmd::prod_env_mat_r_cpu(double *em, + double *em_deriv, + double *rij, + int *nlist, + const double *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + const double *avg, + const double *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec); + +template void deepmd::prod_env_mat_r_cpu(float *em, + float *em_deriv, + float *rij, + int *nlist, + const float *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + const float *avg, + const float *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec); + +// #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +void deepmd::env_mat_nbor_update(InputNlist &inlist, + InputNlist &gpu_inlist, + int &max_nbor_size, + int *&nbor_list_dev, + const int *mesh, + const int size) { + int *mesh_host = new int[size]; + memcpy_device_to_host(mesh, mesh_host, size); + memcpy(&inlist.ilist, 4 + mesh_host, sizeof(int *)); + memcpy(&inlist.numneigh, 8 + mesh_host, sizeof(int *)); + memcpy(&inlist.firstneigh, 12 + mesh_host, sizeof(int **)); + const int ago = mesh_host[0]; + if (ago == 0 || gpu_inlist.inum < inlist.inum) { + const int inum = inlist.inum; + if (gpu_inlist.inum < inum) { + delete_device_memory(gpu_inlist.ilist); + delete_device_memory(gpu_inlist.numneigh); + delete_device_memory(gpu_inlist.firstneigh); + malloc_device_memory(gpu_inlist.ilist, inum); + malloc_device_memory(gpu_inlist.numneigh, inum); + malloc_device_memory(gpu_inlist.firstneigh, inum); + } + memcpy_host_to_device(gpu_inlist.ilist, inlist.ilist, inum); + memcpy_host_to_device(gpu_inlist.numneigh, inlist.numneigh, inum); + int _max_nbor_size = max_numneigh(inlist); + if (_max_nbor_size <= 256) { + _max_nbor_size = 256; + } else if (_max_nbor_size <= 512) { + _max_nbor_size = 512; + } else if (_max_nbor_size <= 1024) { + _max_nbor_size = 1024; + } else if (_max_nbor_size <= 2048) { + _max_nbor_size = 2048; + } else { + _max_nbor_size = 4096; + } + if (nbor_list_dev == NULL || _max_nbor_size > max_nbor_size || + inum > gpu_inlist.inum) { + delete_device_memory(nbor_list_dev); + malloc_device_memory(nbor_list_dev, inum * _max_nbor_size); + } + // update info + gpu_inlist.inum = inum; + max_nbor_size = _max_nbor_size; + + // copy nbor list from host to the device + std::vector nbor_list_host(inum * max_nbor_size, 0); + int **_firstneigh = (int **)malloc(sizeof(int *) * inum); + for (int ii = 0; ii < inum; ii++) { + _firstneigh[ii] = nbor_list_dev + ii * max_nbor_size; + for (int jj = 0; jj < inlist.numneigh[ii]; jj++) { + nbor_list_host[ii * max_nbor_size + jj] = inlist.firstneigh[ii][jj]; + } + } + memcpy_host_to_device(nbor_list_dev, &nbor_list_host[0], + inum * max_nbor_size); + memcpy_host_to_device(gpu_inlist.firstneigh, _firstneigh, inum); + free(_firstneigh); + } + delete[] mesh_host; +} +// #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git 
a/source/lib/paddle_src/prod_env_mat.cu b/source/lib/paddle_src/prod_env_mat.cu new file mode 100644 index 0000000000..81270a0c81 --- /dev/null +++ b/source/lib/paddle_src/prod_env_mat.cu @@ -0,0 +1,1324 @@ +#include +#include +#include +#include "paddle/extension.h" + +#define GOOGLE_CUDA 1 + +#include +#include "utilities.h" +#include "coord.h" +#include "fmt_nlist.h" +#include "region.h" +#include "neighbor_list.h" +#include "prod_env_mat.h" +#include "gpu_cuda.h" +#include + +typedef long long int_64; + +#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") +#define CHECK_INPUT_DIM(x, value) PD_CHECK(x.shape().size() == value, #x "'s dim should be " #value ".") +// #define CHECK_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") + +__device__ inline double _sqrt(double x) { return sqrt(x); } +__device__ inline float _sqrt(float x) { return sqrtf(x); } +__device__ inline double _rsqrt(double x) { return rsqrt(x); } +__device__ inline float _rsqrt(float x) { return rsqrtf(x); } + +template +static int +_norm_copy_coord_gpu( + std::vector* tensor_list, + FPTYPE *&coord_cpy, + int *&type_cpy, + int *&idx_mapping, + int &nall, + int &mem_cpy, + const FPTYPE *coord, + const FPTYPE *box, + const int *type, + const int &nloc, + const int &max_cpy_trial, + const float &rcut_r); + +template +static int +_build_nlist_gpu( + std::vector *tensor_list, + int *&ilist, + int *&numneigh, + int **&firstneigh, + int *&jlist, + int &max_nnei, + int &mem_nnei, + const FPTYPE *coord, + const int &nloc, + const int &new_nall, + const int &max_nnei_trial, + const float &rcut_r); + +static void +_map_nlist_gpu( + int *nlist, + const int *idx_mapping, + const int &nloc, + const int &nnei); + +template +static void +_prepare_coord_nlist_gpu( + std::vector *tensor_list, + FPTYPE const **coord, + FPTYPE *&coord_cpy, + int const **type, + int *&type_cpy, + int *&idx_mapping, + deepmd::InputNlist &inlist, + int *&ilist, + int *&numneigh, + int **&firstneigh, + int *&jlist, + int *&nbor_list_dev, + int &new_nall, + int &mem_cpy, + int &mem_nnei, + int &max_nbor_size, + const FPTYPE *box, + const int *mesh_tensor_data, + const int mesh_tensor_size, + const int &nloc, + const int &nei_mode, + const float &rcut_r, + const int &max_cpy_trial, + const int &max_nnei_trial); + +template +__device__ inline uint_64 encoding_nbor_info(const int type, + const FPTYPE dist, + const int index) { + // nbor info checking: + // the type of nbor atom must be smaller than 128 + // the distance of center atom between nbor atom must be smaller than 128 + // the index of nbor atom(including ghost region) must be smaller than + // 16777216(1 << 24) + if (type >= 128 || dist >= (FPTYPE)128.0 || index >= (1 << 24)) { + asm("trap;"); + } + return ((uint_64)type << 57) + + (uint_64)((double)dist * ((uint_64)1 << 50)) / (1 << 24) * (1 << 24) + + index; +} + +__device__ inline void decoding_nbor_info(int& type, + int& index, + const uint_64 key) { + type = key >> 57; + index = key & 0xFFFFFF; +} + +template +__global__ void get_i_idx(FPTYPE* i_idx, const int nloc, const FPTYPE* ilist) { + const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= nloc) { + return; + } + i_idx[ilist[idx]] = idx; +} + +// common part of prod_env_mat +template +__launch_bounds__(BLOCK_THREADS) __global__ + void BlockSortKernel(Key* d_in, + Key* d_out) // Tile of output +{ + enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD }; + // Specialize BlockLoad type for our thread block (uses warp-striped loads for + // 
coalescing, then transposes in shared memory to a blocked arrangement) + typedef cub::BlockLoad + BlockLoadT; + // Specialize BlockRadixSort type for our thread block + typedef cub::BlockRadixSort + BlockRadixSortT; + // Shared memory + __shared__ union TempStorage { + typename BlockLoadT::TempStorage load; + typename BlockRadixSortT::TempStorage sort; + } temp_storage; + // Per-thread tile items + Key items[ITEMS_PER_THREAD]; + // Our current block's offset + int_64 block_offset = (int_64)blockIdx.x * TILE_SIZE; + // Load items into a blocked arrangement + BlockLoadT(temp_storage.load).Load(d_in + block_offset, items); + // Barrier for smem reuse + __syncthreads(); + // Sort keys + BlockRadixSortT(temp_storage.sort).SortBlockedToStriped(items); + // Store output in striped fashion + cub::StoreDirectStriped(threadIdx.x, d_out + block_offset, + items); +} + + +template +__device__ inline FPTYPE dev_dot(FPTYPE* arr1, FPTYPE* arr2) { + return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2]; +} + +template +__device__ inline void spline5_switch( + FPTYPE& vv, FPTYPE& dd, FPTYPE& xx, const float& rmin, const float& rmax) { + if (xx < rmin) { + dd = (FPTYPE)0.; + vv = (FPTYPE)1.; + } else if (xx < rmax) { + FPTYPE uu = (xx - rmin) / (rmax - rmin); + FPTYPE du = (FPTYPE)1. / (rmax - rmin); + vv = uu * uu * uu * + ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) + + (FPTYPE)1.; + dd = ((FPTYPE)3. * uu * uu * + ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) + + uu * uu * uu * ((FPTYPE)-12. * uu + (FPTYPE)15.)) * + du; + } else { + dd = (FPTYPE)0.; + vv = (FPTYPE)0.; + } +} + +template +__global__ void format_nlist_fill_a(uint_64* key, + const FPTYPE* coord, + const int* type, + const int* numneigh, + int** firstneigh, + const float rcut, + int* i_idx, + const int MAX_NBOR_SIZE) { + // <<>> + const int_64 idx = blockIdx.x; + const unsigned int idy = blockIdx.y * blockDim.y + threadIdx.y; + + const int nsize = numneigh[i_idx[idx]]; + if (idy >= nsize) { + return; + } + + const int* nei_idx = firstneigh[i_idx[idx]]; + // dev_copy(nei_idx, &jlist[jrange[i_idx]], nsize); + uint_64* key_in = key + idx * MAX_NBOR_SIZE; + FPTYPE diff[3]; + const int& j_idx = nei_idx[idy]; + if (type[j_idx] < 0) return; + for (int dd = 0; dd < 3; dd++) { + diff[dd] = coord[j_idx * 3 + dd] - coord[idx * 3 + dd]; + } + FPTYPE rr = _sqrt(dev_dot(diff, diff)); + if (rr <= rcut) { + key_in[idy] = encoding_nbor_info(type[j_idx], rr, j_idx); + } +} + +template +__global__ void fill_nei_iter(int* nei_iter_dev, + const FPTYPE* key, + const int nloc, + const int max_nbor_size, + const int sec_size) { + int_64 row = blockIdx.x; + int col = blockIdx.y * blockDim.x + threadIdx.x; + const FPTYPE* key_out = key + nloc * max_nbor_size + row * max_nbor_size; + int nei_type_cur = -1, nbor_idx_cur = 0; + int nei_type_pre = -1, nbor_idx_pre = 0; + if (col < max_nbor_size && key_out[col] != key_out[max_nbor_size - 1]) { + if (col >= 1) + decoding_nbor_info(nei_type_pre, nbor_idx_pre, key_out[col - 1]); + decoding_nbor_info(nei_type_cur, nbor_idx_cur, key_out[col]); + } + if (nei_type_cur != nei_type_pre) { + nei_iter_dev[row * sec_size + nei_type_cur] = col; + } +} + +template +__global__ void format_nlist_fill_b(int* nlist, + const int nlist_size, + const int nloc, + FPTYPE* key, + const int* sec, + const int sec_size, + int* nei_iter_dev, + const int max_nbor_size) { + int_64 row = blockIdx.x; + int col = blockIdx.y * blockDim.x + threadIdx.x; + int* nei_iter = nei_iter_dev + row * sec_size; + FPTYPE* key_out = key 
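// The switching function used throughout this file is the standard quintic
// smoothing s(u) = u^3 * (-6 u^2 + 15 u - 10) + 1 with
// u = (r - rmin) / (rmax - rmin): it equals 1 at r = rmin, 0 at r = rmax, and
// has vanishing first and second derivatives at both ends. Host-side
// reference (hypothetical helper):
static double spline5_ref(double r, double rmin, double rmax) {
  if (r < rmin) return 1.0;
  if (r >= rmax) return 0.0;
  const double u = (r - rmin) / (rmax - rmin);
  return u * u * u * (-6.0 * u * u + 15.0 * u - 10.0) + 1.0;
}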
+ nloc * max_nbor_size + row * max_nbor_size; + int* row_nlist = nlist + row * nlist_size; + if (col < max_nbor_size) { + if (key_out[col] != key_out[max_nbor_size - 1]) { + int nei_type = 0, nbor_idx = 0; + decoding_nbor_info(nei_type, nbor_idx, key_out[col]); + int out_indx = col - nei_iter[nei_type] + sec[nei_type]; + if (out_indx < sec[nei_type + 1]) { + row_nlist[out_indx] = nbor_idx; + } + } + } +} + +template +__global__ void encoding_decoding_nbor_info(uint_64* key, + int* out_type, + int* out_index, + const int* in_type, + const FPTYPE* in_dist, + const int* in_index, + const int size_of_array) { + const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size_of_array) { + return; + } + + key[idx] = encoding_nbor_info(in_type[idx], in_dist[idx], in_index[idx]); + decoding_nbor_info(out_type[idx], out_index[idx], key[idx]); +} + +template +void format_nbor_list_256(uint_64* key, + const FPTYPE* coord, + const int* type, + const deepmd::InputNlist& gpu_inlist, + const int& nloc, + const float& rcut, + int* i_idx) { + const int LEN = 256; + const int MAX_NBOR_SIZE = 256; + const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(1, LEN); + format_nlist_fill_a<<>>( + key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, + MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + const int ITEMS_PER_THREAD = 4; + const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD; + // BlockSortKernel<<>> ( + BlockSortKernel + <<>>(key, key + nloc * MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template +void format_nbor_list_512(uint_64* key, + const FPTYPE* coord, + const int* type, + const deepmd::InputNlist& gpu_inlist, + const int& nloc, + const float& rcut, + int* i_idx) { + const int LEN = 256; + const int MAX_NBOR_SIZE = 512; + const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(1, LEN); + format_nlist_fill_a<<>>( + key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, + MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + const int ITEMS_PER_THREAD = 4; + const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD; + // BlockSortKernel<<>> ( + BlockSortKernel + <<>>(key, key + nloc * MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template +void format_nbor_list_1024(uint_64* key, + const FPTYPE* coord, + const int* type, + const deepmd::InputNlist& gpu_inlist, + const int& nloc, + const float& rcut, + int* i_idx) { + const int LEN = 256; + const int MAX_NBOR_SIZE = 1024; + const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(1, LEN); + format_nlist_fill_a<<>>( + key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, + MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + const int ITEMS_PER_THREAD = 8; + const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD; + // BlockSortKernel<<>> ( + BlockSortKernel + <<>>(key, key + nloc * MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template +void format_nbor_list_2048(uint_64* key, + const FPTYPE* coord, + const int* type, + const deepmd::InputNlist& gpu_inlist, + const int& nloc, + const float& rcut, + int* i_idx) { + const int LEN = 256; + const int MAX_NBOR_SIZE 
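// CPU reference for format_nlist_fill_b above: after the radix sort,
// neighbors of type t occupy a contiguous run of columns starting at
// nei_iter[t], and each is copied into output slot col - nei_iter[t] + sec[t];
// anything past the per-type capacity sec[t+1] is silently truncated to the
// selection size. Hypothetical sketch over already-sorted host vectors:
#include <vector>
static void fill_sectioned_nlist(std::vector<int> &row_nlist,  // sec.back() slots, pre-filled with -1
                                 const std::vector<int> &sorted_type,
                                 const std::vector<int> &sorted_idx,
                                 const std::vector<int> &sec) {
  std::vector<int> first_col(sec.size() - 1, -1);
  for (int col = 0; col < (int)sorted_type.size(); ++col)
    if (first_col[sorted_type[col]] < 0) first_col[sorted_type[col]] = col;
  for (int col = 0; col < (int)sorted_type.size(); ++col) {
    const int t = sorted_type[col];
    const int out = col - first_col[t] + sec[t];
    if (out < sec[t + 1]) row_nlist[out] = sorted_idx[col];
  }
}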
= 2048; + const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(1, LEN); + format_nlist_fill_a<<>>( + key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, + MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + const int ITEMS_PER_THREAD = 8; + const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD; + // BlockSortKernel<<>> ( + BlockSortKernel + <<>>(key, key + nloc * MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template +void format_nbor_list_4096(uint_64* key, + const FPTYPE* coord, + const int* type, + const deepmd::InputNlist& gpu_inlist, + const int& nloc, + const float& rcut, + int* i_idx) { + const int LEN = 256; + const int MAX_NBOR_SIZE = 4096; + const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(1, LEN); + format_nlist_fill_a<<>>( + key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, + MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + const int ITEMS_PER_THREAD = 16; + const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD; + // BlockSortKernel<<>> ( + BlockSortKernel + <<>>(key, key + nloc * MAX_NBOR_SIZE); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + + +template +__global__ void compute_env_mat_a(FPTYPE* em, + FPTYPE* em_deriv, + FPTYPE* rij, + const FPTYPE* coord, + const FPTYPE* avg, + const FPTYPE* std, + const int* type, + const int* nlist, + const int nnei, + const float rmin, + const float rmax) { + // <<>> + const int_64 bid = blockIdx.x; + const unsigned int tid = threadIdx.x; + if (type[bid] < 0) return; + if (tid >= nnei) { + return; + } + const int ndescrpt = nnei * 4; + const int* row_nlist = nlist + bid * nnei; + FPTYPE* row_rij = rij + bid * nnei * 3; + FPTYPE* row_descript = em + bid * nnei * 4; + FPTYPE* row_descript_deriv = em_deriv + bid * nnei * 12; + for (int ii = tid; ii < nnei; ii += THREADS_PER_BLOCK) { + const int idx_value = ii * 4; // 4 components + const int idx_deriv = ii * 12; // 4 components time 3 directions + if (row_nlist[ii] >= 0) { + FPTYPE rr[3] = {0}; + FPTYPE dd[4] = {0}; + FPTYPE vv[12] = {0}; + const int j_idx = row_nlist[ii]; + for (int kk = 0; kk < 3; kk++) { + rr[kk] = coord[j_idx * 3 + kk] - coord[bid * 3 + kk]; + row_rij[ii * 3 + kk] = rr[kk]; + } + // const FPTYPE * rr = &row_rij[ii * 3]; + FPTYPE nr2 = dev_dot(rr, rr); + FPTYPE inr = _rsqrt(nr2); + FPTYPE nr = nr2 * inr; + FPTYPE inr2 = inr * inr; + FPTYPE inr4 = inr2 * inr2; + FPTYPE inr3 = inr4 * nr; + FPTYPE sw, dsw; + spline5_switch(sw, dsw, nr, rmin, rmax); + dd[0] = ((FPTYPE)1. / nr); //* sw; + dd[1] = (rr[0] / nr2); //* sw; + dd[2] = (rr[1] / nr2); //* sw; + dd[3] = (rr[2] / nr2); //* sw; + vv[0] = (rr[0] * inr3 * sw - + dd[0] * dsw * rr[0] * + inr); // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3]; + vv[1] = (rr[1] * inr3 * sw - + dd[0] * dsw * rr[1] * + inr); // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3]; + vv[2] = (rr[2] * inr3 * sw - + dd[0] * dsw * rr[2] * + inr); // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3]; + // ****deriv of component x/r2 + vv[3] = (((FPTYPE)2. 
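// The five format_nbor_list_* variants differ only in sort-tile geometry;
// ITEMS_PER_THREAD grows with the bucket so that BLOCK_THREADS
// (= MAX_NBOR_SIZE / ITEMS_PER_THREAD) stays within 256 threads per block.
// Summary table (derived from the constants in the kernels above):
struct SortGeometry { int bucket, items_per_thread, block_threads; };
constexpr SortGeometry kSortGeometry[] = {{256, 4, 64},   {512, 4, 128},
                                          {1024, 8, 128}, {2048, 8, 256},
                                          {4096, 16, 256}};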
* rr[0] * rr[0] * inr4 - inr2) * sw - + dd[1] * dsw * rr[0] * + inr); // avg[type[(idx_deriv + 3) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 3) % (ndescrpt * 3)) / 3]; + vv[4] = (((FPTYPE)2. * rr[0] * rr[1] * inr4) * sw - + dd[1] * dsw * rr[1] * + inr); // avg[type[(idx_deriv + 4) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 4) % (ndescrpt * 3)) / 3]; + vv[5] = (((FPTYPE)2. * rr[0] * rr[2] * inr4) * sw - + dd[1] * dsw * rr[2] * + inr); // avg[type[(idx_deriv + 5) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 5) % (ndescrpt * 3)) / 3]; + // ***deriv of component y/r2 + vv[6] = (((FPTYPE)2. * rr[1] * rr[0] * inr4) * sw - + dd[2] * dsw * rr[0] * + inr); // avg[type[(idx_deriv + 6) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 6) % (ndescrpt * 3)) / 3]; + vv[7] = (((FPTYPE)2. * rr[1] * rr[1] * inr4 - inr2) * sw - + dd[2] * dsw * rr[1] * + inr); // avg[type[(idx_deriv + 7) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 7) % (ndescrpt * 3)) / 3]; + vv[8] = (((FPTYPE)2. * rr[1] * rr[2] * inr4) * sw - + dd[2] * dsw * rr[2] * + inr); // avg[type[(idx_deriv + 8) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 8) % (ndescrpt * 3)) / 3]; + // ***deriv of component z/r2 + vv[9] = (((FPTYPE)2. * rr[2] * rr[0] * inr4) * sw - + dd[3] * dsw * rr[0] * + inr); // avg[type[(idx_deriv + 9) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 9) % (ndescrpt * 3)) / 3]; + vv[10] = + (((FPTYPE)2. * rr[2] * rr[1] * inr4) * sw - + dd[3] * dsw * rr[1] * + inr); // avg[type[(idx_deriv + 10) / (ndescrpt * 3)] * ndescrpt + // + ((idx_deriv + 10) % (ndescrpt * 3)) / 3]; + vv[11] = + (((FPTYPE)2. * rr[2] * rr[2] * inr4 - inr2) * sw - + dd[3] * dsw * rr[2] * + inr); // avg[type[(idx_deriv + 11) / (ndescrpt * 3)] * ndescrpt + // + ((idx_deriv + 11) % (ndescrpt * 3)) / 3]; + // 4 value components + dd[0] *= sw; // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] + // * ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt + + // idx_value + 0]; + dd[1] *= sw; // * em[idx * ndescrpt + idx_value + 1]);// - avg[type[idx] + // * ndescrpt + idx_value + 1]) / std[type[idx] * ndescrpt + + // idx_value + 1]; + dd[2] *= sw; // * em[idx * ndescrpt + idx_value + 2]);// - avg[type[idx] + // * ndescrpt + idx_value + 2]) / std[type[idx] * ndescrpt + + // idx_value + 2]; + dd[3] *= sw; // * em[idx * ndescrpt + idx_value + 3]);// - avg[type[idx] + // * ndescrpt + idx_value + 3]) / std[type[idx] * ndescrpt + + // idx_value + 3]; + for (int ii = 0; ii < 12; ii++) { + row_descript_deriv[idx_deriv + ii] = + vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3]; + } + for (int ii = 0; ii < 4; ii++) { + row_descript[idx_value + ii] = + (dd[ii] - avg[type[bid] * ndescrpt + idx_value + ii]) / + std[type[bid] * ndescrpt + idx_value + ii]; + } + } else { + // TODO: move it to the memset. 
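// CPU reference for the four se_a descriptor channels filled above: each
// in-range neighbor contributes sw(r) * (1/r, x/r^2, y/r^2, z/r^2), which is
// then standardized per type and slot as (d - avg) / std; the 12 derivative
// entries are the (x, y, z) gradients of those four channels divided by std
// only. Scalar sketch for a single pair (hypothetical helper):
#include <cmath>
static void se_a_channels(const double rr[3], double sw, double out[4]) {
  const double r2 = rr[0] * rr[0] + rr[1] * rr[1] + rr[2] * rr[2];
  const double r = std::sqrt(r2);
  out[0] = sw / r;           // radial channel 1/r
  out[1] = sw * rr[0] / r2;  // angular channel x/r^2
  out[2] = sw * rr[1] / r2;  // angular channel y/r^2
  out[3] = sw * rr[2] / r2;  // angular channel z/r^2
}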
+ row_descript[idx_value] -= avg[type[bid] * ndescrpt + idx_value] / + std[type[bid] * ndescrpt + idx_value]; + } + } +} + +template +__global__ void compute_env_mat_r(FPTYPE* em, + FPTYPE* em_deriv, + FPTYPE* rij, + const FPTYPE* coord, + const FPTYPE* avg, + const FPTYPE* std, + const int* type, + const int* nlist, + const int nnei, + const float rmin, + const float rmax) { + // <<>> + const int_64 bid = blockIdx.x; + const unsigned int tid = threadIdx.x; + if (tid >= nnei) { + return; + } + const int ndescrpt = nnei; + const int* row_nlist = nlist + bid * nnei; + FPTYPE* row_rij = rij + bid * nnei * 3; + FPTYPE* row_em = em + bid * nnei; + FPTYPE* row_em_deriv = em_deriv + bid * nnei * 3; + for (int ii = tid; ii < nnei; ii += THREADS_PER_BLOCK) { + const int idx_value = ii; // 4 components + const int idx_deriv = ii * 3; // 4 components time 3 directions + if (row_nlist[ii] >= 0) { + FPTYPE rr[3] = {0}; + FPTYPE vv[3] = {0}; + FPTYPE dd = 0; + const int& j_idx = row_nlist[ii]; + for (int kk = 0; kk < 3; kk++) { + rr[kk] = coord[j_idx * 3 + kk] - coord[bid * 3 + kk]; + row_rij[ii * 3 + kk] = rr[kk]; + } + // const FPTYPE * rr = &row_rij[ii * 3]; + FPTYPE nr2 = dev_dot(rr, rr); + FPTYPE inr = _rsqrt(nr2); + FPTYPE nr = nr2 * inr; + FPTYPE inr2 = inr * inr; + FPTYPE inr4 = inr2 * inr2; + FPTYPE inr3 = inr4 * nr; + FPTYPE sw, dsw; + spline5_switch(sw, dsw, nr, rmin, rmax); + dd = ((FPTYPE)1. / nr); //* sw; + vv[0] = (rr[0] * inr3 * sw - + dd * dsw * rr[0] * + inr); // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3]; + vv[1] = (rr[1] * inr3 * sw - + dd * dsw * rr[1] * + inr); // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3]; + vv[2] = (rr[2] * inr3 * sw - + dd * dsw * rr[2] * + inr); // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] * + // ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3]; + + // 4 value components + dd *= sw; // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] * + // ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt + + // idx_value + 0]; + for (int ii = 0; ii < 3; ii++) { + row_em_deriv[idx_deriv + ii] = + vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3]; + } + row_em[idx_value] = (dd - avg[type[bid] * ndescrpt + idx_value]) / + std[type[bid] * ndescrpt + idx_value]; + } else { + // TODO: move it to the memset. 
+ row_em[idx_value] -= avg[type[bid] * ndescrpt + idx_value] / + std[type[bid] * ndescrpt + idx_value]; + } + } +} + +namespace deepmd { +template +void format_nbor_list_gpu_cuda(int* nlist, + const FPTYPE* coord, + const int* type, + const InputNlist& gpu_inlist, + int* array_int, + uint_64* array_longlong, + const int max_nbor_size, + const int nloc, + const int nall, + const float rcut, + const std::vector sec) { + const int LEN = 256; + const int nnei = sec.back(); + const int nblock = (nloc + LEN - 1) / LEN; + int* sec_dev = array_int; + int* nei_iter = array_int + sec.size(); // = new int[sec_size]; + int* i_idx = array_int + sec.size() + nloc * sec.size(); + uint_64* key = array_longlong; + assert(max_nbor_size == 256 || max_nbor_size == 512 || + max_nbor_size == 1024 || max_nbor_size == 2048 || + max_nbor_size == 4096); + DPErrcheck(cudaMemset(nlist, -1, sizeof(int) * int_64(nloc) * nnei)); + DPErrcheck(cudaMemset(key, 0xffffffff, + sizeof(uint_64) * int_64(nloc) * max_nbor_size)); + DPErrcheck(cudaMemcpy(sec_dev, &sec[0], sizeof(int) * sec.size(), + cudaMemcpyHostToDevice)); + + get_i_idx<<>>(i_idx, nloc, gpu_inlist.ilist); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + + if (max_nbor_size == 256) { + format_nbor_list_256(key, coord, type, gpu_inlist, nloc, rcut, i_idx); + } else if (max_nbor_size == 512) { + format_nbor_list_512(key, coord, type, gpu_inlist, nloc, rcut, i_idx); + } else if (max_nbor_size == 1024) { + format_nbor_list_1024(key, coord, type, gpu_inlist, nloc, rcut, i_idx); + } else if (max_nbor_size == 2048) { + format_nbor_list_2048(key, coord, type, gpu_inlist, nloc, rcut, i_idx); + } else if (max_nbor_size == 4096) { + format_nbor_list_4096(key, coord, type, gpu_inlist, nloc, rcut, i_idx); + } + + fill_nei_iter<<>>( + nei_iter, key, nloc, max_nbor_size, sec.size()); + + format_nlist_fill_b<<>>( + nlist, nnei, nloc, key, sec_dev, sec.size(), nei_iter, max_nbor_size); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} +} + +namespace deepmd { + +template +void prod_env_mat_a_gpu_cuda(FPTYPE* em, + FPTYPE* em_deriv, + FPTYPE* rij, + int* nlist, + const FPTYPE* coord, + const int* type, + const InputNlist& gpu_inlist, + int* array_int, + uint_64* array_longlong, + const int max_nbor_size, + const FPTYPE* avg, + const FPTYPE* std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec, + const int* f_type) { + if (f_type == NULL) { + f_type = type; + } + const int nnei = sec.back(); + const int ndescrpt = nnei * 4; + DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt)); + DPErrcheck( + cudaMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3)); + DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * int_64(nloc) * nnei * 3)); + + format_nbor_list_gpu_cuda(nlist, coord, f_type, gpu_inlist, array_int, + array_longlong, max_nbor_size, nloc, nall, rcut, + sec); + nborErrcheck(cudaGetLastError()); + nborErrcheck(cudaDeviceSynchronize()); + + compute_env_mat_a<<>>( + em, em_deriv, rij, coord, avg, std, type, nlist, nnei, rcut_smth, rcut); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template +void prod_env_mat_r_gpu_cuda(FPTYPE* em, + FPTYPE* em_deriv, + FPTYPE* rij, + int* nlist, + const FPTYPE* coord, + const int* type, + const deepmd::InputNlist& gpu_inlist, + int* array_int, + uint_64* array_longlong, + const int max_nbor_size, + const FPTYPE* avg, + const FPTYPE* std, + const int nloc, + const int nall, + 
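// Scratch-buffer layout consumed by format_nbor_list_gpu_cuda above:
// array_int is partitioned as [sec | nei_iter (nloc x sec.size()) | i_idx],
// and array_longlong holds one 64-bit key per (atom, slot) pair twice over,
// for the unsorted and sorted halves. Hypothetical sizing helpers:
#include <cstddef>
static size_t array_int_size(int nloc, size_t sec_size) {
  return sec_size + (size_t)nloc * sec_size + (size_t)nloc;
}
static size_t array_longlong_size(int nloc, int max_nbor_size) {
  return (size_t)nloc * (size_t)max_nbor_size * 2;  // keys + sorted keys
}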
const float rcut, + const float rcut_smth, + const std::vector sec) { + const int nnei = sec.back(); + const int ndescrpt = nnei * 1; + DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt)); + DPErrcheck( + cudaMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3)); + DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * int_64(nloc) * nnei * 3)); + + format_nbor_list_gpu_cuda(nlist, coord, type, gpu_inlist, array_int, + array_longlong, max_nbor_size, nloc, nall, rcut, + sec); + nborErrcheck(cudaGetLastError()); + nborErrcheck(cudaDeviceSynchronize()); + + compute_env_mat_r<<>>( + em, em_deriv, rij, coord, avg, std, type, nlist, nnei, rcut_smth, rcut); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} +} + + +template +void prod_env_mat_a_cuda_forward_kernel( + int nsamples, int nloc, int ndescrpt, int nnei, int nall, int mem_cpy, int mem_nnei, + int max_nbor_size, int nei_mode, float rcut_a, float rcut_r, float rcut_r_smth, int max_cpy_trial, + int max_nnei_trial, bool b_nlist_map, const std::vector& sec_a, + const std::vector& sec_r, deepmd::InputNlist gpu_inlist, int* nbor_list_dev, int* array_int, unsigned long long* array_longlong, + data_t *p_em, data_t *p_em_deriv, data_t *p_rij, int *p_nlist, + const data_t *p_coord, const data_t *p_box, const data_t *avg, + const data_t *std, const int *p_type, const paddle::Tensor& mesh_tensor) +{ + + for (int ff = 0; ff < nsamples; ++ff) + { + data_t *em = p_em + ff * nloc * ndescrpt; + data_t *em_deriv = p_em_deriv + ff * nloc * ndescrpt * 3; + data_t *rij = p_rij + ff * nloc * nnei * 3; + int *nlist = p_nlist + ff * nloc * nnei; + const data_t *coord = p_coord + ff * nall * 3; + const data_t *box = p_box + ff * 9; + const int *type = p_type + ff * nall; + + + int *idx_mapping = NULL; + int *ilist = NULL, *numneigh = NULL; + int **firstneigh = NULL; + deepmd::malloc_device_memory(firstneigh, nloc); + int *jlist = NULL; + data_t *coord_cpy; + int *type_cpy; + int frame_nall = nall; + int mesh_tensor_size = static_cast(mesh_tensor.size()); + std::vector tensor_list; + _prepare_coord_nlist_gpu( + &tensor_list, &coord, coord_cpy, &type, type_cpy, idx_mapping, + gpu_inlist, ilist, numneigh, firstneigh, jlist, nbor_list_dev, + frame_nall, mem_cpy, mem_nnei, max_nbor_size, + box, mesh_tensor.data(), mesh_tensor_size, nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial); + // allocate temp memory, temp memory must not be used after this operation! 
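// Per-frame pointer strides used by the sample loop above (derived from the
// offsets in the code): em advances by nloc * ndescrpt per frame, em_deriv by
// nloc * ndescrpt * 3, rij by nloc * nnei * 3, nlist by nloc * nnei, coord by
// nall * 3, box by 9, and type by nall, where ndescrpt = 4 * nnei for se_a.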
+ std::vector<int64_t> int_temp_shape{int(sec_a.size()) + nloc * int(sec_a.size()) + nloc}; + auto int_temp = paddle::empty( + int_temp_shape, + paddle::DataType::INT32, + paddle::GPUPlace() + ); + + array_int = int_temp.mutable_data<int>(); + + deepmd::malloc_device_memory(array_longlong, nloc * GPU_MAX_NBOR_SIZE * 2); + // launch the gpu (nv) compute function + + deepmd::prod_env_mat_a_gpu_cuda( + em, em_deriv, rij, nlist, + coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a); + if (b_nlist_map) + _map_nlist_gpu(nlist, idx_mapping, nloc, nnei); + deepmd::delete_device_memory(firstneigh); + deepmd::delete_device_memory(array_longlong); + array_longlong = NULL; + } +} + +void cum_sum(std::vector<int>& sec, const std::vector<int>& n_sel) { + sec.resize(n_sel.size() + 1); + sec[0] = 0; + for (int ii = 1; ii < int(sec.size()); ++ii) { + sec[ii] = sec[ii - 1] + n_sel[ii - 1]; + } +} + + +std::vector<paddle::Tensor> prod_env_mat_a_cuda_forward( + const paddle::Tensor& coord_tensor, + const paddle::Tensor& atype_tensor, + const paddle::Tensor& box_tensor, + const paddle::Tensor& mesh_tensor, + const paddle::Tensor& t_avg_tensor, + const paddle::Tensor& t_std_tensor, + const paddle::Tensor& natoms_tensor, + float rcut_a, + float rcut_r, + float rcut_r_smth, + std::vector<int> sel_a, + std::vector<int> sel_r +) +{ + std::vector<int> sec_a; + std::vector<int> sec_r; + int ndescrpt, ndescrpt_a, ndescrpt_r; + int nnei, nnei_a, nnei_r, max_nbor_size; + int mem_cpy, max_cpy_trial; + int mem_nnei, max_nnei_trial; + std::string device; + int *array_int = NULL; + unsigned long long *array_longlong = NULL; + deepmd::InputNlist gpu_inlist; + int *nbor_list_dev = NULL; + float nloc_f, nall_f; + + cum_sum(sec_a, sel_a); + cum_sum(sec_r, sel_r); + ndescrpt_a = sec_a.back() * 4; + ndescrpt_r = sec_r.back() * 1; + ndescrpt = ndescrpt_a + ndescrpt_r; + nnei_a = sec_a.back(); + nnei_r = sec_r.back(); + nnei = nnei_a + nnei_r; + max_nbor_size = 1024; + max_cpy_trial = 100; + mem_cpy = 256; + max_nnei_trial = 100; + mem_nnei = 256; + + auto natoms = natoms_tensor.data<int>(); + int nloc = natoms[0]; // TODO: reading natoms[0] through this pointer caused a segmentation fault + int nall = natoms[1]; // TODO: reading natoms[1] through this pointer caused a segmentation fault + // int ntypes = natoms_tensor.shape()[0] - 2; + int nsamples = coord_tensor.shape()[0]; + + int nei_mode = 0; + bool b_nlist_map = false; + if (mesh_tensor.shape()[0] == 16) { + // neighbor list supplied by LAMMPS + nei_mode = 3; + } else if (mesh_tensor.shape()[0] == 6) { + // manually copied pbc + assert(nloc == nall); + nei_mode = 1; + b_nlist_map = true; + } else if (mesh_tensor.shape()[0] == 0) { + // no pbc + assert(nloc ==
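// Worked example of the cum_sum bookkeeping above: for a two-type system with
// sel_a = {46, 92}, cum_sum gives sec_a = {0, 46, 138}, so nnei_a = 138 and
// ndescrpt_a = 4 * 138 = 552; slots [sec_a[t], sec_a[t+1]) hold type-t
// neighbors in every per-atom row of the descriptor and neighbor list.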
nall); + nei_mode = -1; + } else { + PD_THROW("invalid mesh tensor"); + } + + // create output tensors + auto descrpt_tensor = paddle::empty( + {nsamples, nloc * ndescrpt}, + coord_tensor.dtype(), + coord_tensor.place() + ); + // std::cout << "descrpt_tensor.shape = "; + // for (auto &x: descrpt_tensor.shape()) + // std::cout << x << " "; + // std::cout << std::endl; + + auto descrpt_deriv_tensor = paddle::empty( + {nsamples, nloc * ndescrpt * 3}, + coord_tensor.dtype(), + coord_tensor.place() + ); + // std::cout << "descrpt_deriv_tensor.shape = "; + // for (auto &x: descrpt_deriv_tensor.shape()) + // std::cout << x << " "; + // std::cout << std::endl; + + auto rij_tensor = paddle::empty( + {nsamples, nloc * nnei * 3}, + coord_tensor.dtype(), + coord_tensor.place() + ); + // std::cout << "rij_tensor.shape = "; + // for (auto &x: rij_tensor.shape()) + // std::cout << x << " "; + // std::cout << std::endl; + + auto nlist_tensor = paddle::empty( + {nsamples, nloc * nnei}, + coord_tensor.dtype(), + coord_tensor.place() + ); + // std::cout << "nlist_tensor.shape = "; + // for (auto &x: nlist_tensor.shape()) + // std::cout << x << " "; + // std::cout << std::endl; + + // loop over samples + PD_DISPATCH_FLOATING_TYPES( + coord_tensor.type(), "prod_env_mat_a_cuda_forward_kernel", ([&] { + prod_env_mat_a_cuda_forward_kernel( + nsamples, nloc, ndescrpt, nnei, nall, mem_cpy, mem_nnei, max_nbor_size, + nei_mode, rcut_a, rcut_r, rcut_r_smth, max_cpy_trial, max_nnei_trial, b_nlist_map, sec_a, sec_r, + gpu_inlist, nbor_list_dev, array_int, array_longlong, + descrpt_tensor.mutable_data(), + descrpt_deriv_tensor.mutable_data(), + rij_tensor.mutable_data(), + nlist_tensor.mutable_data(), + coord_tensor.data(), + box_tensor.copy_to(paddle::CPUPlace(), false).data(), + t_avg_tensor.data(), + t_std_tensor.data(), + atype_tensor.data(), + mesh_tensor); + })); + return {descrpt_tensor, descrpt_deriv_tensor, rij_tensor, nlist_tensor}; +} + +template +static int +_norm_copy_coord_gpu( + std::vector* tensor_list, + FPTYPE *&coord_cpy, + int *&type_cpy, + int *&idx_mapping, + int &nall, + int &mem_cpy, + const FPTYPE *coord, + const FPTYPE *box, + const int *type, + const int &nloc, + const int &max_cpy_trial, + const float &rcut_r) +{ + // Tensor FPTYPE_temp; + std::vector FPTYPE_temp_shape{nall*3}; + paddle::Tensor tmp_coord_tensor = paddle::Tensor(paddle::PlaceType::kGPU, FPTYPE_temp_shape); + FPTYPE *tmp_coord = tmp_coord_tensor.mutable_data(paddle::PlaceType::kGPU); + tensor_list->push_back(tmp_coord_tensor); + cudaMemcpy(tmp_coord, coord, sizeof(FPTYPE) * nall * 3, cudaMemcpyDeviceToDevice); + + deepmd::Region region; + deepmd::init_region_cpu(region, box); + FPTYPE box_info[18]; + std::copy(region.boxt, region.boxt + 9, box_info); + std::copy(region.rec_boxt, region.rec_boxt + 9, box_info + 9); + int cell_info[23]; + deepmd::compute_cell_info(cell_info, rcut_r, region); + const int loc_cellnum = cell_info[21]; + const int total_cellnum = cell_info[22]; + + //Tensor double_temp; + std::vector double_temp_shape {18}; + paddle::Tensor double_temp_tensor = paddle::Tensor(paddle::PlaceType::kGPU, double_temp_shape); + FPTYPE *box_info_dev = double_temp_tensor.mutable_data(paddle::PlaceType::kGPU); + tensor_list->push_back(double_temp_tensor); + + //Tensor int_temp; + std::vector int_temp_shape {23+nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc}; + paddle::Tensor int_temp_tensor = paddle::Tensor(paddle::PlaceType::kGPU, int_temp_shape); + int *cell_info_dev = 
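// The length of the mesh tensor selects the neighboring strategy (see the
// branch in prod_env_mat_a_cuda_forward above); a standalone summary
// (hypothetical helper):
static int select_nei_mode(int mesh_numel) {
  if (mesh_numel == 16) return 3;  // neighbor list supplied by LAMMPS
  if (mesh_numel == 6) return 1;   // PBC: normalize/copy coords, map nlist back
  if (mesh_numel == 0) return -1;  // open boundary: nloc must equal nall
  return -2;                       // invalid; the op throws in this case
}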
int_temp_tensor.mutable_data(paddle::PlaceType::kGPU); + int *int_data_dev = cell_info_dev + 23; + tensor_list->push_back(int_temp_tensor); + + deepmd::memcpy_host_to_device(box_info_dev, box_info, 18); + deepmd::memcpy_host_to_device(cell_info_dev, cell_info, 23); + + deepmd::Region region_dev; + FPTYPE *new_boxt = region_dev.boxt; + FPTYPE *new_rec_boxt = region_dev.rec_boxt; + region_dev.boxt = box_info_dev; + region_dev.rec_boxt = box_info_dev + 9; + + deepmd::normalize_coord_gpu(tmp_coord, nall, region_dev); + + + int tt; + paddle::Tensor cpy_temp_tensor = paddle::Tensor(paddle::PlaceType::kGPU); + paddle::Tensor t_temp_tensor = paddle::Tensor(paddle::PlaceType::kGPU); + for (tt = 0; tt < max_cpy_trial; ++tt) + { + std::vector cpy_temp_shape {mem_cpy * 3}; + std::vector t_temp_shape {mem_cpy * 2}; + cpy_temp_tensor.reshape(cpy_temp_shape); + coord_cpy = cpy_temp_tensor.mutable_data(paddle::PlaceType::kGPU); + t_temp_tensor.reshape(t_temp_shape); + type_cpy = t_temp_tensor.mutable_data(paddle::PlaceType::kGPU); + + idx_mapping = type_cpy + mem_cpy; + int ret = deepmd::copy_coord_gpu( + coord_cpy, type_cpy, idx_mapping, &nall, int_data_dev, + tmp_coord, type, nloc, mem_cpy, loc_cellnum, total_cellnum, cell_info_dev, region_dev); + if (ret == 0) + { + break; + } + else + { + mem_cpy *= 2; + } + } + tensor_list->push_back(cpy_temp_tensor); + tensor_list->push_back(t_temp_tensor); + region_dev.boxt = new_boxt; + region_dev.rec_boxt = new_rec_boxt; + + return (tt != max_cpy_trial); +} + +template +static int +_build_nlist_gpu( + std::vector *tensor_list, + int *&ilist, + int *&numneigh, + int **&firstneigh, + int *&jlist, + int &max_nnei, + int &mem_nnei, + const FPTYPE *coord, + const int &nloc, + const int &new_nall, + const int &max_nnei_trial, + const float &rcut_r) +{ + //Tensor nlist_temp; + std::vector nlist_temp_shape {nloc * 2}; + paddle::Tensor nlist_temp_tensor = paddle::Tensor(paddle::PlaceType::kGPU, nlist_temp_shape); + ilist = nlist_temp_tensor.mutable_data(paddle::PlaceType::kGPU); + tensor_list->push_back(nlist_temp_tensor); + numneigh = ilist + nloc; + //Tensor jlist_temp; + int *ind_data = NULL; + + std::vector firstneigh_host(nloc); + int tt; + paddle::Tensor jlist_temp_tensor = paddle::Tensor(paddle::PlaceType::kGPU); + for (tt = 0; tt < max_nnei_trial; ++tt) + { + std::vector jlist_temp_shape {3 * nloc * mem_nnei}; + jlist_temp_tensor.reshape(jlist_temp_shape); + jlist = jlist_temp_tensor.mutable_data(paddle::PlaceType::kGPU); + ind_data = jlist + nloc * mem_nnei; + for (int ii = 0; ii < nloc; ++ii) + { + firstneigh_host[ii] = jlist + ii * mem_nnei; + } + deepmd::memcpy_host_to_device(firstneigh, firstneigh_host); + deepmd::InputNlist inlist(nloc, ilist, numneigh, firstneigh); + int ret = deepmd::build_nlist_gpu( + inlist, &max_nnei, ind_data, + coord, nloc, new_nall, mem_nnei, rcut_r); + if (ret == 0) + { + break; + } + else + { + mem_nnei *= 2; + } + } + tensor_list->push_back(jlist_temp_tensor); + return (tt != max_nnei_trial); +} + +static void +_map_nlist_gpu( + int *nlist, + const int *idx_mapping, + const int &nloc, + const int &nnei) +{ + deepmd::use_nlist_map(nlist, idx_mapping, nloc, nnei); +} + +template +static void +_prepare_coord_nlist_gpu( + std::vector *tensor_list, + FPTYPE const **coord, + FPTYPE *&coord_cpy, + int const **type, + int *&type_cpy, + int *&idx_mapping, + deepmd::InputNlist &inlist, + int *&ilist, + int *&numneigh, + int **&firstneigh, + int *&jlist, + int *&nbor_list_dev, + int &new_nall, + int &mem_cpy, + int &mem_nnei, + int 
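// Both _norm_copy_coord_gpu and _build_nlist_gpu use the same grow-and-retry
// idiom: try with the current capacity and double it on failure, up to a
// fixed trial count. Generic sketch (hypothetical; TryFn is bool(int)):
template <typename TryFn>
static bool retry_doubling(TryFn try_once, int &capacity, int max_trials) {
  for (int tt = 0; tt < max_trials; ++tt) {
    if (try_once(capacity)) return true;  // fits, like ret == 0 above
    capacity *= 2;                        // grow and retry, like mem_cpy *= 2
  }
  return false;  // mirrors the (tt != max_cpy_trial) failure return
}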
&max_nbor_size, + const FPTYPE *box, + const int *mesh_tensor_data, + const int mesh_tensor_size, + const int &nloc, + const int &nei_mode, + const float &rcut_r, + const int &max_cpy_trial, + const int &max_nnei_trial) +{ + inlist.inum = nloc; + if (nei_mode != 3) + { + // build the neighbor list ourselves + // normalize and copy coord + if (nei_mode == 1) + { + int copy_ok = _norm_copy_coord_gpu( + tensor_list, coord_cpy, type_cpy, idx_mapping, new_nall, mem_cpy, + *coord, box, *type, nloc, max_cpy_trial, rcut_r); + PD_CHECK(copy_ok, "cannot allocate mem for copied coords"); + *coord = coord_cpy; + *type = type_cpy; + } + + // build nlist + int build_ok = _build_nlist_gpu( + tensor_list, ilist, numneigh, firstneigh, jlist, max_nbor_size, mem_nnei, + *coord, nloc, new_nall, max_nnei_trial, rcut_r); + PD_CHECK(build_ok, "cannot allocate mem for nlist"); + if (max_nbor_size <= 1024) + { + max_nbor_size = 1024; + } + else if (max_nbor_size <= 2048) + { + max_nbor_size = 2048; + } + else + { + max_nbor_size = 4096; + } + inlist.ilist = ilist; + inlist.numneigh = numneigh; + inlist.firstneigh = firstneigh; + } + else + { + // update the neighbor list from the mesh tensor + deepmd::InputNlist inlist_temp; + inlist_temp.inum = nloc; + deepmd::env_mat_nbor_update( + inlist_temp, inlist, max_nbor_size, nbor_list_dev, + mesh_tensor_data, mesh_tensor_size); + PD_CHECK((max_numneigh(inlist_temp) <= GPU_MAX_NBOR_SIZE), "max neighbor size of LAMMPS atoms " + std::to_string(max_numneigh(inlist_temp)) + " exceeds " + std::to_string(GPU_MAX_NBOR_SIZE) + ", which is not supported by deepmd-kit."); + } +} + + +std::vector<paddle::Tensor> ProdEnvMatAForward( + const paddle::Tensor& coord_tensor, + const paddle::Tensor& atype_tensor, + const paddle::Tensor& box_tensor, + const paddle::Tensor& mesh_tensor, + const paddle::Tensor& t_avg_tensor, + const paddle::Tensor& t_std_tensor, + const paddle::Tensor& natoms_tensor, + float rcut_a, + float rcut_r, + float rcut_r_smth, + std::vector<int> sel_a, + std::vector<int> sel_r +) { + if (coord_tensor.is_gpu()) { + return prod_env_mat_a_cuda_forward( + coord_tensor, + atype_tensor, + box_tensor, + mesh_tensor, + t_avg_tensor, + t_std_tensor, + natoms_tensor, + rcut_a, + rcut_r, + rcut_r_smth, + sel_a, + sel_r + ); + } else { + PD_THROW("Unsupported device type for the forward function of the prod_env_mat_a operator."); + } +} + + +std::vector<std::vector<int64_t>> ProdEnvMatAInferShape( + std::vector<int64_t> coord_shape, + std::vector<int64_t> atype_shape, + std::vector<int64_t> box_shape, + std::vector<int64_t> mesh_shape, + std::vector<int64_t> t_avg_shape, + std::vector<int64_t> t_std_shape, + std::vector<int64_t> natoms_shape, + float rcut_a, + float rcut_r, + float rcut_r_smth, + const std::vector<int>& sel_a, + const std::vector<int>& sel_r +) { + int64_t nloc = /*natoms[0]*/ 192; + int64_t nall = /*natoms[1]*/ 192; + + std::vector<int> sec_a; + std::vector<int> sec_r; + cum_sum(sec_a, sel_a); + cum_sum(sec_r, sel_r); + + int64_t nsamples = coord_shape[0]; + int64_t ndescrpt_a = sec_a.back() * 4; + int64_t ndescrpt_r = sec_r.back() * 1; + int64_t ndescrpt = ndescrpt_a + ndescrpt_r; + + int64_t nnei_a = sec_a.back(); + int64_t nnei_r = sec_r.back(); + int64_t nnei = nnei_a + nnei_r; + + std::vector<int64_t> descrpt_shape = {nsamples, nloc * ndescrpt}; + std::vector<int64_t> descrpt_deriv_shape = {nsamples, nloc * ndescrpt * 3}; + std::vector<int64_t> rij_shape = {nsamples, nloc * nnei * 3}; + std::vector<int64_t> nlist_shape = {nsamples, nloc * nnei}; + return {descrpt_shape, descrpt_deriv_shape, rij_shape, nlist_shape};
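// Shape-inference caveat for ProdEnvMatAInferShape above: InferShape runs at
// graph-build time and sees only shapes, never the values stored in natoms,
// hence the hard-coded placeholder nloc = nall = 192; only the sel-derived
// factors are truly static. For example, with sel_a = {46, 92} and sel_r = {}:
//   ndescrpt = 4 * 138 = 552
//   descrpt: {nsamples, nloc * 552}, descrpt_deriv: {nsamples, nloc * 1656}
//   rij: {nsamples, nloc * 414}, nlist: {nsamples, nloc * 138}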
+} + +std::vector ProdEnvMatAInferDtype( + paddle::DataType coord_dtype, + paddle::DataType atype_dtype, + paddle::DataType box_dtype, + paddle::DataType mesh_dtype, + paddle::DataType t_avg_dtype, + paddle::DataType t_std_dtype, + paddle::DataType natoms_dtype +) { + return {coord_dtype, coord_dtype, coord_dtype, coord_dtype}; +} + + +PD_BUILD_OP(prod_env_mat_a) + .Inputs({"coord", "atype", "box", "mesh", "t_avg", "t_std", "natoms"}) + .Outputs({"descrpt", "descrpt_deriv", "rij", "nlist"}) + .Attrs({"rcut_a: float", "rcut_r: float", "rcut_r_smth: float", "sel_a: std::vector", "sel_r: std::vector"}) + .SetKernelFn(PD_KERNEL(ProdEnvMatAForward)) + .SetInferShapeFn(PD_INFER_SHAPE(ProdEnvMatAInferShape)) + .SetInferDtypeFn(PD_INFER_DTYPE(ProdEnvMatAInferDtype)); \ No newline at end of file diff --git a/source/lib/paddle_src/prod_env_mat.h b/source/lib/paddle_src/prod_env_mat.h new file mode 100644 index 0000000000..3052dd2230 --- /dev/null +++ b/source/lib/paddle_src/prod_env_mat.h @@ -0,0 +1,140 @@ +#pragma once +#include + +#include "device.h" +#include "neighbor_list.h" + +namespace deepmd { + +template +void prod_env_mat_a_cpu(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec, + const int *f_type = NULL); + +template +void prod_env_mat_r_cpu(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &inlist, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec); + +#if GOOGLE_CUDA +template +void prod_env_mat_a_gpu_cuda(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &gpu_inlist, + int *array_int, + unsigned long long *array_longlong, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec, + const int *f_type = NULL); + +template +void prod_env_mat_r_gpu_cuda(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &gpu_inlist, + int *array_int, + unsigned long long *array_longlong, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec); + +void env_mat_nbor_update(InputNlist &inlist, + InputNlist &gpu_inlist, + int &max_nbor_size, + int *&nbor_list_dev, + const int *mesh, + const int size); +#endif // GOOGLE_CUDA + +#if TENSORFLOW_USE_ROCM +template +void prod_env_mat_a_gpu_rocm(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &gpu_inlist, + int *array_int, + unsigned long long *array_longlong, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec, + const int *f_type = NULL); + +template +void prod_env_mat_r_gpu_rocm(FPTYPE *em, + FPTYPE *em_deriv, + FPTYPE *rij, + int *nlist, + const FPTYPE *coord, + const int *type, + const InputNlist &gpu_inlist, + int *array_int, + unsigned long long 
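// Registration note: Paddle attribute strings use the "name: type" syntax;
// with the template arguments written out (an assumption here, matching the
// std::vector<int> sel_a / sel_r parameters of the kernel), the Attrs line
// reads:
//   .Attrs({"rcut_a: float", "rcut_r: float", "rcut_r_smth: float",
//           "sel_a: std::vector<int>", "sel_r: std::vector<int>"})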
*array_longlong, + const int max_nbor_size, + const FPTYPE *avg, + const FPTYPE *std, + const int nloc, + const int nall, + const float rcut, + const float rcut_smth, + const std::vector sec); + +void env_mat_nbor_update(InputNlist &inlist, + InputNlist &gpu_inlist, + int &max_nbor_size, + int *&nbor_list_dev, + const int *mesh, + const int size); +#endif // TENSORFLOW_USE_ROCM + +} // namespace deepmd diff --git a/source/lib/paddle_src/prod_force.cu b/source/lib/paddle_src/prod_force.cu new file mode 100644 index 0000000000..4416cef082 --- /dev/null +++ b/source/lib/paddle_src/prod_force.cu @@ -0,0 +1,303 @@ +#include "paddle/extension.h" + +#include "device.h" +#include "prod_force.h" +#include "gpu_cuda.h" + +#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") +#define CHECK_INPUT_DIM(x, value) PD_CHECK(x.shape().size() == value, #x "'s dim should be " #value ".") + +template +__global__ void force_deriv_wrt_center_atom(FPTYPE* force, + const FPTYPE* net_deriv, + const FPTYPE* in_deriv, + const int ndescrpt) { + __shared__ FPTYPE data[THREADS_PER_BLOCK * 3]; + int_64 bid = blockIdx.x; + unsigned int tid = threadIdx.x; + for (int ii = tid; ii < THREADS_PER_BLOCK * 3; ii += THREADS_PER_BLOCK) { + data[ii] = 0.f; + } + for (int ii = tid; ii < ndescrpt; ii += THREADS_PER_BLOCK) { + for (int jj = 0; jj < 3; jj++) { + data[jj * THREADS_PER_BLOCK + tid] += + net_deriv[bid * ndescrpt + ii] * + in_deriv[bid * ndescrpt * 3 + ii * 3 + jj]; + } + } + __syncthreads(); + // do reduction in shared memory + for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) { + if (tid < ii) { + for (int jj = 0; jj < 3; jj++) { + data[jj * THREADS_PER_BLOCK + tid] += + data[jj * THREADS_PER_BLOCK + tid + ii]; + } + } + __syncthreads(); + } + // write result for this block to global memory + if (tid == 0) { + force[bid * 3 + 0] -= data[THREADS_PER_BLOCK * 0]; + force[bid * 3 + 1] -= data[THREADS_PER_BLOCK * 1]; + force[bid * 3 + 2] -= data[THREADS_PER_BLOCK * 2]; + } +} + +template +__global__ void force_deriv_wrt_neighbors_a(FPTYPE* force, + const FPTYPE* net_deriv, + const FPTYPE* in_deriv, + const int* nlist, + const int nloc, + const int nnei) { + // idy -> nnei + const int_64 idx = blockIdx.x; + const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x; + const unsigned int idz = threadIdx.y; + const int ndescrpt = nnei * 4; + if (idy >= nnei) { + return; + } + // deriv wrt neighbors + int j_idx = nlist[idx * nnei + idy]; + if (j_idx < 0) { + return; + } + FPTYPE force_tmp = 0.f; + for (int idw = 0; idw < 4; ++idw) { + force_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] * + in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz]; + } + atomicAdd(force + j_idx * 3 + idz, force_tmp); +} + +template +__global__ void force_deriv_wrt_neighbors_r(FPTYPE* force, + const FPTYPE* net_deriv, + const FPTYPE* in_deriv, + const int* nlist, + const int nloc, + const int nnei) { + // idy -> nnei + const int_64 idx = blockIdx.x; + const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x; + const unsigned int idz = threadIdx.y; + const int ndescrpt = nnei * 1; + if (idy >= nnei) { + return; + } + // deriv wrt neighbors + int j_idx = nlist[idx * nnei + idy]; + if (j_idx < 0) { + return; + } + atomicAdd(force + j_idx * 3 + idz, + net_deriv[idx * ndescrpt + idy] * + in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]); +} + +namespace deepmd { +template +void prod_force_a_gpu_cuda(FPTYPE* force, + const FPTYPE* net_deriv, + const FPTYPE* in_deriv, + const int* nlist, + const int nloc, + const int 
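// CPU reference for the shared-memory reduction above: the force on the
// center atom is F_i[d] = - sum_k net_deriv[i, k] * in_deriv[i, k, d] over
// all ndescrpt descriptor slots k. Hypothetical scalar sketch (assumes the
// caller zeroed force, as cudaMemset does in prod_force_a_gpu_cuda):
static void center_force_ref(double force[3], const double *net_deriv_row,
                             const double *in_deriv_row, int ndescrpt) {
  for (int k = 0; k < ndescrpt; ++k)
    for (int d = 0; d < 3; ++d)
      force[d] -= net_deriv_row[k] * in_deriv_row[k * 3 + d];
}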
nall, + const int nnei) { + const int ndescrpt = nnei * 4; + DPErrcheck(cudaMemset(force, 0, sizeof(FPTYPE) * nall * 3)); + + force_deriv_wrt_center_atom + <<>>(force, net_deriv, in_deriv, ndescrpt); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + + const int LEN = 64; + const int nblock = (nnei + LEN - 1) / LEN; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(LEN, 3); + force_deriv_wrt_neighbors_a<<>>( + force, net_deriv, in_deriv, nlist, nloc, nnei); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template +void prod_force_r_gpu_cuda(FPTYPE* force, + const FPTYPE* net_deriv, + const FPTYPE* in_deriv, + const int* nlist, + const int nloc, + const int nall, + const int nnei) { + const int ndescrpt = nnei * 1; + DPErrcheck(cudaMemset(force, 0, sizeof(FPTYPE) * nall * 3)); + + force_deriv_wrt_center_atom + <<>>(force, net_deriv, in_deriv, ndescrpt); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + + const int LEN = 64; + const int nblock = (nnei + LEN - 1) / LEN; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(LEN, 3); + force_deriv_wrt_neighbors_r<<>>( + force, net_deriv, in_deriv, nlist, nloc, nnei); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template void prod_force_a_gpu_cuda(float* force, + const float* net_deriv, + const float* in_deriv, + const int* nlist, + const int nloc, + const int nall, + const int nnei); +template void prod_force_a_gpu_cuda(double* force, + const double* net_deriv, + const double* in_deriv, + const int* nlist, + const int nloc, + const int nall, + const int nnei); +template void prod_force_r_gpu_cuda(float* force, + const float* net_deriv, + const float* in_deriv, + const int* nlist, + const int nloc, + const int nall, + const int nnei); +template void prod_force_r_gpu_cuda(double* force, + const double* net_deriv, + const double* in_deriv, + const int* nlist, + const int nloc, + const int nall, + const int nnei); +} // namespace deepmd + + +template +void PdProdForceSeAOpForwardCUDAKernel( + int nloc, int nall, int nframes, int ndescrpt, int nnei, + data_t* p_force, const data_t* p_net_deriv, const data_t* p_in_deriv, const int* p_nlist +) { + for(int kk = 0; kk < nframes; ++kk){ + data_t * force = p_force + kk * nall * 3; + const data_t * net_deriv = p_net_deriv + kk * nloc * ndescrpt; + const data_t * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3; + const int * nlist = p_nlist + kk * nloc * nnei; + deepmd::prod_force_a_gpu_cuda( + force, + net_deriv, in_deriv, nlist, nloc, nall, nnei + ); + } +} + + +std::vector PdProdForceSeAOpCUDAForward( + const paddle::Tensor& net_deriv_tensor, + const paddle::Tensor& in_deriv_tensor, + const paddle::Tensor& nlist_tensor, + const paddle::Tensor& natoms_tensor, + int n_a_sel, + int n_r_sel +) { + CHECK_INPUT(net_deriv_tensor); + CHECK_INPUT(in_deriv_tensor); + CHECK_INPUT(nlist_tensor); + // CHECK_INPUT(natoms_tensor); + CHECK_INPUT_DIM(net_deriv_tensor, 2); + CHECK_INPUT_DIM(in_deriv_tensor, 2); + CHECK_INPUT_DIM(natoms_tensor, 1); + + PD_CHECK(natoms_tensor.shape()[0] >= 3, "number of atoms should be larger than (or equal to) 3"); + const int* natoms = natoms_tensor.data(); + int nloc = natoms[0]; + int nall = natoms[1]; + int nframes = net_deriv_tensor.shape()[0]; + int ndescrpt = net_deriv_tensor.shape()[1] / nloc; + int nnei = nlist_tensor.shape()[1] / nloc; + + PD_CHECK(nframes == in_deriv_tensor.shape()[0], "number of samples should match"); + PD_CHECK(nframes == 
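// CPU reference for the atomicAdd scatter above: each center atom i adds
// sum_w net_deriv[i, jj*4 + w] * in_deriv[i, (jj*4 + w)*3 + d] to neighbor
// j = nlist[i, jj]; the GPU needs atomicAdd because several centers update
// the same neighbor concurrently. Hypothetical sketch for one (i, jj) pair:
static void neighbor_force_ref(double force_j[3], const double *net_deriv_row,
                               const double *in_deriv_row, int jj) {
  for (int d = 0; d < 3; ++d) {
    double acc = 0.0;
    for (int w = 0; w < 4; ++w)
      acc += net_deriv_row[jj * 4 + w] * in_deriv_row[(jj * 4 + w) * 3 + d];
    force_j[d] += acc;
  }
}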
nlist_tensor.shape()[0],"number of samples should match"); + PD_CHECK(nloc * ndescrpt * 3 == in_deriv_tensor.shape()[1], "number of descriptors should match"); + + std::vector force_shape {nframes, 3 * nall}; + paddle::Tensor force_tensor = paddle::Tensor(paddle::PlaceType::kGPU, force_shape); + + assert (nframes == force_shape[0]); + assert (nframes == net_deriv_tensor.shape()[0]); + assert (nframes == in_deriv_tensor.shape()[0]); + assert (nframes == nlist_tensor.shape()[0]); + assert (nall * 3 == force_shape[1]); + assert (nloc * ndescrpt == net_deriv_tensor.shape()[1]); + assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape()[1]); + assert (nloc * nnei == nlist_tensor.shape()[1]); + assert (nnei * 4 == ndescrpt); + + PD_DISPATCH_FLOATING_TYPES( + net_deriv_tensor.type(), "pd_prod_force_se_a_cpu_forward_kernel", ([&] { + PdProdForceSeAOpForwardCUDAKernel( + nloc, nall, nframes, ndescrpt, nnei, + force_tensor.mutable_data(), net_deriv_tensor.data(), + in_deriv_tensor.data(), nlist_tensor.data()); + })); + + return {force_tensor}; +} + + +std::vector PdProdForceSeAForward( + const paddle::Tensor& net_deriv_tensor, + const paddle::Tensor& in_deriv_tensor, + const paddle::Tensor& nlist_tensor, + const paddle::Tensor& natoms_tensor, + int n_a_sel, + int n_r_sel +) { + // if(net_deriv_tensor.place() == paddle::PlaceType::kCPU){ + // return PdProdForceSeAOpCPUForward(net_deriv_tensor, in_deriv_tensor, nlist_tensor, natoms_tensor, n_a_sel, n_r_sel); + // }else if(net_deriv_tensor.place() == paddle::PlaceType::kGPU){ + return PdProdForceSeAOpCUDAForward(net_deriv_tensor, in_deriv_tensor, nlist_tensor, natoms_tensor, n_a_sel, n_r_sel); + // }else{ + // PD_THROW("No Such kernel for PdFrodForceSeAForward!"); + // } +} + +std::vector> PdProdForceSeAInferShape( + std::vector net_deriv_shape, + std::vector in_deriv_shape, + std::vector nlist_shape, + std::vector natoms_shape, + const int &n_a_sel, + const int &n_r_sel +) { + // int64_t nloc = /*natoms[0]*/ 192; + int64_t nall = /*natoms[1]*/ 192; + int64_t nframes = net_deriv_shape[0]; + std::vector force_shape = {nframes, 3 * nall}; + return {force_shape}; +} + +std::vector PdProdForceSeAInferDtype( + paddle::DataType net_deriv_dtype, + paddle::DataType in_deriv_dtype, + paddle::DataType nlist_dtype, + paddle::DataType natoms_dtype +) { + return {net_deriv_dtype}; +} + + +PD_BUILD_OP(prod_force_se_a) + .Inputs({"net_deriv", "in_deriv", "nlist", "natoms"}) + .Outputs({"force"}) + .Attrs({"n_a_sel: int", "n_r_sel: int"}) + .SetKernelFn(PD_KERNEL(PdProdForceSeAForward)) + .SetInferShapeFn(PD_INFER_SHAPE(PdProdForceSeAInferShape)) + .SetInferDtypeFn(PD_INFER_DTYPE(PdProdForceSeAInferDtype)); diff --git a/source/lib/paddle_src/prod_force_grad.cu b/source/lib/paddle_src/prod_force_grad.cu new file mode 100644 index 0000000000..a1dad3dc3c --- /dev/null +++ b/source/lib/paddle_src/prod_force_grad.cu @@ -0,0 +1,275 @@ +#include "paddle/extension.h" + +#include "device.h" +#include "prod_force_grad.h" +#include "gpu_cuda.h" + +#define CHECK_INPUT_DIM(x, value) PD_CHECK(x.shape().size() == value, #x "'s dim should be " #value ".") +#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") + +template +__device__ inline FPTYPE dev_dot(const FPTYPE* arr1, const FPTYPE* arr2) { + return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2]; +} + +template +__global__ void force_grad_wrt_center_atom(FPTYPE* grad_net, + const FPTYPE* grad, + const FPTYPE* env_deriv, + const int ndescrpt) { + __shared__ FPTYPE grad_one[3]; + int_64 center_idx = 
blockIdx.x; + unsigned int tid = threadIdx.x; + if (tid < 3) { + grad_one[tid] = grad[center_idx * 3 + tid]; + } + __syncthreads(); + unsigned int descrpt_idx = blockIdx.y * blockDim.x + tid; + if (descrpt_idx < ndescrpt) { + grad_net[center_idx * ndescrpt + descrpt_idx] -= dev_dot( + grad_one, env_deriv + center_idx * ndescrpt * 3 + descrpt_idx * 3); + } +} + +template +__global__ void force_grad_wrt_neighbors_a(FPTYPE* grad_net, + const FPTYPE* grad, + const FPTYPE* env_deriv, + const int* nlist, + const int nloc, + const int nnei) { + // idy -> nnei + const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned int idy = blockIdx.y; + const unsigned int idw = threadIdx.y; + if (idx >= nloc) { + return; + } + int j_idx = nlist[idx * nnei + idy]; + if (j_idx < 0) { + return; + } + if (j_idx >= nloc) j_idx = j_idx % nloc; + grad_net[idx * nnei * 4 + idy * 4 + idw] += dev_dot( + grad + j_idx * 3, env_deriv + idx * nnei * 4 * 3 + idy * 4 * 3 + idw * 3); +} + +template +__global__ void force_grad_wrt_neighbors_r(FPTYPE* grad_net, + const FPTYPE* grad, + const FPTYPE* env_deriv, + const int* nlist, + const int nloc, + const int nnei) { + // idy -> nnei + const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned int idy = blockIdx.y; + if (idx >= nloc) { + return; + } + int j_idx = nlist[idx * nnei + idy]; + if (j_idx < 0) { + return; + } + if (j_idx >= nloc) j_idx = j_idx % nloc; + grad_net[idx * nnei + idy] += + dev_dot(grad + j_idx * 3, env_deriv + idx * nnei * 3 + idy * 3); +} + +namespace deepmd { +template +void prod_force_grad_a_gpu_cuda(FPTYPE* grad_net, + const FPTYPE* grad, + const FPTYPE* env_deriv, + const int* nlist, + const int nloc, + const int nnei) { + const int ndescrpt = nnei * 4; + DPErrcheck(cudaMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt)); + const int nblock = (ndescrpt + TPB - 1) / TPB; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(TPB, 1); + force_grad_wrt_center_atom<<>>(grad_net, grad, + env_deriv, ndescrpt); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + + const int LEN = 128; + const int nblock_ = (nloc + LEN - 1) / LEN; + dim3 block_grid_(nblock_, nnei); + dim3 thread_grid_(LEN, 4); + force_grad_wrt_neighbors_a<<>>( + grad_net, grad, env_deriv, nlist, nloc, nnei); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template +void prod_force_grad_r_gpu_cuda(FPTYPE* grad_net, + const FPTYPE* grad, + const FPTYPE* env_deriv, + const int* nlist, + const int nloc, + const int nnei) { + const int ndescrpt = nnei * 1; + DPErrcheck(cudaMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt)); + const int nblock = (ndescrpt + TPB - 1) / TPB; + dim3 block_grid(nloc, nblock); + dim3 thread_grid(TPB, 1); + force_grad_wrt_center_atom<<>>(grad_net, grad, + env_deriv, ndescrpt); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); + + const int LEN = 128; + const int nblock_ = (nloc + LEN - 1) / LEN; + dim3 block_grid_(nblock_, nnei); + dim3 thread_grid_(LEN, 1); + force_grad_wrt_neighbors_r<<>>( + grad_net, grad, env_deriv, nlist, nloc, nnei); + DPErrcheck(cudaGetLastError()); + DPErrcheck(cudaDeviceSynchronize()); +} + +template void prod_force_grad_a_gpu_cuda(float* grad_net, + const float* grad, + const float* env_deriv, + const int* nlist, + const int nloc, + const int nnei); +template void prod_force_grad_a_gpu_cuda(double* grad_net, + const double* grad, + const double* env_deriv, + const int* nlist, + const int nloc, + const int nnei); +template void 
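// In the neighbor kernels above, ghost atoms (j_idx >= nloc) are folded back
// onto their local images with j_idx % nloc: the incoming force gradient only
// covers the nloc local atoms, and under PBC a ghost copy shares the gradient
// of the atom it replicates (the same convention as the CPU kernels). Scalar
// sketch of one neighbor dot product (hypothetical helper):
static double grad_net_slot(const double *grad /* nloc x 3, folded */, int j_loc,
                            const double *env_deriv_slot /* 3 entries */) {
  return grad[j_loc * 3 + 0] * env_deriv_slot[0] +
         grad[j_loc * 3 + 1] * env_deriv_slot[1] +
         grad[j_loc * 3 + 2] * env_deriv_slot[2];
}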
prod_force_grad_r_gpu_cuda(float* grad_net, + const float* grad, + const float* env_deriv, + const int* nlist, + const int nloc, + const int nnei); +template void prod_force_grad_r_gpu_cuda(double* grad_net, + const double* grad, + const double* env_deriv, + const int* nlist, + const int nloc, + const int nnei); +} // namespace deepmd + +template +void PdProdForceSeAOpCUDABackwardKernel( + int nloc, int nframes, int ndescrpt, int nnei, + const data_t* p_grad, const data_t* p_net_deriv, const data_t* p_in_deriv, + const int* p_nlist, data_t* p_grad_net +) { + for (int_64 kk = 0; kk < nframes; ++kk) { + data_t* grad_net = p_grad_net + kk * nloc * ndescrpt; + const data_t* grad = p_grad + kk * nloc * 3; + const data_t* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3; + const int* nlist = p_nlist + kk * nloc * nnei; + deepmd::prod_force_grad_a_gpu_cuda( + grad_net, grad, in_deriv, nlist, nloc, nnei + ); + } +} + + +std::vector PdProdForceSeAOpCUDABackward( + const paddle::Tensor& force_grad_tensor, + const paddle::Tensor& net_deriv_tensor, + const paddle::Tensor& in_deriv_tensor, + const paddle::Tensor& nlist_tensor, + const paddle::Tensor& natoms_tensor, + int n_a_sel, + int n_r_sel +) { + auto grad_shape = force_grad_tensor.shape(); + auto net_deriv_shape = net_deriv_tensor.shape(); + auto in_deriv_shape = in_deriv_tensor.shape(); + auto nlist_shape = nlist_tensor.shape(); + auto natoms_shape = natoms_tensor.shape(); + + CHECK_INPUT_DIM(force_grad_tensor, 2); + CHECK_INPUT_DIM(net_deriv_tensor, 2); + CHECK_INPUT_DIM(in_deriv_tensor, 2); + CHECK_INPUT_DIM(nlist_tensor, 2); + CHECK_INPUT_DIM(natoms_tensor, 1); + + PD_CHECK(natoms_shape[0] >= 3, "number of atoms should be larger than (or equal to) 3"); + + const int* natoms = nullptr; + // if(natoms_tensor.place() != paddle::PlaceType::kCPU){ + // natoms = natoms_tensor.copy_to(paddle::PlaceType::kCPU).data(); + // }else{ + natoms = natoms_tensor.data(); + // } + int nframes = net_deriv_shape[0]; + int nloc = natoms[0]; + int ndescrpt = net_deriv_shape[1] / nloc; + int nnei = nlist_shape[1] / nloc; + + PD_CHECK(nframes == grad_shape[0], "number of frames should match"); + PD_CHECK(nframes == in_deriv_shape[0], "number of samples should match"); + PD_CHECK(nframes == nlist_shape[0],"number of samples should match"); + PD_CHECK(nloc * 3 == grad_shape[1], "input grad shape should be 3 x natoms"); + PD_CHECK(nloc * ndescrpt * 3 == in_deriv_shape[1], "number of descriptors should match"); + PD_CHECK(nnei == (n_a_sel + n_r_sel), "number of neighbors should match"); + + std::vector grad_net_shape {nframes, nloc * ndescrpt}; + // paddle::Tensor grad_net_tensor = paddle::Tensor(paddle::PlaceType::kCPU, grad_net_shape); + paddle::Tensor grad_net_tensor = paddle::empty( + grad_net_shape, + force_grad_tensor.dtype(), + force_grad_tensor.place() + ); + + // if(force_grad_tensor.place() == paddle::PlaceType::kCPU){ + // PD_DISPATCH_FLOATING_TYPES( + // force_grad_tensor.type(), "pd_prod_force_se_a_cpu_backward_kernel", ([&] { + // PdProdForceSeAOpCPUBackwardKernel( + // nloc, nframes, ndescrpt, nnei, + // force_grad_tensor.data(), + // net_deriv_tensor.data(), + // in_deriv_tensor.data(), + // nlist_tensor.data(), + // grad_net_tensor.mutable_data()); + // })); + // }else{ + PD_DISPATCH_FLOATING_TYPES( + force_grad_tensor.type(), "pd_prod_force_se_a_cuda_backward_kernel", ([&] { + PdProdForceSeAOpCUDABackwardKernel( + nloc, nframes, ndescrpt, nnei, + force_grad_tensor.data(), + net_deriv_tensor.data(), + in_deriv_tensor.data(), + nlist_tensor.data(), + 
+
+std::vector<paddle::Tensor> PdProdForceSeABackward(
+    const paddle::Tensor& force_grad_tensor,
+    const paddle::Tensor& net_deriv_tensor,
+    const paddle::Tensor& in_deriv_tensor,
+    const paddle::Tensor& nlist_tensor,
+    const paddle::Tensor& natoms_tensor,
+    int n_a_sel, int n_r_sel) {
+  return PdProdForceSeAOpCUDABackward(
+      force_grad_tensor, net_deriv_tensor, in_deriv_tensor, nlist_tensor,
+      natoms_tensor, n_a_sel, n_r_sel);
+}
+
+PD_BUILD_GRAD_OP(prod_force_se_a)
+    .Inputs({paddle::Grad("force"), "net_deriv", "in_deriv", "nlist",
+             "natoms"})
+    .Outputs({paddle::Grad("net_deriv")})
+    .Attrs({"n_a_sel: int", "n_r_sel: int"})
+    .SetKernelFn(PD_KERNEL(PdProdForceSeABackward));
diff --git a/source/lib/paddle_src/prod_virial.cc b/source/lib/paddle_src/prod_virial.cc
new file mode 100644
index 0000000000..8769ccf8f1
--- /dev/null
+++ b/source/lib/paddle_src/prod_virial.cc
@@ -0,0 +1,219 @@
+#include "custom_op.h"
+
+REGISTER_OP("ProdVirial")
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("axis: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("virial: T")
+    .Output("atom_virial: T");
+
+using namespace tensorflow;
+
+using CPUDevice = Eigen::ThreadPoolDevice;
+
+template <typename Device, typename FPTYPE>
+class ProdVirialOp : public OpKernel {
+ public:
+  explicit ProdVirialOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
+    n_a_shift = n_a_sel * 4;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
+  }
+
+  void _Compute(OpKernelContext* context) {
+    // Grab the input tensor
+    const Tensor& net_deriv_tensor = context->input(0);
+    const Tensor& in_deriv_tensor = context->input(1);
+    const Tensor& rij_tensor = context->input(2);
+    const Tensor& nlist_tensor = context->input(3);
+    const Tensor& axis_tensor = context->input(4);
+    const Tensor& natoms_tensor = context->input(5);
+
+    // set size of the sample
+    OP_REQUIRES(context, (net_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (axis_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of axis should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
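+    // By DeePMD convention natoms packs [nloc, nall, per-type counts...],
+    // which is why at least three entries are required above; only the
+    // first two entries are used here.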
+    auto natoms = natoms_tensor.flat<int>();
+
+    int nframes = net_deriv_tensor.shape().dim_size(0);
+    int nloc = natoms(0);
+    int nall = natoms(1);
+    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
+    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+
+    // check the sizes
+    OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == rij_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == axis_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+
+    OP_REQUIRES(context,
+                (nloc * ndescrpt * 12 == in_deriv_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("dim of rij should be nnei * 3"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
+    OP_REQUIRES(
+        context, (nloc * 4 == axis_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("number of axis type+id should be 2+2"));
+
+    // Create an output tensor
+    TensorShape virial_shape;
+    virial_shape.AddDim(nframes);
+    virial_shape.AddDim(9);
+    Tensor* virial_tensor = NULL;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, virial_shape, &virial_tensor));
+    TensorShape atom_virial_shape;
+    atom_virial_shape.AddDim(nframes);
+    atom_virial_shape.AddDim(9 * nall);
+    Tensor* atom_virial_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape,
+                                                     &atom_virial_tensor));
+
+    // flat the tensors
+    auto net_deriv = net_deriv_tensor.flat<FPTYPE>();
+    auto in_deriv = in_deriv_tensor.flat<FPTYPE>();
+    auto rij = rij_tensor.flat<FPTYPE>();
+    auto nlist = nlist_tensor.flat<int>();
+    auto axis = axis_tensor.flat<int>();
+    auto virial = virial_tensor->flat<FPTYPE>();
+    auto atom_virial = atom_virial_tensor->flat<FPTYPE>();
+
+    // loop over samples
+#pragma omp parallel for
+    for (int kk = 0; kk < nframes; ++kk) {
+      int net_iter = kk * nloc * ndescrpt;
+      int in_iter = kk * nloc * ndescrpt * 12;
+      int rij_iter = kk * nloc * nnei * 3;
+      int nlist_iter = kk * nloc * nnei;
+      int axis_iter = kk * nloc * 4;
+      int virial_iter = kk * 9;
+      int atom_virial_iter = kk * nall * 9;
+
+      for (int ii = 0; ii < 9; ++ii) {
+        virial(virial_iter + ii) = 0.;
+      }
+      for (int ii = 0; ii < 9 * nall; ++ii) {
+        atom_virial(atom_virial_iter + ii) = 0.;
+      }
+
+      // compute virial of a frame
+      for (int ii = 0; ii < nloc; ++ii) {
+        int i_idx = ii;
+
+        // set axes
+        int axis0_type = axis(axis_iter + i_idx * 4 + 0);
+        int axis1_type = axis(axis_iter + i_idx * 4 + 2);
+        int axis_0 = axis(axis_iter + i_idx * 4 + 1);
+        int axis_1 = axis(axis_iter + i_idx * 4 + 3);
+        if (axis0_type == 1) axis_0 += n_a_sel;
+        if (axis1_type == 1) axis_1 += n_a_sel;
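+
+        // The per-frame in_deriv holds 12 values per descriptor entry;
+        // columns 3-5 are used when the neighbor is the first axis atom,
+        // 6-8 when it is the second, and 9-11 otherwise, as the three
+        // branches below show.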
+        // deriv wrt neighbors
+        for (int jj = 0; jj < nnei; ++jj) {
+          int j_idx = nlist(nlist_iter + i_idx * nnei + jj);
+          if (j_idx < 0) continue;
+          if (jj == axis_0) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              FPTYPE pref = -1.0 * net_deriv(net_iter + i_idx * ndescrpt + aa);
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  FPTYPE tmp_v =
+                      pref * rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 +
+                               dd0);
+                  virial(virial_iter + dd0 * 3 + dd1) += tmp_v;
+                  atom_virial(atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) +=
+                      tmp_v;
+                }
+              }
+            }
+          } else if (jj == axis_1) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              FPTYPE pref = -1.0 * net_deriv(net_iter + i_idx * ndescrpt + aa);
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  FPTYPE tmp_v =
+                      pref * rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 +
+                               dd0);
+                  virial(virial_iter + dd0 * 3 + dd1) += tmp_v;
+                  atom_virial(atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) +=
+                      tmp_v;
+                }
+              }
+            }
+          } else {
+            int aa_start, aa_end;
+            make_descript_range(aa_start, aa_end, jj);
+            for (int aa = aa_start; aa < aa_end; ++aa) {
+              FPTYPE pref = -1.0 * net_deriv(net_iter + i_idx * ndescrpt + aa);
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  FPTYPE tmp_v =
+                      pref * rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 +
+                               dd0);
+                  virial(virial_iter + dd0 * 3 + dd1) += tmp_v;
+                  atom_virial(atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) +=
+                      tmp_v;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+ private:
+  int n_r_sel, n_a_sel, n_a_shift;
+  inline void make_descript_range(int& idx_start, int& idx_end,
+                                  const int& nei_idx) {
+    if (nei_idx < n_a_sel) {
+      idx_start = nei_idx * 4;
+      idx_end = nei_idx * 4 + 4;
+    } else {
+      idx_start = n_a_shift + (nei_idx - n_a_sel);
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
+    }
+  }
+};
+
+#define REGISTER_CPU(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("ProdVirial").Device(DEVICE_CPU).TypeConstraint<T>("T"),        \
+      ProdVirialOp<CPUDevice, T>);
+REGISTER_CPU(float);
+REGISTER_CPU(double);
diff --git a/source/lib/paddle_src/prod_virial.cu b/source/lib/paddle_src/prod_virial.cu
new file mode 100644
index 0000000000..fe7abee63b
--- /dev/null
+++ b/source/lib/paddle_src/prod_virial.cu
@@ -0,0 +1,496 @@
+#include <vector>
+
+#include "paddle/extension.h"
+
+#include "device.h"
+#include "prod_virial.h"
+#include "gpu_cuda.h"
+
+#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
+#define CHECK_INPUT_DIM(x, value) \
+  PD_CHECK(x.shape().size() == value, #x "'s dim should be " #value ".")
+#define CHECK_INPUT_READY(x) \
+  PD_CHECK(x.IsInitialized(), #x " must be initialized before usage.")
+
+template <typename FPTYPE, int THREADS_PER_BLOCK>
+__global__ void atom_virial_reduction(FPTYPE* virial,
+                                      const FPTYPE* atom_virial,
+                                      const int nall) {
+  unsigned int bid = blockIdx.x;
+  unsigned int tid = threadIdx.x;
+  __shared__ FPTYPE data[THREADS_PER_BLOCK];
+  data[tid] = (FPTYPE)0.;
+  for (int ii = tid; ii < nall; ii += THREADS_PER_BLOCK) {
+    data[tid] += atom_virial[ii * 9 + bid];
+  }
+  __syncthreads();
+  // do reduction in shared memory
+  for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) {
+    if (tid < ii) {
+      data[tid] += data[tid + ii];
+    }
+    __syncthreads();
+  }
+  // write result for this block to global memory
+  if (tid == 0) virial[bid] = data[0];
+}
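+
+// In the neighbor kernels below, one block row per local atom along x and
+// threads cover (neighbor, virial component) pairs; contributions go into
+// the per-atom virial with atomicAdd and are then folded into the
+// 9-component frame virial by atom_virial_reduction above, whose
+// shared-memory tree reduction assumes THREADS_PER_BLOCK is a power of two.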
+
+template <typename FPTYPE>
+__global__ void virial_deriv_wrt_neighbors_a(FPTYPE* virial,
+                                             FPTYPE* atom_virial,
+                                             const FPTYPE* net_deriv,
+                                             const FPTYPE* in_deriv,
+                                             const FPTYPE* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nnei) {
+  // idx -> nloc
+  // idy -> nnei
+  // idz = dd0 * 3 + dd1
+  // dd0 = idz / 3
+  // dd1 = idz % 3
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 4;
+  if (idy >= nnei) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  // accumulate the four descriptor entries of this neighbor locally, then
+  // issue a single atomicAdd per (neighbor, component) pair
+  FPTYPE virial_tmp = (FPTYPE)0.;
+  for (int idw = 0; idw < 4; ++idw) {
+    virial_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] *
+                  rij[idx * nnei * 3 + idy * 3 + idz % 3] *
+                  in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz / 3];
+  }
+  atomicAdd(atom_virial + j_idx * 9 + idz, virial_tmp);
+}
+
+template <typename FPTYPE>
+__global__ void virial_deriv_wrt_neighbors_r(FPTYPE* virial,
+                                             FPTYPE* atom_virial,
+                                             const FPTYPE* net_deriv,
+                                             const FPTYPE* in_deriv,
+                                             const FPTYPE* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nnei) {
+  // idx -> nloc
+  // idy -> nnei
+  // idz = dd0 * 3 + dd1
+  // dd0 = idz / 3
+  // dd1 = idz % 3
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 1;
+
+  if (idy >= nnei) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  atomicAdd(atom_virial + j_idx * 9 + idz,
+            net_deriv[idx * ndescrpt + idy] *
+                rij[idx * nnei * 3 + idy * 3 + idz % 3] *
+                in_deriv[idx * ndescrpt * 3 + idy * 3 + idz / 3]);
+}
+
+namespace deepmd {
+template <typename FPTYPE>
+void prod_virial_a_gpu_cuda(FPTYPE* virial, FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv, const FPTYPE* in_deriv,
+                            const FPTYPE* rij, const int* nlist,
+                            const int nloc, const int nall, const int nnei) {
+  DPErrcheck(cudaMemset(virial, 0, sizeof(FPTYPE) * 9));
+  DPErrcheck(cudaMemset(atom_virial, 0, sizeof(FPTYPE) * 9 * nall));
+
+  const int LEN = 16;
+  int nblock = (nnei + LEN - 1) / LEN;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(LEN, 9);
+  // compute virial of a frame
+  virial_deriv_wrt_neighbors_a<<<block_grid, thread_grid>>>(
+      virial, atom_virial, net_deriv, in_deriv, rij, nlist, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+  // reduction atom_virial to virial
+  atom_virial_reduction<FPTYPE, TPB><<<9, TPB>>>(virial, atom_virial, nall);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+}
+
+template <typename FPTYPE>
+void prod_virial_r_gpu_cuda(FPTYPE* virial, FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv, const FPTYPE* in_deriv,
+                            const FPTYPE* rij, const int* nlist,
+                            const int nloc, const int nall, const int nnei) {
+  DPErrcheck(cudaMemset(virial, 0, sizeof(FPTYPE) * 9));
+  DPErrcheck(cudaMemset(atom_virial, 0, sizeof(FPTYPE) * 9 * nall));
+
+  const int LEN = 16;
+  int nblock = (nnei + LEN - 1) / LEN;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(LEN, 9);
+  // compute virial of a frame
+  virial_deriv_wrt_neighbors_r<<<block_grid, thread_grid>>>(
+      virial, atom_virial, net_deriv, in_deriv, rij, nlist, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+  // reduction atom_virial to virial
+  atom_virial_reduction<FPTYPE, TPB><<<9, TPB>>>(virial, atom_virial, nall);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+}
+}  // namespace deepmd
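+
+// Per-frame host wrapper: strides the flat (nframes, ...) buffers frame by
+// frame and launches the single-frame implementation above for each one.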
+template <typename data_t>
+void PdProdVirialSeAOpForwardCUDAKernel(
+    int nloc, int nall, int ndescrpt, int nnei, int nframes,
+    data_t* p_virial, data_t* p_atom_virial, const data_t* p_net_deriv,
+    const data_t* p_in_deriv, const data_t* p_rij, const int* p_nlist) {
+  for (int kk = 0; kk < nframes; ++kk) {
+    data_t* virial = p_virial + kk * 9;
+    data_t* atom_virial = p_atom_virial + kk * nall * 9;
+    const data_t* net_deriv = p_net_deriv + kk * nloc * ndescrpt;
+    const data_t* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+    const data_t* rij = p_rij + kk * nloc * nnei * 3;
+    const int* nlist = p_nlist + kk * nloc * nnei;
+    deepmd::prod_virial_a_gpu_cuda(virial, atom_virial, net_deriv, in_deriv,
+                                   rij, nlist, nloc, nall, nnei);
+  }
+}
+
+std::vector<paddle::Tensor> PdProdVirialSeAOpCUDAForward(
+    const paddle::Tensor& net_deriv_tensor,
+    const paddle::Tensor& in_deriv_tensor,
+    const paddle::Tensor& rij_tensor,
+    const paddle::Tensor& nlist_tensor,
+    const paddle::Tensor& natoms_tensor,
+    int n_a_sel, int n_r_sel) {
+  CHECK_INPUT(net_deriv_tensor);
+  CHECK_INPUT(in_deriv_tensor);
+  CHECK_INPUT(rij_tensor);
+  CHECK_INPUT(nlist_tensor);
+  // CHECK_INPUT(natoms_tensor);  // TODO: for now the Python side must pass
+  // natoms as a CPU tensor; copy_to from GPU returns a pointer whose data is
+  // wrong.
+
+  CHECK_INPUT_DIM(net_deriv_tensor, 2);
+  CHECK_INPUT_DIM(in_deriv_tensor, 2);
+  CHECK_INPUT_DIM(rij_tensor, 2);
+  CHECK_INPUT_DIM(nlist_tensor, 2);
+  CHECK_INPUT_DIM(natoms_tensor, 1);
+
+  PD_CHECK(natoms_tensor.shape()[0] >= 3,
+           "number of atoms should be larger than (or equal to) 3");
+  const int* natoms = natoms_tensor.data<int>();
+  int nloc = natoms[0];
+  int nall = natoms[1];
+  int nnei = nlist_tensor.shape()[1] / nloc;
+  int nframes = net_deriv_tensor.shape()[0];
+  int ndescrpt = net_deriv_tensor.shape()[1] / nloc;
+  PD_CHECK(nframes == in_deriv_tensor.shape()[0],
+           "number of samples should match");
+  PD_CHECK(nframes == rij_tensor.shape()[0], "number of samples should match");
+  PD_CHECK(nframes == nlist_tensor.shape()[0],
+           "number of samples should match");
+  PD_CHECK(nloc * ndescrpt * 3 == in_deriv_tensor.shape()[1],
+           "number of descriptors should match");
+  PD_CHECK((nloc * nnei * 3) == rij_tensor.shape()[1],
+           "dim of rij should be nnei * 3");
+
+  std::vector<int64_t> virial_shape{nframes, 9};
+  std::vector<int64_t> atom_virial_shape{nframes, 9 * nall};
+  paddle::Tensor virial_tensor =
+      paddle::Tensor(paddle::PlaceType::kGPU, virial_shape);
+  paddle::Tensor atom_virial_tensor =
+      paddle::Tensor(paddle::PlaceType::kGPU, atom_virial_shape);
+
+  PD_DISPATCH_FLOATING_TYPES(
+      net_deriv_tensor.type(), "pd_prod_virial_se_a_cuda_forward_kernel",
+      ([&] {
+        PdProdVirialSeAOpForwardCUDAKernel<data_t>(
+            nloc, nall, ndescrpt, nnei, nframes,
+            virial_tensor.mutable_data<data_t>(),
+            atom_virial_tensor.mutable_data<data_t>(),
+            net_deriv_tensor.data<data_t>(), in_deriv_tensor.data<data_t>(),
+            rij_tensor.data<data_t>(), nlist_tensor.data<int>());
+      }));
+
+  return {virial_tensor, atom_virial_tensor};
+}
+
+std::vector<paddle::Tensor> PdProdVirialSeAForward(
+    const paddle::Tensor& net_deriv_tensor,
+    const paddle::Tensor& in_deriv_tensor,
+    const paddle::Tensor& rij_tensor,
+    const paddle::Tensor& nlist_tensor,
+    const paddle::Tensor& natoms_tensor,
+    int n_a_sel, int n_r_sel) {
+  if (net_deriv_tensor.is_gpu()) {
+    return PdProdVirialSeAOpCUDAForward(net_deriv_tensor, in_deriv_tensor,
+                                        rij_tensor, nlist_tensor,
+                                        natoms_tensor, n_a_sel, n_r_sel);
+  } else {
+    PD_THROW(
+        "Unsupported device type for the forward function of the "
+        "prod_virial_se_a operator.");
+  }
+}
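+
+// The shape inference below hard-codes nall = 192 (note the /*natoms[1]*/
+// hint), presumably the size of the system it was tested on; the
+// atom_virial shape will be wrong for any other system until nall is
+// derived from natoms at runtime.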
+std::vector<std::vector<int64_t>> PdProdVirialSeAInferShape(
+    std::vector<int64_t> net_deriv_shape,
+    std::vector<int64_t> in_deriv_shape,
+    std::vector<int64_t> rij_shape,
+    std::vector<int64_t> nlist_shape,
+    std::vector<int64_t> natoms_shape,
+    const int& n_a_sel,
+    const int& n_r_sel) {
+  // int64_t nloc = /*natoms[0]*/ 192;
+  int64_t nall = /*natoms[1]*/ 192;
+  int64_t nframes = net_deriv_shape[0];
+
+  std::vector<int64_t> virial_shape = {nframes, 9};
+  std::vector<int64_t> atom_virial_shape = {nframes, 9 * nall};
+
+  return {virial_shape, atom_virial_shape};
+}
+
+std::vector<paddle::DataType> PdProdVirialSeAInferDtype(
+    paddle::DataType net_deriv_dtype,
+    paddle::DataType in_deriv_dtype,
+    paddle::DataType rij_dtype,
+    paddle::DataType nlist_dtype,
+    paddle::DataType natoms_dtype) {
+  return {net_deriv_dtype, net_deriv_dtype};
+}
+
+PD_BUILD_OP(prod_virial_se_a)
+    .Inputs({"net_deriv", "in_deriv", "rij", "nlist", "natoms"})
+    .Outputs({"virial", "atom_virial"})
+    .Attrs({"n_a_sel: int", "n_r_sel: int"})
+    .SetKernelFn(PD_KERNEL(PdProdVirialSeAForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(PdProdVirialSeAInferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(PdProdVirialSeAInferDtype));
diff --git a/source/lib/paddle_src/prod_virial.h b/source/lib/paddle_src/prod_virial.h
new file mode 100644
index 0000000000..c51e333a47
--- /dev/null
+++ b/source/lib/paddle_src/prod_virial.h
@@ -0,0 +1,75 @@
+#pragma once
+
+namespace deepmd {
+
+template <typename FPTYPE>
+void prod_virial_a_cpu(FPTYPE* virial, FPTYPE* atom_virial,
+                       const FPTYPE* net_deriv, const FPTYPE* env_deriv,
+                       const FPTYPE* rij, const int* nlist,
+                       const int nloc, const int nall, const int nnei);
+
+template <typename FPTYPE>
+void prod_virial_r_cpu(FPTYPE* virial, FPTYPE* atom_virial,
+                       const FPTYPE* net_deriv, const FPTYPE* env_deriv,
+                       const FPTYPE* rij, const int* nlist,
+                       const int nloc, const int nall, const int nnei);
+
+#if GOOGLE_CUDA
+template <typename FPTYPE>
+void prod_virial_a_gpu_cuda(FPTYPE* virial, FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv, const FPTYPE* env_deriv,
+                            const FPTYPE* rij, const int* nlist,
+                            const int nloc, const int nall, const int nnei);
+
+template <typename FPTYPE>
+void prod_virial_r_gpu_cuda(FPTYPE* virial, FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv, const FPTYPE* env_deriv,
+                            const FPTYPE* rij, const int* nlist,
+                            const int nloc, const int nall, const int nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+template <typename FPTYPE>
+void prod_virial_a_gpu_rocm(FPTYPE* virial, FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv, const FPTYPE* env_deriv,
+                            const FPTYPE* rij, const int* nlist,
+                            const int nloc, const int nall, const int nnei);
+
+template <typename FPTYPE>
+void prod_virial_r_gpu_rocm(FPTYPE* virial, FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv, const FPTYPE* env_deriv,
+                            const FPTYPE* rij, const int* nlist,
+                            const int nloc, const int nall, const int nnei);
+#endif  // TENSORFLOW_USE_ROCM
+
+}  // namespace deepmd
diff --git a/source/lib/paddle_src/prod_virial_grad.cc b/source/lib/paddle_src/prod_virial_grad.cc
new file mode 100644
index 0000000000..14ba158cc1
--- /dev/null
+++ b/source/lib/paddle_src/prod_virial_grad.cc
@@ -0,0 +1,138 @@
+#include "prod_virial_grad.h"
+
+#include "errors.h"
+
+inline void make_index_range(int& idx_start, int& idx_end,
+                             const int& nei_idx, const int& nnei) {
+  if (nei_idx < nnei) {
+    idx_start = nei_idx * 4;
+    idx_end = nei_idx * 4 + 4;
+  } else {
+    throw deepmd::deepmd_exception("should not reach here");
+  }
+}
+
+template <typename FPTYPE>
+void deepmd::prod_virial_grad_a_cpu(FPTYPE* grad_net,
+                                    const FPTYPE* grad,
+                                    const FPTYPE* env_deriv,
+                                    const FPTYPE* rij,
+                                    const int* nlist,
+                                    const int nloc,
+                                    const int nnei) {
+  const int ndescrpt = nnei * 4;
+
+  // reset the frame to 0
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int aa = 0; aa < ndescrpt; ++aa) {
+      grad_net[ii * ndescrpt + aa] = 0;
+    }
+  }
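+
+  // What the loop below computes, for local atom i, neighbor jj and
+  // descriptor entry aa:
+  //   grad_net[i][aa] += sum_{dd0,dd1} grad[dd0*3+dd1]
+  //                      * rij[i][jj][dd1] * env_deriv[i][aa][dd0]
+  // written as "-= -1.0 * ..." apparently to mirror the sign convention of
+  // the TensorFlow implementation of this gradient.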
+// compute grad of one frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
+    int i_idx = ii;
+
+    // loop over neighbors
+    for (int jj = 0; jj < nnei; ++jj) {
+      int j_idx = nlist[i_idx * nnei + jj];
+      if (j_idx < 0) continue;
+      int aa_start, aa_end;
+      make_index_range(aa_start, aa_end, jj, nnei);
+      for (int aa = aa_start; aa < aa_end; ++aa) {
+        for (int dd0 = 0; dd0 < 3; ++dd0) {
+          for (int dd1 = 0; dd1 < 3; ++dd1) {
+            grad_net[i_idx * ndescrpt + aa] -=
+                -1.0 * grad[dd0 * 3 + dd1] *
+                rij[i_idx * nnei * 3 + jj * 3 + dd1] *
+                env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd0];
+          }
+        }
+      }
+    }
+  }
+}
+
+template void deepmd::prod_virial_grad_a_cpu<double>(
+    double* grad_net, const double* grad, const double* env_deriv,
+    const double* rij, const int* nlist, const int nloc, const int nnei);
+
+template void deepmd::prod_virial_grad_a_cpu<float>(
+    float* grad_net, const float* grad, const float* env_deriv,
+    const float* rij, const int* nlist, const int nloc, const int nnei);
+
+template <typename FPTYPE>
+void deepmd::prod_virial_grad_r_cpu(FPTYPE* grad_net,
+                                    const FPTYPE* grad,
+                                    const FPTYPE* env_deriv,
+                                    const FPTYPE* rij,
+                                    const int* nlist,
+                                    const int nloc,
+                                    const int nnei)
+//
+//  grad_net:  nloc x ndescrpt
+//  grad:      9
+//  env_deriv: nloc x ndescrpt x 3
+//  rij:       nloc x nnei x 3
+//  nlist:     nloc x nnei
+//
+{
+  const int ndescrpt = nnei * 1;
+
+  // reset the frame to 0
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int aa = 0; aa < ndescrpt; ++aa) {
+      grad_net[ii * ndescrpt + aa] = 0;
+    }
+  }
+
+// compute grad of one frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
+    int i_idx = ii;
+
+    // loop over neighbors
+    for (int jj = 0; jj < nnei; ++jj) {
+      int j_idx = nlist[i_idx * nnei + jj];
+      if (j_idx < 0) continue;
+      for (int dd0 = 0; dd0 < 3; ++dd0) {
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          grad_net[i_idx * ndescrpt + jj] -=
+              -1.0 * grad[dd0 * 3 + dd1] *
+              rij[i_idx * nnei * 3 + jj * 3 + dd1] *
+              env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd0];
+        }
+      }
+    }
+  }
+}
+
+template void deepmd::prod_virial_grad_r_cpu<double>(
+    double* grad_net, const double* grad, const double* env_deriv,
+    const double* rij, const int* nlist, const int nloc, const int nnei);
+
+template void deepmd::prod_virial_grad_r_cpu<float>(
+    float* grad_net, const float* grad, const float* env_deriv,
+    const float* rij, const int* nlist, const int nloc, const int nnei);
diff --git a/source/lib/paddle_src/prod_virial_grad.cu b/source/lib/paddle_src/prod_virial_grad.cu
new file mode 100644
index 0000000000..6205c4bdf8
--- /dev/null
+++ b/source/lib/paddle_src/prod_virial_grad.cu
@@ -0,0 +1,536 @@
+#include <vector>
+
+#include "paddle/extension.h"
+
+#include "device.h"
+#include "prod_virial.h"
+#include "gpu_cuda.h"
+
+#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
+#define CHECK_INPUT_DIM(x, value) \
+  PD_CHECK(x.shape().size() == value, #x "'s dim should be " #value ".")
+#define CHECK_INPUT_READY(x) \
+  PD_CHECK(x.IsInitialized(), #x " must be initialized before usage.")
+
+/* The backward (gradient) implementation follows. */
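+
+// The virial gradient is a 9-component tensor; each (neighbor, descriptor
+// slot) pair contributes the 3x3 outer product of rij and env_deriv, which
+// is contracted against that 9-vector. dev_dot9 below performs the
+// 9-element dot product.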
+template <typename FPTYPE>
+__device__ inline FPTYPE dev_dot9(const FPTYPE* arr1, const FPTYPE* arr2) {
+  FPTYPE result = (FPTYPE)0.0;
+  for (int ii = 0; ii < 9; ii++) {
+    result += arr1[ii] * arr2[ii];
+  }
+  return result;
+}
+
+template <typename FPTYPE>
+__global__ void virial_grad_wrt_neighbors_a(FPTYPE* grad_net,
+                                            const FPTYPE* grad,
+                                            const FPTYPE* env_deriv,
+                                            const FPTYPE* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const unsigned int tid = threadIdx.x;
+  const int_64 idx = blockIdx.x * blockDim.x + tid;
+  const unsigned int idy = blockIdx.y;
+  const unsigned int idw = threadIdx.y;
+  const int ndescrpt = nnei * 4;
+  __shared__ FPTYPE grad_one[9];
+  if (tid < 9) {
+    grad_one[tid] = grad[tid];
+  }
+  __syncthreads();
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  FPTYPE tmp[9];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      tmp[dd0 * 3 + dd1] =
+          rij[idx * nnei * 3 + idy * 3 + dd1] *
+          env_deriv[idx * ndescrpt * 3 + idy * 4 * 3 + idw * 3 + dd0];
+    }
+  }
+  grad_net[idx * ndescrpt + idy * 4 + idw] -=
+      (FPTYPE)-1.0 * dev_dot9(grad_one, tmp);
+}
+
+namespace deepmd {
+template <typename FPTYPE>
+void prod_virial_grad_a_gpu_cuda(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei) {
+  const int ndescrpt = nnei * 4;
+  DPErrcheck(cudaMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int LEN = 128;
+  const int nblock = (nloc + LEN - 1) / LEN;
+  dim3 block_grid(nblock, nnei);
+  dim3 thread_grid(LEN, 4);
+  virial_grad_wrt_neighbors_a<<<block_grid, thread_grid>>>(
+      grad_net, grad, env_deriv, rij, nlist, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+}
+
+template void prod_virial_grad_a_gpu_cuda<float>(
+    float* grad_net, const float* grad, const float* env_deriv,
+    const float* rij, const int* nlist, const int nloc, const int nnei);
+template void prod_virial_grad_a_gpu_cuda<double>(
+    double* grad_net, const double* grad, const double* env_deriv,
+    const double* rij, const int* nlist, const int nloc, const int nnei);
+}  // namespace deepmd
+
+// Note: the name is kept from the force op, but this is the virial backward
+// host kernel; it strides the per-frame buffers and launches the CUDA
+// implementation above.
+template <typename data_t>
+void PdProdForceSeAOpGPUBackwardKernel(
+    int nloc, int nframes, int ndescrpt, int nnei,
+    const data_t* virial_grad, const data_t* net_deriv,
+    const data_t* in_deriv, const data_t* rij, const int* nlist,
+    data_t* grad_net) {
+  data_t* p_grad_net = grad_net;
+  const data_t* p_grad = virial_grad;
+  const data_t* p_in_deriv = in_deriv;
+  const data_t* p_rij = rij;
+  const int* p_nlist = nlist;
+  for (int_64 kk = 0; kk < nframes; ++kk) {
+    data_t* grad_net = p_grad_net + kk * nloc * ndescrpt;
+    const data_t* virial_grad = p_grad + kk * 9;
+    const data_t* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+    const data_t* rij = p_rij + kk * nloc * nnei * 3;
+    const int* nlist = p_nlist + kk * nloc * nnei;
+    deepmd::prod_virial_grad_a_gpu_cuda(grad_net, virial_grad, in_deriv, rij,
+                                        nlist, nloc, nnei);
+  }
+}
+
+std::vector<paddle::Tensor> PdProdVirialSeAOpCUDABackward(
+    const paddle::Tensor& virial_grad_tensor,
+    const paddle::Tensor& net_deriv_tensor,
+    const paddle::Tensor& in_deriv_tensor,
+    const paddle::Tensor& rij_tensor,
+    const paddle::Tensor& nlist_tensor,
+    const paddle::Tensor& natoms_tensor,
+    int n_a_sel, int n_r_sel) {
+  auto grad_shape = virial_grad_tensor.shape();
+  auto net_deriv_shape = net_deriv_tensor.shape();
+  auto in_deriv_shape = in_deriv_tensor.shape();
+  auto rij_shape = rij_tensor.shape();
+  auto nlist_shape = nlist_tensor.shape();
+  auto natoms_shape = natoms_tensor.shape();
+
+  CHECK_INPUT_DIM(virial_grad_tensor, 2);
+  CHECK_INPUT_DIM(net_deriv_tensor, 2);
+  CHECK_INPUT_DIM(in_deriv_tensor, 2);
+  CHECK_INPUT_DIM(rij_tensor, 2);
+  CHECK_INPUT_DIM(nlist_tensor, 2);
+  CHECK_INPUT_DIM(natoms_tensor, 1);
+
+  PD_CHECK(natoms_shape[0] >= 3,
+           "number of atoms should be larger than (or equal to) 3");
+
+  // natoms is expected to be a CPU tensor; see the TODO on the forward op.
+  const int* natoms = natoms_tensor.data<int>();
+  int nframes = net_deriv_shape[0];
+  int nloc = natoms[0];
+  int ndescrpt = net_deriv_shape[1] / nloc;
+  int nnei = nlist_shape[1] / nloc;
+
+  PD_CHECK(nframes == grad_shape[0], "number of frames should match");
+  PD_CHECK(nframes == in_deriv_shape[0], "number of samples should match");
+  PD_CHECK(nframes == rij_shape[0], "number of frames should match");
+  PD_CHECK(nframes == nlist_shape[0], "number of samples should match");
+  PD_CHECK(9 == grad_shape[1], "input grad shape should be nframes x 9");
+  PD_CHECK(nloc * ndescrpt * 3 == in_deriv_shape[1],
+           "number of descriptors should match");
+  PD_CHECK(nloc * nnei * 3 == rij_shape[1], "dim of rij should be nnei * 3");
+  PD_CHECK(nnei == (n_a_sel + n_r_sel), "number of neighbors should match");
+
+  std::vector<int64_t> grad_net_shape{nframes, nloc * ndescrpt};
+  paddle::Tensor grad_net_tensor = paddle::empty(
+      grad_net_shape, virial_grad_tensor.dtype(), virial_grad_tensor.place());
+
+  PD_DISPATCH_FLOATING_TYPES(
+      virial_grad_tensor.type(), "pd_prod_virial_se_a_cuda_backward_kernel",
+      ([&] {
+        PdProdForceSeAOpGPUBackwardKernel<data_t>(
+            nloc, nframes, ndescrpt, nnei,
+            virial_grad_tensor.data<data_t>(), net_deriv_tensor.data<data_t>(),
+            in_deriv_tensor.data<data_t>(), rij_tensor.data<data_t>(),
+            nlist_tensor.data<int>(), grad_net_tensor.mutable_data<data_t>());
+      }));
+  return {grad_net_tensor};
+}
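+
+// Only the gradient w.r.t. net_deriv is produced; the remaining inputs are
+// treated as constants by the grad op registered below.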
+std::vector<paddle::Tensor> PdProdVirialSeABackward(
+    const paddle::Tensor& virial_grad_tensor,
+    const paddle::Tensor& net_deriv_tensor,
+    const paddle::Tensor& in_deriv_tensor,
+    const paddle::Tensor& rij_tensor,
+    const paddle::Tensor& nlist_tensor,
+    const paddle::Tensor& natoms_tensor,
+    int n_a_sel, int n_r_sel) {
+  return PdProdVirialSeAOpCUDABackward(
+      virial_grad_tensor, net_deriv_tensor, in_deriv_tensor, rij_tensor,
+      nlist_tensor, natoms_tensor, n_a_sel, n_r_sel);
+}
+
+PD_BUILD_GRAD_OP(prod_virial_se_a)
+    .Inputs({paddle::Grad("virial"), "net_deriv", "in_deriv", "rij", "nlist",
+             "natoms"})
+    .Outputs({paddle::Grad("net_deriv")})
+    .Attrs({"n_a_sel: int", "n_r_sel: int"})
+    .SetKernelFn(PD_KERNEL(PdProdVirialSeABackward));
diff --git a/source/lib/paddle_src/prod_virial_grad.h b/source/lib/paddle_src/prod_virial_grad.h
new file mode 100644
index 0000000000..0e2cc46baa
--- /dev/null
+++ b/source/lib/paddle_src/prod_virial_grad.h
@@ -0,0 +1,63 @@
+#pragma once
+
+namespace deepmd {
+
+template <typename FPTYPE>
+void prod_virial_grad_a_cpu(FPTYPE* grad_net, const FPTYPE* grad,
+                            const FPTYPE* env_deriv, const FPTYPE* rij,
+                            const int* nlist, const int nloc, const int nnei);
+
+template <typename FPTYPE>
+void prod_virial_grad_r_cpu(FPTYPE* grad_net, const FPTYPE* grad,
+                            const FPTYPE* env_deriv, const FPTYPE* rij,
+                            const int* nlist, const int nloc, const int nnei);
+
+#if GOOGLE_CUDA
+template <typename FPTYPE>
+void prod_virial_grad_a_gpu_cuda(FPTYPE* grad_net, const FPTYPE* grad,
+                                 const FPTYPE* env_deriv, const FPTYPE* rij,
+                                 const int* nlist, const int nloc,
+                                 const int nnei);
+
+template <typename FPTYPE>
+void prod_virial_grad_r_gpu_cuda(FPTYPE* grad_net, const FPTYPE* grad,
+                                 const FPTYPE* env_deriv, const FPTYPE* rij,
+                                 const int* nlist, const int nloc,
+                                 const int nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+template <typename FPTYPE>
+void prod_virial_grad_a_gpu_rocm(FPTYPE* grad_net, const FPTYPE* grad,
+                                 const FPTYPE* env_deriv, const FPTYPE* rij,
+                                 const int* nlist, const int nloc,
+                                 const int nnei);
+
+template <typename FPTYPE>
+void prod_virial_grad_r_gpu_rocm(FPTYPE* grad_net, const FPTYPE* grad,
+                                 const FPTYPE* env_deriv, const FPTYPE* rij,
+                                 const int* nlist, const int nloc,
+                                 const int nnei);
+#endif  // TENSORFLOW_USE_ROCM
+
+}  // namespace deepmd
diff --git a/source/lib/paddle_src/setup_ins.py b/source/lib/paddle_src/setup_ins.py
new file mode 100644
index 0000000000..f510bd62f4
--- /dev/null
+++ b/source/lib/paddle_src/setup_ins.py
@@ -0,0 +1,54 @@
+from paddle.utils import cpp_extension
+
+cpp_extension.setup(
+    name="paddle_deepmd_lib",
+    ext_modules=cpp_extension.CppExtension(
+        sources=[
+            "../src/coord.cc",
+            "../src/env_mat_nvnmd.cc",
+            "../src/env_mat.cc",
+            "../src/ewald.cc",
+            "../src/fmt_nlist.cc",
+            "../src/gelu.cc",
+            "../src/map_aparam.cc",
+            "../src/neighbor_list.cc",
+            "../src/pair_tab.cc",
+            "../src/prod_env_mat_nvnmd.cc",
+            "../src/prod_env_mat.cc",
+            # "../src/prod_force_grad.cc",
+            # "../src/prod_force.cc",
+            # "../src/prod_virial_grad.cc",
+            # "../src/prod_virial.cc",
+            "../src/region.cc",
+            "../src/SimulationRegion.cpp",
+            "../src/soft_min_switch_force_grad.cc",
+            "../src/soft_min_switch_force.cc",
+            "../src/soft_min_switch_virial_grad.cc",
+            "../src/soft_min_switch_virial.cc",
+            "../src/soft_min_switch.cc",
+            "../src/tabulate.cc",
+            "../src/utilities.cc",
+            "../src/cuda/coord.cu",
+            "../src/cuda/gelu.cu",
+            "../src/cuda/neighbor_list.cu",
+            # "../src/cuda/prod_force_grad.cu",
+            # "../src/cuda/prod_force.cu",
"../src/cuda/prod_virial_grad.cu", + # "../src/cuda/prod_virial.cu", + "../src/cuda/region.cu", + "../src/cuda/tabulate.cu", + "./prod_env_mat.cu", + "./prod_virial_grad.cu", + "./prod_virial_grad.cc", + "./prod_virial.cu", + "./prod_force.cu", + # "./prod_force_grad.cc", + "./prod_force_grad.cu", + "./neighbor_stat.cu", + ], + include_dirs=[ + "/workspace/hesensen/deepmd_backend/deepmd-kit-tf/source/lib/include" + ], + library_dirs=["/usr/local/cuda-11/lib64"], + ), +)