diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index 377d492a4e..f8408ad83c 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -38,8 +38,6 @@ ) -# @Descriptor.register("se_e2_a") -# @Descriptor.register("se_a") class DescrptSeA(paddle.nn.Layer): r"""DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes the distance between atoms as input. @@ -149,14 +147,14 @@ def __init__( raise RuntimeError( f"rcut_smth ({rcut_smth:f}) should be no more than rcut ({rcut:f})!" ) - self.sel_a = sel # [46(O), 92(H)] / [60, 60] - self.rcut_r = rcut # 6.0 + self.sel_a = sel + self.rcut_r = rcut # NOTE: register 'rcut' in buffer to be accessed in inference self.register_buffer("buffer_rcut", paddle.to_tensor(rcut, dtype="float64")) - self.rcut_r_smth = rcut_smth # 0.5 - self.filter_neuron = neuron # [25, 50, 100] - self.n_axis_neuron = axis_neuron # 16 - self.filter_resnet_dt = resnet_dt # False + self.rcut_r_smth = rcut_smth + self.filter_neuron = neuron + self.n_axis_neuron = axis_neuron + self.filter_resnet_dt = resnet_dt self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = embedding_net_rand_seed_shift(self.filter_neuron) @@ -169,8 +167,8 @@ def __init__( assert len(tt) == 2 self.exclude_types.add((tt[0], tt[1])) self.exclude_types.add((tt[1], tt[0])) - self.set_davg_zero = set_davg_zero # False - # self.type_one_side = type_one_side # False + self.set_davg_zero = set_davg_zero + # self.type_one_side = type_one_side self.type_one_side = False self.spin = spin # None @@ -181,10 +179,14 @@ def __init__( self.sel_a.extend(self.sel_a_spin) else: self.ntypes_spin = 0 + # NOTE: register 'ntypes_spin' in buffer to be accessed in inference + self.register_buffer( + "buffer_ntypes_spin", paddle.to_tensor(self.ntypes_spin, dtype="int32") + ) # descrpt config - self.sel_r = [0 for ii in range(len(self.sel_a))] # [0, 0] - self.ntypes = len(self.sel_a) # 2 + self.sel_r = [0 for ii in range(len(self.sel_a))] + self.ntypes = len(self.sel_a) # NOTE: register 'ntypes' in buffer to be accessed in inference self.register_buffer( "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int32") @@ -208,10 +210,10 @@ def __init__( # self.nei_type = np.repeat(np.arange(self.ntypes), self.sel_a) self.avg_zero = paddle.zeros( [self.ntypes, self.ndescrpt], dtype=GLOBAL_PD_FLOAT_PRECISION - ) # [2, 552] + ) self.std_ones = paddle.ones( [self.ntypes, self.ndescrpt], dtype=GLOBAL_PD_FLOAT_PRECISION - ) # [2, 552] + ) nets = [] # self._pass_filter => self._filter => self._filter_lower for type_input in range(self.ntypes): @@ -235,46 +237,8 @@ def __init__( self.compress = False self.embedding_net_variables = None self.mixed_prec = None - # self.place_holders = {} self.nei_type = np.repeat(np.arange(self.ntypes), self.sel_a) # like a mask - # avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( - # GLOBAL_NP_FLOAT_PRECISION - # ) - # std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( - # GLOBAL_NP_FLOAT_PRECISION - # ) - # sub_graph = tf.Graph() - # with sub_graph.as_default(): - # name_pfx = "d_sea_" - # for ii in ["coord", "box"]: - # self.place_holders[ii] = tf.placeholder( - # GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii - # ) - # self.place_holders["type"] = tf.placeholder( - # tf.int32, [None, None], name=name_pfx + "t_type" - # ) - # self.place_holders["natoms_vec"] = tf.placeholder( - # tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" - # ) - # 
self.place_holders["default_mesh"] = tf.placeholder( - # tf.int32, [None], name=name_pfx + "t_mesh" - # ) - # self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( - # self.place_holders["coord"], - # self.place_holders["type"], - # self.place_holders["natoms_vec"], - # self.place_holders["box"], - # self.place_holders["default_mesh"], - # self.avg_zero, - # self.std_ones, - # rcut_a=self.rcut_a, - # rcut_r=self.rcut_r, - # rcut_r_smth=self.rcut_r_smth, - # sel_a=self.sel_a, - # sel_r=self.sel_r, - # ) - # self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) self.original_sel = None self.multi_task = multi_task if multi_task: @@ -289,9 +253,6 @@ def __init__( self.t_rcut = paddle.to_tensor( np.max([self.rcut_r, self.rcut_a]), dtype=GLOBAL_PD_FLOAT_PRECISION ) - # self.t_ntypes = paddle.to_tensor(self.ntypes, dtype="int32") - # self.t_ndescrpt = paddle.to_tensor(self.ndescrpt, dtype="int32") - # self.t_sel = paddle.to_tensor(self.sel_a, dtype="int32") self.register_buffer("buffer_sel", paddle.to_tensor(self.sel_a, dtype="int32")) self.register_buffer( "buffer_ndescrpt", paddle.to_tensor(self.ndescrpt, dtype="int32") @@ -587,47 +548,15 @@ def forward( """ davg = self.davg dstd = self.dstd - # if nvnmd_cfg.enable: - # if nvnmd_cfg.restore_descriptor: - # davg, dstd = build_davg_dstd() - # check_switch_range(davg, dstd) - # with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): if davg is None: davg = np.zeros([self.ntypes, self.ndescrpt]) if dstd is None: dstd = np.ones([self.ntypes, self.ndescrpt]) - # t_rcut = tf.constant( - # np.max([self.rcut_r, self.rcut_a]), - # name="rcut", - # dtype=GLOBAL_TF_FLOAT_PRECISION, - # ) - # t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) - # t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) - # t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) - # t_original_sel = paddle.to_tensor( - # self.original_sel if self.original_sel is not None else self.sel_a, - # ) - # self.t_avg = tf.get_variable( - # "t_avg", - # davg.shape, - # dtype=GLOBAL_TF_FLOAT_PRECISION, - # trainable=False, - # initializer=tf.constant_initializer(davg), - # ) - # self.t_std = tf.get_variable( - # "t_std", - # dstd.shape, - # dtype=GLOBAL_TF_FLOAT_PRECISION, - # trainable=False, - # initializer=tf.constant_initializer(dstd), - # ) coord = paddle.reshape(coord_, [-1, natoms[1] * 3]) box = paddle.reshape(box_, [-1, 9]) atype = paddle.reshape(atype_, [-1, natoms[1]]) - # op_descriptor = ( - # build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a - # ) + ( self.descrpt, self.descrpt_deriv, @@ -648,13 +577,8 @@ def forward( sel_r=self.sel_r, ) # only used when tensorboard was set as true - # tf.summary.histogram("descrpt", self.descrpt) - # tf.summary.histogram("rij", self.rij) - # tf.summary.histogram("nlist", self.nlist) self.descrpt_reshape = paddle.reshape(self.descrpt, [-1, self.ndescrpt]) - # [1, 105984] --> [192, 552] self.descrpt_reshape.stop_gradient = False - # self._identity_tensors(suffix=suffix) self.dout, self.qmat = self._pass_filter( self.descrpt_reshape, atype, @@ -665,8 +589,6 @@ def forward( trainable=self.trainable, ) # [1, all_atom, M1*M2], output_qmat: [1, all_atom, M1*3] - # only used when tensorboard was set as true - # tf.summary.histogram("embedding_net_output", self.dout) return self.dout def get_rot_mat(self) -> paddle.Tensor: @@ -720,9 +642,6 @@ def prod_force_virial( n_a_sel=self.nnei_a, n_r_sel=self.nnei_r, ) - # tf.summary.histogram("force", 
force)
            # tf.summary.histogram("virial", virial)
            # tf.summary.histogram("atom_virial", atom_virial)
        return force, virial, atom_virial
@@ -752,7 +671,6 @@ def _pass_filter(
        -------
        Tuple[Tensor, Tensor]: output: [1, all_atom, M1*M2], output_qmat: [1, all_atom, M1*3]
        """
-        # natoms = [192, 192, 64 , 128]
        if input_dict is not None:
            type_embedding = input_dict.get("type_embedding", None)
        else:
@@ -799,7 +717,7 @@
                output_qmat.append(qmat)
                start_index += natoms[2 + type_i].item()
        else:
-            ...
+            raise NotImplementedError()  # NOTE: this branch is currently never executed
            # inputs_i = inputs
            # inputs_i = paddle.reshape(inputs_i, [-1, self.ndescrpt])
@@ -841,8 +759,6 @@
            #     output_qmat.append(qmat)
        output = paddle.concat(output, axis=1)
        output_qmat = paddle.concat(output_qmat, axis=1)
-        # output: [1, 192, M1*M2]
-        # output_qmat: [1, 192, M1*3]
        return output, output_qmat

    def _compute_dstats_sys_smth(
@@ -856,13 +772,13 @@
            input_dict["default_mesh"] = paddle.to_tensor(mesh, "int32")

        self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a(
-            input_dict["coord"],  # fp32
-            input_dict["type"],  # int32
-            input_dict["box"],  # fp32
-            input_dict["default_mesh"],  # int32
+            input_dict["coord"],
+            input_dict["type"],
+            input_dict["box"],
+            input_dict["default_mesh"],
            self.avg_zero,
            self.std_ones,
-            input_dict["natoms_vec"],  # int32
+            input_dict["natoms_vec"],
            rcut_a=self.rcut_a,
            rcut_r=self.rcut_r,
            rcut_r_smth=self.rcut_r_smth,
@@ -968,7 +884,7 @@ def _filter_lower(
        type_input: int,  # outer-loop
        start_index: int,
        incrs_index: int,
-        inputs: paddle.Tensor,  # [1, 原子个数(64或128), 552(embedding_dim)]
+        inputs: paddle.Tensor,
        nframes: int,
        natoms: int,
        type_embedding=None,
@@ -983,7 +899,7 @@
            [0, 1],
            [0, start_index * 4],
            [inputs.shape[0], start_index * 4 + incrs_index * 4],
-        )  # 得到某个类型的原子i对邻域内类型为j的的原子关系,取出二者之间的描述矩阵R natom x nei_type_i x 4
+        )
        shape_i = inputs_i.shape
        natom = inputs_i.shape[0]
@@ -994,7 +910,7 @@
        xyz_scatter = paddle.reshape(
            paddle.slice(inputs_reshape, [0, 1], [0, 0], [inputs_reshape.shape[0], 1]),
            [-1, 1],
-        )  # 得到某个类型的原子i对邻域内类型为j的的原子关系,取出二者之间的描述矩阵R矩阵的第一列s(rij)
+        )

        if type_embedding is not None:
            xyz_scatter = self._concat_type_embedding(
@@ -1005,25 +921,6 @@
                "compression of type embedded descriptor is not supported at the moment"
            )
        # natom x 4 x outputs_size
-        # if nvnmd_cfg.enable:
-        #     return filter_lower_R42GR(
-        #         type_i,
-        #         type_input,
-        #         inputs_i,
-        #         is_exclude,
-        #         activation_fn,
-        #         bavg,
-        #         stddev,
-        #         trainable,
-        #         suffix,
-        #         self.seed,
-        #         self.seed_shift,
-        #         self.uniform_seed,
-        #         self.filter_neuron,
-        #         self.filter_precision,
-        #         self.filter_resnet_dt,
-        #         self.embedding_net_variables,
-        #     )
        if self.compress and (not is_exclude):
            if self.type_one_side:
                net = "filter_-1_net_" + str(type_i)
@@ -1047,9 +944,7 @@
        else:
            if not is_exclude:  # this branch is executed
-                xyz_scatter_out = self.embedding_nets[type_input][type_i](
-                    xyz_scatter
-                )  # 对 s(rij) 进行embedding映射, (natom x nei_type_i) x 1==>(natom x nei_type_i) x 100,得到每个原子i对邻域内类型为j的的原子特征,所有该类型的原子的g_i的concat
+                xyz_scatter_out = self.embedding_nets[type_input][type_i](xyz_scatter)
                if (not self.uniform_seed) and (self.seed is not None):
                    self.seed += self.seed_shift
            else:
@@ -1069,23 +964,16 @@
        # So we need to explicitly assign the shape to paddle.shape(inputs_i)[0] instead of -1
        # natom x 4 x outputs_size
-        # [natom, nei_type_i, 4].T x [natom, nei_type_i, 100]
-        # 
等价于
-        # [natom, 4, nei_type_i] x [natom, nei_type_i, 100]
-        # ==>
-        # [natom, 4, 100]
        return paddle.matmul(
-            paddle.reshape(
-                inputs_i, [natom, shape_i[1] // 4, 4]
-            ),  # [natom, nei_type_i, 4]
-            xyz_scatter_out,  # [natom, nei_type_i, 100]
+            paddle.reshape(inputs_i, [natom, shape_i[1] // 4, 4]),
+            xyz_scatter_out,
            transpose_x=True,
-        )  # 得到(R_i).T*g_i,即D_i表达式的右半部分
+        )

    # @cast_precision
    def _filter(
        self,
-        inputs: paddle.Tensor,  # [1, 原子个数(64或128), 552(nnei*4)]
+        inputs: paddle.Tensor,
        type_input: int,
        natoms,
        type_embedding=None,
@@ -1125,13 +1013,9 @@
        -------
        Tuple[Tensor, Tensor]: result: [64/128, M1*M2], qmat: [64/128, M1, 3]
        """
+        # NOTE: the nframes computation below is commented out because it is wrong:
+        # the reshape does not preserve numel and raises an error in Paddle (TF
+        # silently pruned the unused result), so a dummy value of 1 is used instead.
        # nframes = paddle.shape(paddle.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0]
        nframes = 1
        # natom x (nei x 4)
        shape = inputs.shape
@@ -1168,31 +1052,23 @@
            rets = []
            # execute this branch
            for type_i in range(self.ntypes):
-                # 计算type_input和type_i的原子之间的特征
                ret = self._filter_lower(
                    type_i,
                    type_input,
                    start_index,
                    self.sel_a[type_i],  # 46(O)/92(H)
-                    inputs,  # [1, 原子个数(64或128), 552(nnei*4)]
+                    inputs,
                    nframes,
                    natoms,
                    type_embedding=type_embedding,
                    is_exclude=(type_input, type_i) in self.exclude_types,
-                    # activation_fn=activation_fn,
-                    # stddev=stddev,
-                    # bavg=bavg,
-                    # trainable=trainable,
-                    # suffix="_" + str(type_i),
-                )  # ==> [natom_i, 4, 100]
+                )
                if (type_input, type_i) not in self.exclude_types:
                    # add zero is meaningless; skip
                    rets.append(ret)
                start_index += self.sel_a[type_i]
            # faster to use accumulate_n than multiple add
-            xyz_scatter_1 = paddle.add_n(
-                rets
-            )  # 得到所有(R_i).T*g_i: [当前类型原子个数64/128, 4, embedding维度M1]
+            xyz_scatter_1 = paddle.add_n(rets)
        else:
            xyz_scatter_1 = self._filter_lower(
                type_i,
                type_input,
@@ -1204,13 +1080,7 @@
                natoms,
                type_embedding=type_embedding,
                is_exclude=False,
-                # activation_fn=activation_fn,
-                # stddev=stddev,
-                # bavg=bavg,
-                # trainable=trainable,
            )
-        # if nvnmd_cfg.enable:
-        #     return filter_GR2D(xyz_scatter_1)
        # natom x nei x outputs_size
        # xyz_scatter = tf.concat(xyz_scatter_total, axis=1)
        # natom x nei x 4
@@ -1229,17 +1099,14 @@
                ),
                self.filter_precision,
            )
-            xyz_scatter_1 = (
-                xyz_scatter_1 / nnei
-            )  # (R_i).T*g_i: [当前类型原子个数64/128, 4, embedding维度M1]
+            xyz_scatter_1 = xyz_scatter_1 / nnei
        # natom x 4 x outputs_size_2
        xyz_scatter_2 = paddle.slice(
            xyz_scatter_1,
            [0, 1, 2],
            [0, 0, 0],
            [xyz_scatter_1.shape[0], xyz_scatter_1.shape[1], outputs_size_2],
-        )  # [当前类型原子个数, R矩阵描述特征数4, 隐层特征数里的前16维特征(M2)], [64, 4, 16]
-        # (g_i<).T*(R_i): [当前类型原子个数64/128, 4, embedding前M2列]
+        )
        # natom x 3 x outputs_size_2
        # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1])
        # natom x 3 x outputs_size_1
@@ -1252,16 +1119,10 @@
        # natom x outputs_size_1 x 3
        qmat = paddle.transpose(qmat, perm=[0, 2, 1])  # [64/128, M1, 3]
        # natom x outputs_size x outputs_size_2
-        result = paddle.matmul(
-            xyz_scatter_1, xyz_scatter_2, transpose_x=True
-        )  # [64/128,M1,4]x[64/128,4,M2]==>[64/128,M1,M2]
+        result = paddle.matmul(xyz_scatter_1, xyz_scatter_2, transpose_x=True)
        # natom x (outputs_size x outputs_size_2)
-        result = paddle.reshape(
-            result, [-1, outputs_size_2 * outputs_size[-1]]
-        )  # [64,M1*M2]
+        result = paddle.reshape(result, [-1, outputs_size_2 * outputs_size[-1]])

-        # result: [64/128, M1*M2]
-        # qmat: 
[64/128, M1, 3]
        return result, qmat

    def init_variables(
diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py
index a81f7109c5..dba1f7bd15 100755
--- a/deepmd/entrypoints/freeze.py
+++ b/deepmd/entrypoints/freeze.py
@@ -311,6 +311,14 @@ def _make_node_names(
 def freeze_graph(
     model_file: str,
     output: str,
+    # sess,
+    # input_graph,
+    # input_node,
+    # freeze_type,
+    # modifier,
+    # out_graph_name,
+    # node_names=None,
+    # out_suffix="",
 ):
     """Freeze the single graph with chosen out_suffix.
@@ -356,7 +364,7 @@
        print(
            f"[{name}, {param.dtype}, {param.shape}] generated name in static_model is: {param.name}"
        )
-    # 跳过对program的裁剪,从而保留rcut、ntypes等不参与前向的参数,从而在C++端可以获取这些参数
+    # Skip program pruning so that parameters not used in the forward pass (e.g. rcut, ntypes) are kept in the saved model and can be retrieved on the C++ side.
    skip_prune_program = True
    print(f"==>> Set skip_prune_program = {skip_prune_program}")
    paddle.jit.save(st_model, output, skip_prune_program=skip_prune_program)
@@ -441,12 +449,8 @@ def freeze_graph_multi(
 def freeze(
     *,
-    # checkpoint_folder: str,
     input_file: str,
     output: str,
-    # node_names: Optional[str] = None,
-    # nvnmd_weight: Optional[str] = None,
-    # united_model: bool = False,
     **kwargs,
 ):
     """Freeze the graph in supplied folder.
@@ -460,78 +464,7 @@
    **kwargs
        other arguments
    """
-    # We retrieve our checkpoint fullpath
-    # checkpoint = tf.train.get_checkpoint_state(checkpoint_folder)
-    # input_checkpoint = checkpoint.model_checkpoint_path
-
-    # # expand the output file to full path
-    # output_graph = abspath(output)
-
-    # # Before exporting our graph, we need to precise what is our output node
-    # # This is how TF decides what part of the Graph he has to keep
-    # # and what part it can dump
-    # # NOTE: this variable is plural, because you can have multiple output nodes
-    # # node_names = "energy_test,force_test,virial_test,t_rcut"
-
-    # # We clear devices to allow TensorFlow to control
-    # # on which device it will load operations
-    # clear_devices = True
-
-    # # We import the meta graph and retrieve a Saver
-    # try:
-    #     # In case paralle training
-    #     import horovod.tensorflow as _  # noqa: F401
-    # except ImportError:
-    #     pass
-    # saver = tf.train.import_meta_graph(
-    #     f"{input_checkpoint}.meta", clear_devices=clear_devices
-    # )
-
-    # # We retrieve the protobuf graph definition
-    # graph = tf.get_default_graph()
-    # try:
-    #     input_graph_def = graph.as_graph_def()
-    # except google.protobuf.message.DecodeError as e:
-    #     raise GraphTooLargeError(
-    #         "The graph size exceeds 2 GB, the hard limitation of protobuf."
-    #         " Then a DecodeError was raised by protobuf. You should "
-    #         "reduce the size of your model."
- # ) from e - # nodes = [n.name for n in input_graph_def.node] - - # # We start a session and restore the graph weights - # with tf.Session() as sess: - # saver.restore(sess, input_checkpoint) - # model_type = run_sess(sess, "model_attr/model_type:0", feed_dict={}).decode( - # "utf-8" - # ) - # if "modifier_attr/type" in nodes: - # modifier_type = run_sess(sess, "modifier_attr/type:0", feed_dict={}).decode( - # "utf-8" - # ) - # else: - # modifier_type = None - # if nvnmd_weight is not None: - # save_weight(sess, nvnmd_weight) # nvnmd - # if model_type != "multi_task": freeze_graph( input_file, output, - # sess, - # input_graph_def, - # nodes, - # model_type, - # modifier_type, - # output_graph, - # node_names, ) - # else: - # freeze_graph_multi( - # sess, - # input_graph_def, - # nodes, - # modifier_type, - # output_graph, - # node_names, - # united_model=united_model, - # ) diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index 472136a08e..229a8bb0f5 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -419,15 +419,6 @@ def get_nbor_stat(jdata, rcut, one_type: bool = False): min_nbor_dist, max_nbor_size = neistat.get_stat(train_data) - # moved from traier.py as duplicated - # TODO: this is a simple fix but we should have a clear - # architecture to call neighbor stat - # tf.constant( - # min_nbor_dist, - # name="train_attr/min_nbor_dist", - # dtype=GLOBAL_ENER_FLOAT_PRECISION, - # ) - # tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32) return min_nbor_dist, max_nbor_size @@ -473,9 +464,7 @@ def update_one_sel(jdata, descriptor): if descriptor["type"] == "loc_frame": return descriptor rcut = descriptor["rcut"] - tmp_sel = get_sel( - jdata, rcut, one_type=descriptor["type"] in ("se_atten",) - ) # [38 72],每个原子截断半径内,最多的邻域原子个数 + tmp_sel = get_sel(jdata, rcut, one_type=descriptor["type"] in ("se_atten",)) sel = descriptor["sel"] # [46, 92] if isinstance(sel, int): # convert to list and finnally convert back to int diff --git a/deepmd/env.py b/deepmd/env.py index 9eb7e1e6a8..044301c628 100644 --- a/deepmd/env.py +++ b/deepmd/env.py @@ -372,10 +372,7 @@ def get_module(module_name: str) -> "ModuleType": raise FileNotFoundError(f"module {module_name} does not exist") else: try: - # module = tf.load_op_library(str(module_file)) - import paddle_deepmd_lib - - module = paddle_deepmd_lib + import paddle_deepmd_lib as module except tf.errors.NotFoundError as e: # check CXX11_ABI_FLAG is compatiblity diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py index 8f1be7d30a..7ff5d3d5b1 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/fit/ener.py @@ -452,7 +452,6 @@ def _build_lower( bias_atom_e=0.0, type_suffix="", suffix="", - # reuse=None, type_i=None, ): # cut-out inputs @@ -483,47 +482,15 @@ def _build_lower( ext_aparam = paddle.cast(ext_aparam, self.fitting_precision) layer = paddle.concat([layer, ext_aparam], axis=1) - # if nvnmd_cfg.enable: - # one_layer = one_layer_nvnmd - # else: - # one_layer = one_layer_deepmd for ii in range(0, len(self.n_neuron)): - # if self.layer_name is not None and self.layer_name[ii] is not None: - # layer_suffix = "share_" + self.layer_name[ii] + type_suffix - # layer_reuse = tf.AUTO_REUSE - # else: - # layer_suffix = "layer_" + str(ii) + type_suffix + suffix - # layer_reuse = reuse if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii - 1]: layer += self.one_layers[type_i][ii](layer) else: layer = self.one_layers[type_i][ii](layer) - # print(f"use {ii} of {len(self.one_layers)}_{type_i}") - # if (not 
self.uniform_seed) and (self.seed is not None): - # self.seed += self.seed_shift - # if self.layer_name is not None and self.layer_name[-1] is not None: - # layer_suffix = "share_" + self.layer_name[-1] + type_suffix - # layer_reuse = tf.AUTO_REUSE - # else: - # layer_suffix = "final_layer" + type_suffix + suffix - # layer_reuse = reuse if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift - final_layer = self.final_layers[type_i]( - layer, - # 1, - # activation_fn=None, - # bavg=bias_atom_e, - # name=layer_suffix, - # reuse=layer_reuse, - # seed=self.seed, - # precision=self.fitting_precision, - # trainable=self.trainable[-1], - # uniform_seed=self.uniform_seed, - # initial_variables=self.fitting_net_variables, - # mixed_prec=self.mixed_prec, - # final_layer=True, - ) + + final_layer = self.final_layers[type_i](layer) if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift @@ -646,18 +613,6 @@ def forward( start_index = 0 outs_list = [] for type_i in range(ntypes_atom): - # final_layer = inputs - # for layer_j in range(type_i * ntypes_atom, (type_i + 1) * ntypes_atom): - # final_layer = self.one_layers[layer_j](final_layer) - # final_layer = self.final_layers[type_i](final_layer) - # print(final_layer.shape) - - # # concat the results - # if type_i < len(self.atom_ener) and self.atom_ener[type_i] is not None: - # zero_layer = inputs_zero - # for layer_j in range(type_i * ntypes_atom, (type_i + 1) * ntypes_atom): - # zero_layer = self.one_layers[layer_j](zero_layer) - # zero_layer = self.final_layers[type_i](zero_layer) final_layer = self._build_lower( start_index, natoms[2 + type_i], @@ -667,7 +622,6 @@ def forward( bias_atom_e=0.0, type_suffix="_type_" + str(type_i), suffix=suffix, - # reuse=reuse, type_i=type_i, ) # concat the results @@ -681,13 +635,12 @@ def forward( bias_atom_e=0.0, type_suffix="_type_" + str(type_i), suffix=suffix, - # reuse=True, type_i=type_i, ) final_layer -= zero_layer final_layer = paddle.reshape( final_layer, [paddle.shape(inputs)[0], natoms[2 + type_i]] - ) # [1, natoms] + ) outs_list.append(final_layer) start_index += natoms[2 + type_i] # concat the results @@ -734,7 +687,7 @@ def forward( ), [paddle.shape(inputs)[0], paddle.sum(natoms[2 : 2 + ntypes_atom]).item()], ) - outs = outs + self.add_type # 类型编码(类似于transformer的位置编码,每种类型自己有一个特征,加到原特征上) + outs = outs + self.add_type outs *= atype_filter self.atom_ener_after = outs diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 666101dd1c..3da589a514 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -327,18 +327,18 @@ def __init__( for k, v in load_state_dict.items(): if k in self.model.state_dict(): if load_state_dict[k].dtype != self.model.state_dict()[k].dtype: - print( - f"convert {k}'s dtype from {load_state_dict[k].dtype} to {self.model.state_dict()[k].dtype}" - ) + # print( + # f"convert {k}'s dtype from {load_state_dict[k].dtype} to {self.model.state_dict()[k].dtype}" + # ) load_state_dict[k] = load_state_dict[k].astype( self.model.state_dict()[k].dtype ) if list(load_state_dict[k].shape) != list( self.model.state_dict()[k].shape ): - print( - f"convert {k}'s shape from {load_state_dict[k].shape} to {self.model.state_dict()[k].shape}" - ) + # print( + # f"convert {k}'s shape from {load_state_dict[k].shape} to {self.model.state_dict()[k].shape}" + # ) load_state_dict[k] = load_state_dict[k].reshape( self.model.state_dict()[k].shape ) @@ -492,7 +492,6 @@ def _load_graph( # # print(param.shape) # if 
param.shape == (2,): # print(constant_op.outputs[0], param) - return graph @staticmethod diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py index ad4ee4a48d..e9c5c8ae09 100644 --- a/deepmd/infer/deep_pot.py +++ b/deepmd/infer/deep_pot.py @@ -86,24 +86,6 @@ def __init__( # fitting attrs "dfparam": "fitting.t_dfparam", "daparam": "fitting.t_daparam", - # # fitting attrs - # "t_dfparam": "fitting_attr/dfparam:0", - # "t_daparam": "fitting_attr/daparam:0", - # # model attrs - # "t_tmap": "model_attr/tmap:0", - # # inputs - # "t_coord": "t_coord:0", - # "t_type": "t_type:0", - # "t_natoms": "t_natoms:0", - # "t_box": "t_box:0", - # "t_mesh": "t_mesh:0", - # # add output tensors - # "t_energy": "o_energy:0", - # "t_force": "o_force:0", - # "t_virial": "o_virial:0", - # "t_ae": "o_atom_energy:0", - # "t_av": "o_atom_virial:0", - # "t_descriptor": "o_descriptor:0", }, ) DeepEval.__init__( @@ -115,45 +97,13 @@ def __init__( ) # # load optional tensors - # operations = [op.name for op in self.graph.get_operations()] - # # check if the graph has these operations: - # # if yes add them - # if "t_efield" in operations: - # # self._get_tensor("t_efield:0", "t_efield") - # if self._get_value("t_efield") is not None: - # self._get_value("t_efield", "t_efield") - # self.has_efield = True - # else: - # log.debug("Could not get tensor 't_efield'") - # self.t_efield = None self.has_efield = False - # if self._get_value("load/t_fparam") is not None: - # self.tensors.update({"t_fparam": "t_fparam"}) - # self.has_fparam = True - # else: - # log.debug("Could not get tensor 't_fparam'") - # self.t_fparam = None self.has_fparam = False - # if self._get_value("load/t_aparam") is not None: - # self.tensors.update({"t_aparam": "t_aparam"}) - # self.has_aparam = True - # else: - # log.debug("Could not get tensor 't_aparam'") - # self.t_aparam = None self.has_aparam = False - - # if self._get_value("load/spin_attr/ntypes_spin") is not None: - # self.tensors.update({"t_ntypes_spin": "spin_attr/ntypes_spin"}) - # self.has_spin = True - # else: - self.ntypes_spin = ( - 0 - if self.model.descrpt.spin is None - else self.model.descrpt.spin.ntypes_spin - ) - self.has_spin = self.model.descrpt.spin is not None + self.ntypes_spin = 0 + self.has_spin = False # now load tensors to object attributes for attr_name, tensor_name in self.tensors.items(): @@ -182,43 +132,6 @@ def __init__( self.modifier_type = None self.descriptor_type = "se_e2_a" - # try: - # t_jdata = self._get_tensor("train_attr/training_script") - # jdata = run_sess(self.sess, t_jdata).decode("UTF-8") - # import json - - # jdata = json.loads(jdata) - # self.descriptor_type = jdata["model"]["descriptor"]["type"] - # except (ValueError, KeyError): - # self.descriptor_type = None - - # if self.modifier_type == "dipole_charge": - # t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0") - # t_mdl_charge_map = self._get_tensor("modifier_attr/mdl_charge_map:0") - # t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0") - # t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0") - # t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0") - # [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess( - # self.sess, - # [ - # t_mdl_name, - # t_mdl_charge_map, - # t_sys_charge_map, - # t_ewald_h, - # t_ewald_beta, - # ], - # ) - # mdl_name = mdl_name.decode("UTF-8") - # mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()] - # sys_charge_map = [int(ii) for ii in 
sys_charge_map.decode("UTF-8").split()] - # self.dm = DipoleChargeModifier( - # mdl_name, - # mdl_charge_map, - # sys_charge_map, - # ewald_h=ewald_h, - # ewald_beta=ewald_beta, - # ) - def _run_default_sess(self): if self.has_spin is True: [ @@ -491,32 +404,6 @@ def _prepare_feed_dict( assert natoms_vec[0] == natoms # evaluate - # feed_dict_test = {} - # feed_dict_test[self.t_natoms] = natoms_vec - # if mixed_type: - # feed_dict_test[self.t_type] = atom_types.reshape([-1]) - # else: - # feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( - # [-1] - # ) - # feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - - # if len(self.t_box.shape) == 1: - # feed_dict_test[self.t_box] = np.reshape(cells, [-1]) - # elif len(self.t_box.shape) == 2: - # feed_dict_test[self.t_box] = cells - # else: - # raise RuntimeError - # if self.has_efield: - # feed_dict_test[self.t_efield] = np.reshape(efield, [-1]) - # if pbc: - # feed_dict_test[self.t_mesh] = make_default_mesh(cells) - # else: - # feed_dict_test[self.t_mesh] = np.array([], dtype=np.int32) - # if self.has_fparam: - # feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1]) - # if self.has_aparam: - # feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1]) return None, None, natoms_vec def _eval_inner( @@ -537,48 +424,6 @@ def _eval_inner( coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type ) - # t_out = [self.t_energy, self.t_force, self.t_virial] - # if atomic: - # t_out += [self.t_ae, self.t_av] - - # v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test) - # energy = v_out[0] - # force = v_out[1] - # virial = v_out[2] - # if atomic: - # ae = v_out[3] - # av = v_out[4] - - # if self.has_spin: - # ntypes_real = self.ntypes - self.ntypes_spin - # natoms_real = sum( - # [ - # np.count_nonzero(np.array(atom_types) == ii) - # for ii in range(ntypes_real) - # ] - # ) - # else: - # natoms_real = natoms - - # # reverse map of the outputs - # force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap) - # if atomic: - # ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap[:natoms_real]) - # av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap) - - # energy = np.reshape(energy, [nframes, 1]) - # force = np.reshape(force, [nframes, natoms, 3]) - # virial = np.reshape(virial, [nframes, 9]) - # if atomic: - # ae = np.reshape(ae, [nframes, natoms_real, 1]) - # av = np.reshape(av, [nframes, natoms, 9]) - # return energy, force, virial, ae, av - # else: - # atom_types = np.array(atom_types, dtype=int).reshape([-1]) - # natoms = atom_types.size - # coords = np.reshape(np.array(coords), [-1, natoms * 3]) - # nframes = coords.shape[0] - eval_inputs = {} eval_inputs["coord"] = paddle.to_tensor( np.reshape(coords, [-1]), dtype="float64" @@ -627,11 +472,11 @@ def _eval_inner( else: # NOTE: 使用动态图模型推理 eval_outputs = self.model( - eval_inputs["coord"], # [2880] paddle.float64 - eval_inputs["type"], # [960] paddle.int32 - eval_inputs["natoms_vec"], # [2+num_type_atoms] paddle.int32 - eval_inputs["box"], # [45] paddle.float64 - eval_inputs["default_mesh"], # [6] paddle.int32 + eval_inputs["coord"], + eval_inputs["type"], + eval_inputs["natoms_vec"], + eval_inputs["box"], + eval_inputs["default_mesh"], eval_inputs, suffix="", reuse=False, diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py index 3a2b03ae6c..9d9d720077 100644 --- a/deepmd/loss/ener.py +++ b/deepmd/loss/ener.py @@ -195,11 +195,6 @@ def compute_loss(self, learning_rate, natoms, model_dict, label_dict, suffix): l2_loss = 0 
more_loss = {} - # print(self.has_e) - # print(self.has_f) - # print(self.has_v) - # print(self.has_ae) - # print(self.has_pf) if self.has_e: # true l2_loss += atom_norm_ener * (pref_e * l2_ener_loss) more_loss["l2_ener_loss"] = l2_ener_loss @@ -216,24 +211,6 @@ def compute_loss(self, learning_rate, natoms, model_dict, label_dict, suffix): l2_loss += pref_pf * l2_pref_force_loss more_loss["l2_pref_force_loss"] = l2_pref_force_loss - # only used when tensorboard was set as true - # self.l2_loss_summary = paddle.summary.scalar("l2_loss_" + suffix, paddle.sqrt(l2_loss)) - # if self.has_e: - # self.l2_loss_ener_summary = paddle.summary.scalar( - # "l2_ener_loss_" + suffix, - # global_cvt_2_tf_float(paddle.sqrt(l2_ener_loss)) - # / global_cvt_2_tf_float(natoms[0]), - # ) - # if self.has_f: - # self.l2_loss_force_summary = paddle.summary.scalar( - # "l2_force_loss_" + suffix, paddle.sqrt(l2_force_loss) - # ) - # if self.has_v: - # self.l2_loss_virial_summary = paddle.summary.scalar( - # "l2_virial_loss_" + suffix, - # paddle.sqrt(l2_virial_loss) / global_cvt_2_tf_float(natoms[0]), - # ) - self.l2_l = l2_loss self.l2_more = more_loss return l2_loss, more_loss @@ -273,7 +250,6 @@ def eval(self, model, batch_data, natoms): reuse=False, ) l2_l, l2_more = self.compute_loss( - # 0.0, natoms, model_dict, batch_data 0.0, model_inputs["natoms_vec"], model_pred, diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py index 2272a0b4b2..1d3874b533 100644 --- a/deepmd/model/ener.py +++ b/deepmd/model/ener.py @@ -77,6 +77,7 @@ def __init__( super().__init__() # super(EnerModel, self).__init__(name_scope="EnerModel") """Constructor.""" + super().__init__() # descriptor self.descrpt = descrpt self.rcut = self.descrpt.get_rcut() @@ -105,7 +106,7 @@ def __init__( self.srtab = None # content of build below - self.t_tmap = " ".join(self.type_map) # "Ni O" + self.t_tmap = " ".join(self.type_map) self.t_mt = self.model_type self.t_ver = str(MODEL_VERSION) # NOTE: workaround for string type is not supported in Paddle @@ -148,7 +149,6 @@ def data_stat(self, data): m_all_stat, protection=self.data_stat_protect, mixed_type=data.mixed_type ) self._compute_output_stat(all_stat, mixed_type=data.mixed_type) - # self.bias_atom_e = data.compute_energy_shift(self.rcond) def _compute_input_stat(self, all_stat, protection=1e-2, mixed_type=False): if mixed_type: @@ -197,7 +197,7 @@ def forward( coord = paddle.reshape(coord_, [-1, natoms[1] * 3]) atype = paddle.reshape(atype_, [-1, natoms[1]]) - # input_dict["nframes"] = paddle.shape(coord)[0] # 推理模型导出的时候注释掉这里,否则会报错 + # input_dict["nframes"] = paddle.shape(coord)[0] # 推理模型导出的时候注释掉这里,否则会报错 # type embedding if any # if self.typeebd is not None: @@ -222,12 +222,9 @@ def forward( box, mesh, input_dict, - # frz_model=frz_model, - # ckpt_meta=ckpt_meta, suffix=suffix, reuse=reuse, - ) # [1, all_atom, M1*M2] - # self.dout = dout + ) if self.srtab is not None: nlist, rij, sel_a, sel_r = self.descrpt.get_nlist() @@ -238,6 +235,9 @@ def forward( self.atom_ener = atom_ener if self.srtab is not None: + raise NotImplementedError( + f"srtab not implemented in {self.__class__.__name__}" + ) # sw_lambda, sw_deriv = op_module.soft_min_switch( # atype, # rij, @@ -270,11 +270,8 @@ def forward( # ) # atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener # energy_raw = tab_atom_ener + atom_ener - raise NotImplementedError( - f"srtab not implemented in {self.__class__.__name__}" - ) else: - energy_raw = atom_ener # [1, all_atoms] + energy_raw = atom_ener nloc_atom = ( natoms[0] @@ -289,7 +286,7 @@ 
def forward( force, virial, atom_virial = self.descrpt.prod_force_virial(atom_ener, natoms) # force: [1, all_atoms*3] # virial: [1, 9] - # force: [1, all_atoms*9] + # atom_virial: [1, all_atoms*9] if self.srtab is not None: raise NotImplementedError() @@ -333,13 +330,13 @@ def forward( ) model_dict = {} - model_dict["energy"] = energy # [batch_size] - model_dict["force"] = force # [batch_size, 576] - model_dict["virial"] = virial # [batch_size, 9] - model_dict["atom_ener"] = energy_raw # [batch_size, 192] - model_dict["atom_virial"] = atom_virial # [batch_size, 1728] - model_dict["coord"] = coord # [batch_size, 576] - model_dict["atype"] = atype # [batch_size, 192] + model_dict["energy"] = energy + model_dict["force"] = force + model_dict["virial"] = virial + model_dict["atom_ener"] = energy_raw + model_dict["atom_virial"] = atom_virial + model_dict["coord"] = coord + model_dict["atype"] = atype return model_dict def init_variables( diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 2d13b8874e..18c2d9e3c6 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -848,39 +848,6 @@ def train(self, train_data=None, valid_data=None, stop_batch: int = 10): ) ) - # prf_options = None - # prf_run_metadata = None - # if self.profiling: - # prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - # prf_run_metadata = tf.RunMetadata() - - # set tensorboard execution environment - # if self.tensorboard: - # summary_merged_op = tf.summary.merge_all() - # # Remove TB old logging directory from previous run - # try: - # shutil.rmtree(self.tensorboard_log_dir) - # except FileNotFoundError: - # pass # directory does not exist, this is OK - # except Exception as e: - # # general error when removing directory, warn user - # log.exception( - # f"Could not remove old tensorboard logging directory: " - # f"{self.tensorboard_log_dir}. 
Error: {e}" - # ) - # else: - # log.debug("Removing old tensorboard log directory.") - # tb_train_writer = tf.summary.FileWriter( - # self.tensorboard_log_dir + "/train", self.sess.graph - # ) - # tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + "/test") - # else: - # tb_train_writer = None - # tb_valid_writer = None - # if self.enable_profiler: - # # https://www.tensorflow.org/guide/profiler - # tfv2.profiler.experimental.start(self.tensorboard_log_dir) - train_time = 0 total_train_time = 0.0 wall_time_tic = time.time() @@ -898,35 +865,6 @@ def train(self, train_data=None, valid_data=None, stop_batch: int = 10): while cur_batch < stop_batch: train_batch = datasetloader.get_data_dict() - # first round validation: - # if is_first_step: - # if not self.multi_task_mode: - # train_batch = train_data.get_batch() - # # batch_train_op = self.train_op - # else: - # fitting_idx = dp_random.choice( - # np.arange(self.nfitting), p=np.array(self.fitting_prob) - # ) - # fitting_key = self.fitting_key_list[fitting_idx] - # train_batch = train_data[fitting_key].get_batch() - # # batch_train_op = self.train_op[fitting_key] - # else: - # train_batch = next_datasetloader.get_data_dict(next_train_batch_list) - # # batch_train_op = next_batch_train_op - # fitting_key = next_fitting_key - # for next round - # if not self.multi_task_mode: - # next_datasetloader = datasetloader - # next_batch_train_op = self.train_op - # next_train_batch_op = data_op - # else: - # fitting_idx = dp_random.choice( - # np.arange(self.nfitting), p=np.array(self.fitting_prob) - # ) - # next_fitting_key = self.fitting_key_list[fitting_idx] - # next_datasetloader = datasetloader[next_fitting_key] - # next_batch_train_op = self.train_op[fitting_key] - # next_train_batch_op = data_op[fitting_key] if self.display_in_training and is_first_step: if self.run_opt.is_chief: @@ -976,48 +914,8 @@ def train(self, train_data=None, valid_data=None, stop_batch: int = 10): # use tensorboard to visualize the training of deepmd-kit # it will takes some extra execution time to generate the tensorboard data if self.tensorboard and (cur_batch % self.tensorboard_freq == 0): - # summary, _, next_train_batch_list = run_sess( - # self.sess, - # [summary_merged_op, batch_train_op, next_train_batch_op], - # feed_dict=train_feed_dict, - # options=prf_options, - # run_metadata=prf_run_metadata, - # ) - # tb_train_writer.add_summary(summary, cur_batch) pass - # model_pred = self.model( - # paddle.to_tensor(train_batch["coord"], "float64"), - # paddle.to_tensor(train_batch["type"], "int32"), - # paddle.to_tensor(train_batch["natoms_vec"], "int32", "cpu"), - # paddle.to_tensor(train_batch["box"], "float64"), - # paddle.to_tensor(train_batch["default_mesh"], "int32"), - # train_batch, - # suffix="", - # reuse=False, - # ) else: - """ - find_box:0", dtype=float32) () - find_coord:0", dtype=float32) () - find_numb_copy:0", dtype=float32) () - find_energy:0", dtype=float32) () - find_force:0", dtype=float32) () - find_virial:0", dtype=float32) () - find_atom_ener:0", dtype=float32) () - find_atom_pref:0", dtype=float32) () - box:0", shape=(?,), dtype=float64) (9,) - coord:0", shape=(?,), dtype=float64) (576,) - numb_copy:0", shape=(?,), dtype=float64) (1,) - energy:0", shape=(?,), dtype=float64) (1,) - force:0", shape=(?,), dtype=float64) (576,) - virial:0", shape=(?,), dtype=float64) (9,) - atom_ener:0", shape=(?,), dtype=float64) (192,) - atom_pref:0", shape=(?,), dtype=float64) (576,) - natoms:0", shape=(4,), dtype=int32) (4,) - mesh:0", shape=(?,), 
dtype=int32) (6,) - type:0", shape=(?,), dtype=int32) (192,) - aceholder:0", dtype=bool) True - """ model_inputs = {} for kk in train_batch.keys(): if kk == "find_type" or kk == "type": @@ -1147,42 +1045,7 @@ def train(self, train_data=None, valid_data=None, stop_batch: int = 10): total_train_time / (stop_batch // self.disp_freq * self.disp_freq), ) - # if self.profiling and self.run_opt.is_chief: - # fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats) - # chrome_trace = fetched_timeline.generate_chrome_trace_format() - # with open(self.profiling_file, "w") as f: - # f.write(chrome_trace) - # if self.enable_profiler and self.run_opt.is_chief: - # tfv2.profiler.experimental.stop() - def save_checkpoint(self, cur_batch: int): - # try: - # ckpt_prefix = self.saver.save( - # self.sess, - # os.path.join(os.getcwd(), self.save_ckpt), - # global_step=cur_batch, - # ) - # except google.protobuf.message.DecodeError as e: - # raise GraphTooLargeError( - # "The graph size exceeds 2 GB, the hard limitation of protobuf." - # " Then a DecodeError was raised by protobuf. You should " - # "reduce the size of your model." - # ) from e - # # make symlinks from prefix with step to that without step to break nothing - # # get all checkpoint files - # original_files = glob.glob(ckpt_prefix + ".*") - # for ori_ff in original_files: - # new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix) :] - # try: - # # remove old one - # os.remove(new_ff) - # except OSError: - # pass - # if platform.system() != "Windows": - # # by default one does not have access to create symlink on Windows - # os.symlink(ori_ff, new_ff) - # else: - # shutil.copyfile(ori_ff, new_ff) paddle.save(self.model.state_dict(), f"Model_{cur_batch}.pdparams") paddle.save(self.optimizer.state_dict(), f"Optimier_{cur_batch}.pdopt") log.info("saved checkpoint %s" % self.save_ckpt) diff --git a/deepmd/utils/learning_rate.py b/deepmd/utils/learning_rate.py index d17e99dd1b..0f1ccdf5cf 100644 --- a/deepmd/utils/learning_rate.py +++ b/deepmd/utils/learning_rate.py @@ -92,16 +92,10 @@ def build( np.log(self.stop_lr_ / self.start_lr_) / (stop_step / self.decay_steps_) ) - # print("decay_steps_ = ", self.decay_steps_) return lr.ExponentialDecay( self.start_lr_, gamma=self.decay_rate_, ) - # return paddle.optimizer.lr.ExponentialDecay( - # learning_rate=self.start_lr_, - # gamma=self.decay_rate_ ** (1 / self.decay_steps_), - # # verbose=True, - # ) def start_lr(self) -> float: """Get the start lr.""" diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index 7bbdbff8a3..966e19cf00 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -6,6 +6,7 @@ ) import numpy as np +import paddle from deepmd.env import ( GLOBAL_PD_FLOAT_PRECISION, @@ -43,44 +44,6 @@ def __init__( self.rcut = rcut self.ntypes = ntypes self.one_type = one_type - # sub_graph = tf.Graph() - - # def builder(): - # place_holders = {} - # for ii in ["coord", "box"]: - # place_holders[ii] = tf.placeholder( - # GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii - # ) - # place_holders["type"] = tf.placeholder( - # tf.int32, [None, None], name="t_type" - # ) - # place_holders["natoms_vec"] = tf.placeholder( - # tf.int32, [self.ntypes + 2], name="t_natoms" - # ) - # place_holders["default_mesh"] = tf.placeholder( - # tf.int32, [None], name="t_mesh" - # ) - # t_type = place_holders["type"] - # t_natoms = place_holders["natoms_vec"] - # if self.one_type: - # # all types = 0, natoms_vec = [natoms, natoms, natoms] - # t_type = 
tf.clip_by_value(t_type, -1, 0) - # t_natoms = tf.tile(t_natoms[0:1], [3]) - # _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( # 这里只计算一次 - # place_holders["coord"], - # t_type, - # t_natoms, - # place_holders["box"], - # place_holders["default_mesh"], - # rcut=self.rcut, - # ) - # place_holders["dir"] = tf.placeholder(tf.string) - # return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders["dir"]) - - # with sub_graph.as_default(): - # self.p = ParallelOp(builder, config=default_tf_session_config) - - # self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: """Get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms. @@ -102,37 +65,6 @@ def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: if not self.one_type: self.max_nbor_size *= self.ntypes - # def feed(): - # for ii in range(len(data.system_dirs)): - # for jj in data.data_systems[ii].dirs: - # data_set = data.data_systems[ii]._load_set(jj) - # for kk in range(np.array(data_set["type"]).shape[0]): - # ret = { - # "coord": np.array(data_set["coord"])[kk].reshape( - # [-1, data.natoms[ii] * 3] - # ), # (1, 576) - # "type": np.array(data_set["type"])[kk].reshape( - # [-1, data.natoms[ii]] - # ), # (1, 192) - # "natoms_vec": np.array(data.natoms_vec[ii]), # (4,) - # "box": np.array(data_set["box"])[kk].reshape([-1, 9]), # (1, 9) - # "default_mesh": np.array(data.default_mesh[ii]), # (6,) - # "dir": str(jj), # ../data/data_0/set.xxx - # } - # print(str(jj)) - # print("coord", ret["coord"].shape, ret["coord"].dtype) - # print("type", ret["type"].shape, ret["type"].dtype) - # print("natoms_vec", ret["natoms_vec"].shape, ret["natoms_vec"].dtype) - # print("box", ret["box"].shape, ret["box"].dtype) - # print("default_mesh", ret["default_mesh"].shape, ret["default_mesh"].dtype) - # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/coord.npy", ret["coord"]) - # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/type.npy", ret["type"]) - # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/natoms_vec.npy", ret["natoms_vec"]) - # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/box.npy", ret["box"]) - # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/default_mesh.npy", ret["default_mesh"]) - # yield ret - import paddle - for ii in range(len(data.system_dirs)): for jj in data.data_systems[ii].dirs: data_set = data.data_systems[ii]._load_set(jj) @@ -189,30 +121,6 @@ def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: var = paddle.max(mn, axis=0).numpy() self.max_nbor_size = np.maximum(var, self.max_nbor_size) - # for mn, dt, jj in self.p.generate(self.sub_sess, feed()): # _max_nbor_size, _min_nbor_dist, dir - # # print(mn.shape, dt.shape, jj) - # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/max_nbor_size.npy", mn) - # # np.save("/workspace/hesensen/deepmd-kit/cuda_ext/min_nbor_dist.npy", dt) - # if dt.size != 0: - # dt = np.min(dt) - # else: - # dt = self.rcut - # log.warning( - # "Atoms with no neighbors found in %s. Please make sure it's what you expected." - # % jj - # ) - # if dt < self.min_nbor_dist: - # if math.isclose(dt, 0.0, rel_tol=1e-6): - # # it's unexpected that the distance between two atoms is zero - # # zero distance will cause nan (#874) - # raise RuntimeError( - # "Some atoms are overlapping in %s. Please check your" - # " training data to remove duplicated atoms." 
% jj - # ) - # self.min_nbor_dist = dt - # var = np.max(mn, axis=0) - # self.max_nbor_size = np.maximum(var, self.max_nbor_size) - log.info("training data with min nbor dist: " + str(self.min_nbor_dist)) log.info("training data with max nbor size: " + str(self.max_nbor_size)) return self.min_nbor_dist, self.max_nbor_size diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py index d15153e9ec..dbcb0f1602 100644 --- a/deepmd/utils/network.py +++ b/deepmd/utils/network.py @@ -411,7 +411,6 @@ def __init__( self.activation_fn = activation_fn self.resnet_dt = resnet_dt self.seed = seed - # paddle.seed(seed) outputs_size = self.outputs_size weight = []
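
A recurring pattern in this patch is registering scalar hyperparameters (rcut, ntypes, ntypes_spin, sel, ndescrpt) as buffers on a paddle.nn.Layer and then exporting with skip_prune_program=True, so that values never touched by forward() still survive paddle.jit.save and remain readable from the C++ inference side. Below is a minimal, self-contained sketch of that idea; the ToyDescrpt layer, its input spec, and the output path are hypothetical (not part of this patch), and skip_prune_program is the keyword freeze_graph() passes above, which may not be available in stock Paddle builds.

    import paddle


    class ToyDescrpt(paddle.nn.Layer):
        """Toy stand-in for DescrptSeA that keeps non-forward attrs as buffers."""

        def __init__(self, rcut: float, ntypes: int, sel):
            super().__init__()
            # Buffers take no part in forward(), but they are serialized with the
            # program, mirroring buffer_rcut / buffer_ntypes / buffer_sel above.
            self.register_buffer("buffer_rcut", paddle.to_tensor(rcut, dtype="float64"))
            self.register_buffer("buffer_ntypes", paddle.to_tensor(ntypes, dtype="int32"))
            self.register_buffer("buffer_sel", paddle.to_tensor(sel, dtype="int32"))

        def forward(self, coord):
            return coord * 2.0  # placeholder for the real descriptor computation


    model = ToyDescrpt(rcut=6.0, ntypes=2, sel=[46, 92])
    st_model = paddle.jit.to_static(
        model, input_spec=[paddle.static.InputSpec(shape=[None, 3], dtype="float64")]
    )
    st_model(paddle.zeros([4, 3], dtype="float64"))  # run once so the program is traced
    # Pruning would normally strip the unused buffers from the exported program;
    # skip_prune_program=True (as in freeze_graph above) keeps them readable in C++.
    paddle.jit.save(st_model, "toy_model", skip_prune_program=True)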