diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index d22db2d41b..301b1167de 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -145,13 +145,14 @@ def __init__( raise RuntimeError( f"rcut_smth ({rcut_smth:f}) should be no more than rcut ({rcut:f})!" ) - self.sel_a = sel - self.rcut_r = rcut + self.sel_a = sel # [46(O), 92(H)] + self.rcut_r = rcut # 6.0 + # NOTE: register 'rcut' in buffer to be accessed in inference self.register_buffer("buffer_rcut", paddle.to_tensor(rcut, dtype="float64")) - self.rcut_r_smth = rcut_smth - self.filter_neuron = neuron - self.n_axis_neuron = axis_neuron - self.filter_resnet_dt = resnet_dt + self.rcut_r_smth = rcut_smth # 0.5 + self.filter_neuron = neuron # [25, 50, 100] + self.n_axis_neuron = axis_neuron # 16 + self.filter_resnet_dt = resnet_dt # False self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = embedding_net_rand_seed_shift(self.filter_neuron) @@ -159,15 +160,15 @@ def __init__( self.compress_activation_fn = get_activation_func(activation_function) self.filter_activation_fn = get_activation_func(activation_function) self.filter_precision = get_precision(precision) - self.exclude_types = set() + self.exclude_types = set() # empty for tt in exclude_types: assert len(tt) == 2 self.exclude_types.add((tt[0], tt[1])) self.exclude_types.add((tt[1], tt[0])) - self.set_davg_zero = set_davg_zero - self.type_one_side = type_one_side + self.set_davg_zero = set_davg_zero # False + # self.type_one_side = type_one_side # False self.type_one_side = False - self.spin = spin + self.spin = spin # None # extend sel_a for spin system if self.spin is not None: @@ -176,28 +177,52 @@ def __init__( self.sel_a.extend(self.sel_a_spin) else: self.ntypes_spin = 0 - self.register_buffer("buffer_ntypes_spin", paddle.to_tensor(self.ntypes_spin)) + # NOTE: register 'ntypes_spin' in buffer to be accessed in inference + self.register_buffer( + "buffer_ntypes_spin", paddle.to_tensor(self.ntypes_spin, 
dtype="int32") + ) # descrpt config - self.sel_r = [0 for ii in range(len(self.sel_a))] - self.ntypes = len(self.sel_a) - self.register_buffer("buffer_ntypes", paddle.to_tensor(self.ntypes)) + self.sel_r = [0 for ii in range(len(self.sel_a))] # [0, 0] + self.ntypes = len(self.sel_a) # 2 + # NOTE: register 'ntypes' in buffer to be accessed in inference + self.register_buffer( + "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int32") + ) assert self.ntypes == len(self.sel_r) self.rcut_a = -1 # numb of neighbors and numb of descrptors - self.nnei_a = np.cumsum(self.sel_a)[-1] - self.nnei_r = np.cumsum(self.sel_r)[-1] - self.nnei = self.nnei_a + self.nnei_r - self.ndescrpt_a = self.nnei_a * 4 - self.ndescrpt_r = self.nnei_r * 1 - self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r + self.nnei_a = np.cumsum(self.sel_a)[-1] # 138 邻域内原子个数 + self.nnei_r = np.cumsum(self.sel_r)[-1] # 0 + self.nnei = self.nnei_a + self.nnei_r # 138 + self.ndescrpt_a = self.nnei_a * 4 # 552 原子个数*4([s, s/x, s/y, s/z]) + self.ndescrpt_r = self.nnei_r * 1 # 0 + self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r # 552 self.useBN = False self.dstd = None self.davg = None - - self.avg_zero = paddle.zeros([self.ntypes, self.ndescrpt], dtype="float32") - self.std_ones = paddle.ones([self.ntypes, self.ndescrpt], dtype="float32") + # self.compress = False + # self.embedding_net_variables = None + # self.mixed_prec = None + # self.place_holders = {} + # self.nei_type = np.repeat(np.arange(self.ntypes), self.sel_a) + """ + array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1]) + """ + self.avg_zero = paddle.zeros( + [self.ntypes, 
self.ndescrpt], dtype="float32" + ) # [2, 552] + self.std_ones = paddle.ones( + [self.ntypes, self.ndescrpt], dtype="float32" + ) # [2, 552] nets = [] + # self._pass_filter => self._filter => self._filter_lower for type_input in range(self.ntypes): layer = [] for type_i in range(self.ntypes): @@ -601,14 +626,6 @@ def forward( # op_descriptor = ( # build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a # ) - # print(coord.dtype) # paddle.float64 - # print(atype.dtype) # paddle.int32 - # print(box.dtype) # paddle.float64 - # print(mesh.dtype) # paddle.int32 - # print(self.t_avg.dtype) # paddle.float32 - # print(self.t_std.dtype) # paddle.float32 - # print(natoms) - # exit() ( self.descrpt, self.descrpt_deriv, @@ -628,38 +645,6 @@ def forward( sel_a=self.sel_a, sel_r=self.sel_r, ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - # "deepmd-kit/examples/water/se_e2_a/align_input/pred_descrpt", - # self.descrpt, - # ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - # "deepmd-kit/examples/water/se_e2_a/align_input/pred_descrpt_deriv", - # self.descrpt_deriv, - # ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - # "deepmd-kit/examples/water/se_e2_a/align_input/pred_rij", - # self.rij, - # ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - # "deepmd-kit/examples/water/se_e2_a/align_input/pred_nlist", - # self.nlist, - # ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - # "deepmd-kit/examples/water/se_e2_a/align_input/pred_nlist", - # self.nlist, - # ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - # "deepmd-kit/examples/water/se_e2_a/align_input/pred_nlist", - # self.nlist, - # ) - # exit() - # self.descrpt.shape = [1, 105984] # only used when tensorboard was set as true # tf.summary.histogram("descrpt", self.descrpt) # tf.summary.histogram("rij", self.rij) @@ -676,24 +661,10 @@ def forward( suffix=suffix, reuse=reuse, trainable=self.trainable, - ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - 
# "deepmd-kit/examples/water/se_e2_a/align_input/pred_dout", - # self.dout, - # ) - # np.save( - # "/workspace/hesensen/deepmd_backend/" - # "deepmd-kit/examples/water/se_e2_a/align_input/pred_qmat", - # self.qmat, - # ) - # exit() + ) # [1, all_atom, M1*M2], output_qmat: [1, all_atom, M1*3] # only used when tensorboard was set as true # tf.summary.histogram("embedding_net_output", self.dout) - # print(self.dout.shape) - # np.save(f"/workspace/hesensen/deepmd_backend/infer_align/dout_pd.npy", self.dout) - # exit() return self.dout def get_rot_mat(self) -> paddle.Tensor: @@ -756,23 +727,35 @@ def prod_force_virial( def _pass_filter( self, inputs, atype, natoms, input_dict, reuse=None, suffix="", trainable=True ): + """pass_filter. + + Args: + inputs (_type_): _description_ + atype (_type_): _description_ + natoms (_type_): _description_ + input_dict (_type_): _description_ + reuse (_type_, optional): _description_. Defaults to None. + suffix (str, optional): _description_. Defaults to "". + trainable (bool, optional): _description_. Defaults to True. 
+ + Returns: + Tuple[Tensor, Tensor]: output: [1, all_atom, M1*M2], output_qmat: [1, all_atom, M1*3] + """ # natoms = [192, 192, 64 , 128] if input_dict is not None: type_embedding = input_dict.get("type_embedding", None) else: type_embedding = None start_index = 0 - # print(inputs.shape) # [192, 552] + # print(inputs.shape) # [192, 552(nnei*4)],每个原子和它周围nnei个原子的R矩阵(展平后) inputs = paddle.reshape(inputs, [-1, int(natoms[0].item()), int(self.ndescrpt)]) - # print(inputs.shape) # [1, 192, 552] - # exit() output = [] output_qmat = [] # print(self.type_one_side, type_embedding) # exit() if not self.type_one_side and type_embedding is None: - # print("here", self.ntypes) for type_i in range(self.ntypes): + # 按不同原子类型进行处理 inputs_i = paddle.slice( inputs, [0, 1, 2], @@ -782,10 +765,12 @@ def _pass_filter( start_index + natoms[2 + type_i], inputs.shape[2], ], - ) # [1, 192, 552] --> [1, 64, 552] - inputs_i = paddle.reshape(inputs_i, [-1, self.ndescrpt]) # [64, 552] + ) # [1, 某种类型原子个数64/128, 552] + inputs_i = paddle.reshape( + inputs_i, [-1, self.ndescrpt] + ) # [某种类型原子个数64/128, 552] filter_name = "filter_type_" + str(type_i) + suffix - layer, qmat = self._filter( + layer, qmat = self._filter( # 计算某个类型的原子的 result 和 qmat inputs_i, type_i, name=filter_name, @@ -793,10 +778,10 @@ def _pass_filter( reuse=reuse, trainable=trainable, activation_fn=self.filter_activation_fn, - ) + ) # [natom, M1*M2], qmat: [natom, M1, 3] layer = paddle.reshape( layer, [inputs.shape[0], natoms[2 + type_i], self.get_dim_out()] - ) + ) # [1, 某种类型原子个数64/128, M1*M2] qmat = paddle.reshape( qmat, [ @@ -804,20 +789,22 @@ def _pass_filter( natoms[2 + type_i], self.get_dim_rot_mat_1() * 3, ], - ) + ) # [1, 某种类型原子个数64/128, 100*3] output.append(layer) output_qmat.append(qmat) start_index += natoms[2 + type_i] else: ... 
+ # This branch is not executed at present # inputs_i = inputs # inputs_i = paddle.reshape(inputs_i, [-1, self.ndescrpt]) # type_i = -1 - # if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: - # inputs_i = descrpt2r4(inputs_i, natoms) + # # if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: + # # inputs_i = descrpt2r4(inputs_i, natoms) # if len(self.exclude_types): # atype_nloc = paddle.reshape( - # paddle.slice(atype, [0, 0], [-1, natoms[0]]), [-1] + # paddle.slice(atype, [0, 1], [0, 0], [atype.shape[0], natoms[0]]), + # [-1], # ) # when nloc != nall, pass nloc to mask # mask = self.build_type_exclude_mask( # self.exclude_types, @@ -847,9 +834,10 @@ def _pass_filter( # ) # output.append(layer) # output_qmat.append(qmat) - # print(f"len(output) = {len(output)}") output = paddle.concat(output, axis=1) output_qmat = paddle.concat(output_qmat, axis=1) + # output: [1, 192, M1*M2] + # output_qmat: [1, 192, M1*3] return output, output_qmat def _compute_dstats_sys_smth( @@ -875,15 +863,6 @@ def _compute_dstats_sys_smth( ) input_dict["default_mesh"] = paddle.to_tensor(mesh, dtype="int32") - # print(input_dict["coord"].dtype) # fp64 - # print(input_dict["type"].dtype) # int32 - # print(input_dict["natoms_vec"].dtype) # int32 - # print(input_dict["box"].dtype) # fp64 - # print(input_dict["default_mesh"].dtype) # int32 - # print(self.avg_zero) - # print(self.std_ones) - # print(self.sel_a) - # print(self.sel_r) self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( input_dict["coord"], # fp32 input_dict["type"], # int32 @@ -993,65 +972,47 @@ def _concat_type_embedding( def _filter_lower( self, - type_i, - type_input, - start_index, - incrs_index, - inputs, - nframes, - natoms, + type_i: int, # inner-loop + type_input: int, # outer-loop + start_index: int, + incrs_index: int, + inputs: paddle.Tensor, # [1, 原子个数(64或128), 552(embedding_dim)] + nframes: int, + natoms: int, type_embedding=None, is_exclude=False, - activation_fn=None, - bavg=0.0, - 
stddev=1.0, - trainable=True, - suffix="", + # activation_fn=None, + # bavg=0.0, + # stddev=1.0, + # trainable=True, + # suffix="", ): """Input env matrix, returns R.G.""" outputs_size = [1] + self.filter_neuron # cut-out inputs # with natom x (nei_type_i x 4) - # if not hasattr(self, "debug_inputs"): - # self.debug_inputs = inputs - # paddle.save(self.debug_inputs, "/workspace/hesensen/deepmd_backend/small_case/debug_inputs.pddata") - # print(__file__, "inputs.shape", inputs.shape) - inputs_i = paddle.slice( inputs, [0, 1], [0, start_index * 4], [inputs.shape[0], start_index * 4 + incrs_index * 4], - ) - # if not hasattr(self, "debug_inputs_i"): - # self.debug_inputs_i = inputs_i - # paddle.save(self.debug_inputs_i, "/workspace/hesensen/deepmd_backend/small_case/debug_inputs_i.pddata") - # print(__file__, "inputs_i.shape", inputs_i.shape) + ) # 得到某个类型的原子i对邻域内类型为j的的原子关系,取出二者之间的描述矩阵R natom x nei_type_i x 4 shape_i = inputs_i.shape natom = inputs_i.shape[0] # with (natom x nei_type_i) x 4 inputs_reshape = paddle.reshape(inputs_i, [-1, 4]) - # if not hasattr(self, "debug_inputs_reshape"): - # self.debug_inputs_reshape = inputs_reshape - # paddle.save(self.debug_inputs_reshape, "/workspace/hesensen/deepmd_backend/small_case/debug_inputs_reshape.pddata") - # print(__file__, "inputs_reshape.shape", inputs_reshape.shape) - # with (natom x nei_type_i) x 1 xyz_scatter = paddle.reshape( paddle.slice(inputs_reshape, [0, 1], [0, 0], [inputs_reshape.shape[0], 1]), [-1, 1], - ) - # if not hasattr(self, "debug_xyz_scatter"): - # self.debug_xyz_scatter = xyz_scatter - # paddle.save(self.debug_xyz_scatter, "/workspace/hesensen/deepmd_backend/small_case/debug_xyz_scatter.pddata") - # print(__file__, "xyz_scatter.shape", xyz_scatter.shape) + ) # 得到某个类型的原子i对邻域内类型为j的的原子关系,取出二者之间的描述矩阵R矩阵的第一列s(rij) if type_embedding is not None: xyz_scatter = self._concat_type_embedding( xyz_scatter, nframes, natoms, type_embedding - ) + ) # if self.compress: raise RuntimeError( "compression of type 
embedded descriptor is not supported at the moment" @@ -1098,35 +1059,10 @@ def _filter_lower( ) else: if not is_exclude: - # with (natom x nei_type_i) x out_size - # if not hasattr(self, "xyz_scatter_input"): - # self.debug_xyz_scatter_input = xyz_scatter - # paddle.save(self.xyz_scatter_input, "/workspace/hesensen/deepmd_backend/small_case/embd_net_0_0_input.pddata") - # paddle.save(self.embedding_nets[type_input][type_i].state_dict(), "/workspace/hesensen/deepmd_backend/small_case/embd_net_0_0.pdparams") - # print(__file__, "saved") - xyz_scatter_out = self.embedding_nets[type_input][type_i](xyz_scatter) - # print(__file__, "xyz_scatter.shape", xyz_scatter.shape) - # if not hasattr(self, "xyz_scatter_output"): - # self.debug_xyz_scatter_output = xyz_scatter_out - # paddle.save(self.xyz_scatter_output, "/workspace/hesensen/deepmd_backend/small_case/embd_net_0_0_output.pddata") - # print(__file__, "saved") - - # xyz_scatter = embedding_net( - # xyz_scatter, - # self.filter_neuron, - # self.filter_precision, - # activation_fn=activation_fn, - # resnet_dt=self.filter_resnet_dt, - # name_suffix=suffix, - # stddev=stddev, - # bavg=bavg, - # seed=self.seed, - # trainable=trainable, - # uniform_seed=self.uniform_seed, - # initial_variables=self.embedding_net_variables, - # mixed_prec=self.mixed_prec, - # ) - # xyz_scatter = paddle.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) + # this branch is executed + xyz_scatter_out = self.embedding_nets[type_input][type_i]( + xyz_scatter + ) # 对 s(rij) 进行embedding映射, (natom x nei_type_i) x 1==>(natom x nei_type_i) x 100,得到每个原子i对邻域内类型为j的的原子特征,所有该类型的原子的g_i的concat if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift else: @@ -1138,24 +1074,32 @@ def _filter_lower( # natom x nei_type_i x out_size xyz_scatter_out = paddle.reshape( xyz_scatter_out, (-1, shape_i[1] // 4, outputs_size[-1]) - ) + ) # (natom x nei_type_i) x 100 ==> natom x nei_type_i x 100 # When using paddle.reshape(inputs_i, [-1, 
shape_i[1]//4, 4]) below # [588 24] -> [588 6 4] correct # but if sel is zero # [588 0] -> [147 0 4] incorrect; the correct one is [588 0 4] # So we need to explicitly assign the shape to paddle.shape(inputs_i)[0] instead of -1 # natom x 4 x outputs_size + + # [natom, nei_type_i, 4].T x [natom, nei_type_i, 100] + # 等价于 + # [natom, 4, nei_type_i] x [natom, nei_type_i, 100] + # ==> + # [natom, 4, 100] return paddle.matmul( - paddle.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), - xyz_scatter_out, + paddle.reshape( + inputs_i, [natom, shape_i[1] // 4, 4] + ), # [natom, nei_type_i, 4] + xyz_scatter_out, # [natom, nei_type_i, 100] transpose_x=True, - ) + ) # 得到(R_i).T*g_i,即D_i表达式的右半部分 # @cast_precision def _filter( self, - inputs, - type_input, + inputs: paddle.Tensor, # [1, 原子个数(64或128), 552(nnei*4)] + type_input: int, natoms, type_embedding=None, activation_fn=paddle.nn.functional.tanh, @@ -1165,19 +1109,40 @@ def _filter( reuse=None, trainable=True, ): + """_filter + + Args: + inputs (paddle.Tensor): _description_ + natoms (_type_): _description_ + type_embedding (_type_, optional): _description_. Defaults to None. + activation_fn (_type_, optional): _description_. Defaults to paddle.nn.functional.tanh. + stddev (float, optional): _description_. Defaults to 1.0. + bavg (float, optional): _description_. Defaults to 0.0. + name (str, optional): _description_. Defaults to "linear". + reuse (_type_, optional): _description_. Defaults to None. + trainable (bool, optional): _description_. Defaults to True. 
+ + Returns: + Tuple[Tensor, Tensor]: result: [64/128, M1*M2], qmat: [64/128, M1, 3] + """ # nframes = paddle.shape(paddle.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0] + # 上述 nframes的计算代码是错误的,reshape前后numel根本不相等,会导致程序报错,tf不会报错是因为tf计算图 + # 检测到这个变量后续不会被真正使用到,所以自动进行了优化。 + # nframes由于没有被使用到,所以这段代码没有被执行,所以tf实际运行是没有报错。 + # 复现报错很简单,只需要把这个nframes run出来,会导致这段代码被执行,然后报错。 + + # 给 nframes 设置一个无用值 1 即可 nframes = 1 # natom x (nei x 4) shape = inputs.shape outputs_size = [1] + self.filter_neuron - outputs_size_2 = self.n_axis_neuron + outputs_size_2 = self.n_axis_neuron # 16 all_excluded = all( [ - (type_input, type_i) in self.exclude_types + (type_input, type_i) in self.exclude_types # set() for type_i in range(self.ntypes) ] - ) - # print(__file__, all_excluded) + ) # False if all_excluded: # all types are excluded so result and qmat should be zeros # we can safaly return a zero matrix... @@ -1201,29 +1166,33 @@ def _filter( # natom x 4 x outputs_size if type_embedding is None: rets = [] + # execute this branch for type_i in range(self.ntypes): + # 计算type_input和type_i的原子之间的特征 ret = self._filter_lower( type_i, type_input, start_index, - self.sel_a[type_i], - inputs, + self.sel_a[type_i], # 46(O)/92(H) + inputs, # [1, 原子个数(64或128), 552(nnei*4)] nframes, natoms, type_embedding=type_embedding, is_exclude=(type_input, type_i) in self.exclude_types, - activation_fn=activation_fn, - stddev=stddev, - bavg=bavg, - trainable=trainable, - suffix="_" + str(type_i), - ) + # activation_fn=activation_fn, + # stddev=stddev, + # bavg=bavg, + # trainable=trainable, + # suffix="_" + str(type_i), + ) # ==> [natom_i, 4, 100] if (type_input, type_i) not in self.exclude_types: # add zero is meaningless; skip rets.append(ret) start_index += self.sel_a[type_i] # faster to use accumulate_n than multiple add - xyz_scatter_1 = paddle.add_n(rets) + xyz_scatter_1 = paddle.add_n( + rets + ) # 得到所有(R_i).T*g_i: [当前类型原子个数64/128, 4, embedding维度M1] else: xyz_scatter_1 = self._filter_lower( type_i, @@ 
-1235,10 +1204,10 @@ def _filter( natoms, type_embedding=type_embedding, is_exclude=False, - activation_fn=activation_fn, - stddev=stddev, - bavg=bavg, - trainable=trainable, + # activation_fn=activation_fn, + # stddev=stddev, + # bavg=bavg, + # trainable=trainable, ) # if nvnmd_cfg.enable: # return filter_GR2D(xyz_scatter_1) @@ -1260,15 +1229,18 @@ def _filter( ), self.filter_precision, ) - xyz_scatter_1 = xyz_scatter_1 / nnei + xyz_scatter_1 = ( + xyz_scatter_1 / nnei + ) # (R_i).T*g_i: [当前类型原子个数64/128, 4, embedding维度M1] # natom x 4 x outputs_size_2 xyz_scatter_2 = paddle.slice( xyz_scatter_1, [0, 1, 2], [0, 0, 0], [xyz_scatter_1.shape[0], xyz_scatter_1.shape[1], outputs_size_2], - ) - # # natom x 3 x outputs_size_2 + ) # [当前类型原子个数, R矩阵描述特征数4, 隐层特征数里的前16维特征(M2)], [64, 4, 16] + # (g_i<).T*(R_i): [当前类型原子个数64/128, 4, embedding前M2列] + # natom x 3 x outputs_size_2 # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1]) # natom x 3 x outputs_size_1 qmat = paddle.slice( @@ -1278,12 +1250,18 @@ def _filter( [xyz_scatter_1.shape[0], 1 + 3, xyz_scatter_1.shape[2]], ) # natom x outputs_size_1 x 3 - qmat = paddle.transpose(qmat, perm=[0, 2, 1]) + qmat = paddle.transpose(qmat, perm=[0, 2, 1]) # [64/128, M1, 3] # natom x outputs_size x outputs_size_2 - result = paddle.matmul(xyz_scatter_1, xyz_scatter_2, transpose_x=True) + result = paddle.matmul( + xyz_scatter_1, xyz_scatter_2, transpose_x=True + ) # [64/128,M1,4]x[64/128,4,M2]==>[64/128,M1,M2] # natom x (outputs_size x outputs_size_2) - result = paddle.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) + result = paddle.reshape( + result, [-1, outputs_size_2 * outputs_size[-1]] + ) # [64,M1*M2] + # result: [64/128, M1*M2] + # qmat: [64/128, M1, 3] return result, qmat def init_variables( diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py index 3582c3eb96..d82e464c8a 100755 --- a/deepmd/entrypoints/freeze.py +++ b/deepmd/entrypoints/freeze.py @@ -338,40 +338,6 @@ def freeze_graph( out_suffix : str 
The chosen suffix to freeze in the input_graph. """ - # output_node = _make_node_names( - # freeze_type, modifier, out_suffix=out_suffix, node_names=node_names - # ) - # different_set = set(output_node) - set(input_node) - # if different_set: - # log.warning( - # "The following nodes are not in the graph: %s. " - # "Skip freezeing these nodes. You may be freezing " - # "a checkpoint generated by an old version." % different_set - # ) - # # use intersection as output list - # output_node = list(set(output_node) & set(input_node)) - # log.info(f"The following nodes will be frozen: {output_node}") - # # We use a built-in TF helper to export variables to constants - # output_graph_def = tf.graph_util.convert_variables_to_constants( - # sess, # The session is used to retrieve the weights - # input_graph, # The graph_def is used to retrieve the nodes - # output_node, # The output node names are used to select the usefull nodes - # ) - # # if multi-task, change fitting_net suffix and model_type - # if out_suffix != "": - # output_graph_def = _modify_model_suffix( - # output_graph_def, out_suffix, freeze_type - # ) - - # # If we need to transfer the fitting net variables - # output_graph_def = _transfer_fitting_net_trainable_variables( - # sess, output_graph_def, input_graph - # ) - - # # Finally we serialize and dump the output graph to the filesystem - # with tf.gfile.GFile(out_graph_name, "wb") as f: - # f.write(output_graph_def.SerializeToString()) - # log.info(f"{len(output_graph_def.node):d} ops in the final graph.") import paddle from deepmd.infer import DeepPot @@ -381,15 +347,6 @@ def freeze_graph( load_prefix="load", default_tf_graph=False, ) - # print(dp.model.descrpt.embedding_nets[0][0].weight[0]) - # for w in dp.model.descrpt.embedding_nets[0][0].weight: - # print(f"w {w.shape} {w.mean().item()} {w.var().item()}") - # print("从state_dict打印载入的参数") - # for k, v in dp.model.state_dict().items(): - # print(f"{k} {v.shape} {v.dtype} {v.mean().item()} 
{v.var().item()}") - # exit() - # for b in dp.model.descrpt.embedding_nets[0][0].bias: - # print(f"b {b.shape} {b.mean().item()} {b.var().item()}") dp.model.eval() from paddle.static import InputSpec @@ -424,21 +381,25 @@ def freeze_graph( False, ], ) - print(f"st_model.descrpt.buffer_rcut.name = {st_model.descrpt.buffer_rcut.name}") - print( - f"st_model.descrpt.buffer_ntypes.name = {st_model.descrpt.buffer_ntypes.name}" - ) - print( - f"st_model.fitting.buffer_dfparam.name = {st_model.fitting.buffer_dfparam.name}" - ) - print( - f"st_model.fitting.buffer_daparam.name = {st_model.fitting.buffer_daparam.name}" - ) + for name, param in st_model.named_buffers(): + print( + f"[{name}, {param.shape}] generated name in static_model is: {param.name}" + ) + # print(f"st_model.descrpt.buffer_rcut.name = {st_model.descrpt.buffer_rcut.name}") + # print( + # f"st_model.descrpt.buffer_ntypes.name = {st_model.descrpt.buffer_ntypes.name}" + # ) + # print( + # f"st_model.fitting.buffer_dfparam.name = {st_model.fitting.buffer_dfparam.name}" + # ) + # print( + # f"st_model.fitting.buffer_daparam.name = {st_model.fitting.buffer_daparam.name}" + # ) # 跳过对program的裁剪,从而保留rcut、ntypes等不参与前向的参数,从而在C++端可以获取这些参数 skip_prune_program = True - print(f"==>> skip_prune_program = {skip_prune_program}") + print(f"==>> Set skip_prune_program = {skip_prune_program}") paddle.jit.save(st_model, output, skip_prune_program=skip_prune_program) - print(f"Saved to path: {output}") + print(f"Infernece model has been saved to: {output}") def freeze_graph_multi( diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py index a13ddec13d..f41c39b21e 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/fit/ener.py @@ -125,7 +125,7 @@ def __init__( """Constructor.""" # model param self.ntypes = descrpt.get_ntypes() - self.dim_descrpt = descrpt.get_dim_out() + self.dim_descrpt = descrpt.get_dim_out() # M1*M2 self.use_aparam_as_mask = use_aparam_as_mask # args = ()\ # .add('numb_fparam', int, default = 0)\ @@ -255,11 +255,6 @@ 
def __init__( ) ) - # print("create bias_atom_e", self.bias_atom_e.shape, self.bias_atom_e) - # self.register_buffer( - # "t_bias_atom_e", - # paddle.to_tensor(self.bias_atom_e), - # ) if self.numb_fparam > 0: if self.fparam_avg is None: self.fparam_avg = 0.0 @@ -316,10 +311,6 @@ def compute_output_stats(self, all_stat: dict, mixed_type: bool = False) -> None all_stat, rcond=self.rcond, mixed_type=mixed_type ) paddle.assign(self.bias_atom_e, self.t_bias_atom_e) - # self.register_buffer( - # "t_bias_atom_e", - # paddle.to_tensor(self.bias_atom_e), - # ) def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): data = all_stat["energy"] @@ -439,7 +430,7 @@ def _build_lower( [0, start_index, 0], [inputs.shape[0], start_index + natoms, inputs.shape[2]], ) - inputs_i = paddle.reshape(inputs_i, [-1, self.dim_descrpt]) + inputs_i = paddle.reshape(inputs_i, [-1, self.dim_descrpt]) # [natoms, M1*M2] layer = inputs_i if fparam is not None: ext_fparam = paddle.tile(fparam, [1, natoms]) @@ -504,7 +495,7 @@ def _build_lower( if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift - return final_layer + return final_layer # [natoms, 1] def forward( self, @@ -519,7 +510,7 @@ def forward( Parameters ---------- inputs - The input descriptor + The input descriptor, [1, all_atoms, M1*M2] input_dict Additional dict for inputs. 
if numb_fparam > 0, should have input_dict['fparam'] @@ -575,7 +566,9 @@ def forward( self.bias_atom_e[type_i] = self.bias_atom_e[type_i] self.bias_atom_e = self.bias_atom_e[:ntypes_atom] - inputs = paddle.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) + inputs = paddle.reshape( + inputs, [-1, natoms[0], self.dim_descrpt] + ) # [1, all_atoms, M1*M2] if len(self.atom_ener): # only for atom_ener nframes = input_dict.get("nframes") @@ -680,7 +673,7 @@ def forward( final_layer -= zero_layer final_layer = paddle.reshape( final_layer, [paddle.shape(inputs)[0], natoms[2 + type_i]] - ) + ) # [1, natoms] outs_list.append(final_layer) start_index += natoms[2 + type_i] # concat the results @@ -727,9 +720,7 @@ def forward( ), [paddle.shape(inputs)[0], paddle.sum(natoms[2 : 2 + ntypes_atom]).item()], ) - # print(__file__, self.t_bias_atom_e) - # exit() - outs = outs + self.add_type + outs = outs + self.add_type # 类型编码(类似于transformer的位置编码,每种类型自己有一个特征,加到原特征上) outs *= atype_filter self.atom_ener_after = outs @@ -747,7 +738,7 @@ def forward( ) outs = outs - outs_mean outs = paddle.reshape(outs, [-1]) - return paddle.reshape(outs, [-1]) + return paddle.reshape(outs, [-1]) # [all_atoms] def init_variables( self, diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 7b0b0d5536..7b3b3fa75f 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -13,6 +13,7 @@ from deepmd.common import expand_sys_str from deepmd.common import j_loader from deepmd.common import j_must_have +from deepmd.descriptor import DescrptSeA from deepmd.env import MODEL_VERSION from deepmd.env import default_tf_session_config from deepmd.env import paddle @@ -54,12 +55,24 @@ def __init__( model_param = j_must_have(jdata, "model") descrpt_param = j_must_have(model_param, "descriptor") - from deepmd.descriptor import DescrptSeA + explicit_ntypes_descrpt = ["se_atten"] + # hybrid_with_tebd = False + if descrpt_param["type"] in explicit_ntypes_descrpt: + descrpt_param["ntypes"] 
= len(model_param["type_map"]) + elif descrpt_param["type"] == "hybrid": + for descrpt_item in descrpt_param["list"]: + if descrpt_item["type"] in explicit_ntypes_descrpt: + descrpt_item["ntypes"] = len(model_param["type_map"]) + # hybrid_with_tebd = True + + # if descrpt_param["type"] in ["se_e2_a", "se_a", "se_e2_r", "se_r", "hybrid"]: + descrpt_param["spin"] = None + descrpt_param["type_one_side"] = False descrpt_param.pop("type", None) descrpt_param.pop("_comment", None) self.spin = None - descrpt_param["spin"] = self.spin + # descrpt_param["spin"] = self.spin self.descrpt = DescrptSeA(**descrpt_param) self.multi_task_mode = "fitting_net_dict" in model_param @@ -90,28 +103,32 @@ def __init__( model_param.get("sw_rmax"), self.spin, ) - load_state_dict = paddle.load(str(model_file)) - for k, v in load_state_dict.items(): - if k in self.model.state_dict(): - if load_state_dict[k].dtype != self.model.state_dict()[k].dtype: - print( - f"convert {k}'s dtype from {load_state_dict[k].dtype} to {self.model.state_dict()[k].dtype}" - ) - load_state_dict[k] = load_state_dict[k].astype( - self.model.state_dict()[k].dtype - ) - if list(load_state_dict[k].shape) != list( - self.model.state_dict()[k].shape - ): - print( - f"convert {k}'s shape from {load_state_dict[k].shape} to {self.model.state_dict()[k].shape}" - ) - load_state_dict[k] = load_state_dict[k].reshape( + model_file_str = str(model_file) + if model_file_str.endswith((".pdmodel", ".pdiparams")): + st_model_prefix = model_file_str.rsplit(".", 1)[0] + self.st_model = paddle.jit.load(st_model_prefix) + else: + load_state_dict = paddle.load(str(model_file)) + for k, v in load_state_dict.items(): + if k in self.model.state_dict(): + if load_state_dict[k].dtype != self.model.state_dict()[k].dtype: + print( + f"convert {k}'s dtype from {load_state_dict[k].dtype} to {self.model.state_dict()[k].dtype}" + ) + load_state_dict[k] = load_state_dict[k].astype( + self.model.state_dict()[k].dtype + ) + if 
list(load_state_dict[k].shape) != list( self.model.state_dict()[k].shape - ) - # print(f"==>> Load pretraied model successfully from: {str(model_file)}") - # exit() - self.model.set_state_dict(load_state_dict) + ): + print( + f"convert {k}'s shape from {load_state_dict[k].shape} to {self.model.state_dict()[k].shape}" + ) + load_state_dict[k] = load_state_dict[k].reshape( + self.model.state_dict()[k].shape + ) + self.model.set_state_dict(load_state_dict) + print(f"==>> Load pretraied model successfully from: {str(model_file)}") self.load_prefix = load_prefix # graph_compatable should be called after graph and prefix are set diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py index b0ade1fc1a..1c91059944 100644 --- a/deepmd/infer/deep_pot.py +++ b/deepmd/infer/deep_pot.py @@ -202,40 +202,6 @@ def __init__( # ewald_beta=ewald_beta, # ) - # NOTE: 使用静态图模型推理 - if not hasattr(self, "st_model"): - self.st_model = paddle.jit.load( - "/workspace/hesensen/deepmd_backend/deepmd-kit/examples/water/se_e2_a/Model_1000000" - ) - # for k, v in self.st_model.named_parameters(): - # print(f"{k} {v.shape} {v.mean().item()} {v.var().item()}") - # """ - # param_0 [1, 25] 0.9498768667019655 0.7340928425051493 - # param_1 [1, 50] 1.1214760345730344 0.9621536430386503 - # param_2 [1, 100] 1.168418946306086 1.0411743399117217 - # param_3 [1, 25] 0.002546645920014433 0.27806176560439083 - # param_4 [25, 50] -0.015372691466039676 0.10679961485782502 - # param_5 [50, 100] -0.0010681208730640539 0.09950205346985407 - # param_6 [1, 25] 1.0639599744616117 0.917256936729768 - # param_7 [1, 50] 1.142691803888668 0.9639366693005659 - # param_8 [1, 100] 1.1471394365452061 1.0091294911290036 - # param_9 [1, 25] 0.019013792716200625 0.1450311660373793 - # param_10 [25, 50] -0.006747145320748169 0.028971429954693633 - # param_11 [50, 100] -0.03750622755877242 0.04714041793007081 - # param_12 [1, 25] 1.0380588819220322 0.8904020425094114 - # param_13 [1, 50] 1.1245407895732316 
0.9234643810098301 - # param_14 [1, 100] 1.1430567514092813 0.9876968977916372 - # param_15 [1, 25] 0.03272738992064966 0.1751917732380509 - # param_16 [25, 50] -0.017871745658352124 0.0384813911462805 - # param_17 [50, 100] -0.07345191324160481 0.1768254187693918 - # param_18 [1, 25] 1.0147830400771964 0.9070964180637516 - # param_19 [1, 50] 1.1198266551333698 1.034746190888665 - # param_20 [1, 100] 1.1410748813679754 1.0428001731414345 - # param_21 [1, 25] -0.022862385119536602 0.18038150422614693 - # param_22 [25, 50] -0.024970130750642985 0.07176423978220656 - # param_23 [50, 100] -0.012309303874398866 0.07227932085917015 - # """ - def _run_default_sess(self): if self.has_spin is True: [ @@ -611,7 +577,6 @@ def _eval_inner( # print(eval_inputs['type'].shape) # [960] # print(eval_inputs['natoms_vec'].shape) # [4] # print(eval_inputs['box'].shape) # [45] - # exit() if self.has_fparam: eval_inputs["fparam"] = paddle.to_tensor( @@ -629,6 +594,7 @@ def _eval_inner( # eval_inputs['default_mesh'] = paddle.to_tensor(np.array([], dtype = np.int32)) if hasattr(self, "st_model"): + # NOTE: 使用静态图模型推理 eval_outputs = self.st_model( eval_inputs["coord"], # [2880] paddle.float64 eval_inputs["type"], # [960] paddle.int32 @@ -636,17 +602,6 @@ def _eval_inner( eval_inputs["box"], # [45] paddle.float64 eval_inputs["default_mesh"], # [6] paddle.int32 ) - # print(eval_inputs["coord"].shape) - # print(eval_inputs["type"].shape) - # print(eval_inputs["natoms_vec"].shape) - # print(eval_inputs["box"].shape) - # print(eval_inputs["default_mesh"].shape) - # np.save("/workspace/hesensen/deepmd_backend/python_infer_data/coord.npy", eval_inputs["coord"].numpy()) - # np.save("/workspace/hesensen/deepmd_backend/python_infer_data/type.npy", eval_inputs["type"].numpy()) - # np.save("/workspace/hesensen/deepmd_backend/python_infer_data/natoms_vec.npy", eval_inputs["natoms_vec"].numpy()) - # np.save("/workspace/hesensen/deepmd_backend/python_infer_data/box.npy", eval_inputs["box"].numpy()) - # 
np.save("/workspace/hesensen/deepmd_backend/python_infer_data/default_mesh.npy", eval_inputs["default_mesh"].numpy()) - # exit() eval_outputs = { "atom_ener": eval_outputs[0], "atom_virial": eval_outputs[1], @@ -655,25 +610,9 @@ def _eval_inner( "energy": eval_outputs[4], "force": eval_outputs[5], "virial": eval_outputs[6], - # "z00_hidden1": eval_outputs[7], - # "z00_hidden2": eval_outputs[8], - # "z00_hidden3": eval_outputs[9], - # "z00_xx1": eval_outputs[7], - # "z00_xx2": eval_outputs[8], - # "z00_xx3": eval_outputs[9], - # "z00_xx4": eval_outputs[10], - # "weight_0": eval_outputs[7], - # "bias_0": eval_outputs[8], - # "xx1": eval_outputs[9], - # "hidden1": eval_outputs[10], } - - # for k, v in eval_outputs.items(): - # print(k, v.shape) - # np.save(f"/workspace/hesensen/deepmd_backend/python_infer_data/st_model_{k}.npy", v.numpy()) - # print(f"finished save {k}") - # exit() else: + # NOTE: 使用动态图模型推理 eval_outputs = self.model( eval_inputs["coord"], # [2880] paddle.float64 eval_inputs["type"], # [960] paddle.int32 @@ -681,11 +620,6 @@ def _eval_inner( eval_inputs["box"], # [45] paddle.float64 eval_inputs["default_mesh"], # [6] paddle.int32 eval_inputs, - # eval_inputs.coord: [2880] paddle.float64 - # eval_inputs.type: [960] paddle.int32 - # eval_inputs.natoms_vec: [4] paddle.int32 - # eval_inputs.box: [45] paddle.float64 - # eval_inputs.default_mesh: [6] paddle.int32 suffix="", reuse=False, ) diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py index 0c4e890e01..436fd3bcdb 100644 --- a/deepmd/model/ener.py +++ b/deepmd/model/ener.py @@ -1,3 +1,4 @@ +from typing import TYPE_CHECKING from typing import List from typing import Optional @@ -15,6 +16,9 @@ from .model_stat import make_stat_input from .model_stat import merge_sys_stat +if TYPE_CHECKING: + from deepmd.fit import ener + class EnerModel(Model, paddle.nn.Layer): """Energy model. 
@@ -47,7 +51,7 @@ class EnerModel(Model, paddle.nn.Layer): def __init__( self, descrpt, - fitting, + fitting: "ener.EnerFitting", typeebd=None, type_map: Optional[List[str]] = None, data_stat_nbatch: int = 10, @@ -211,7 +215,7 @@ def forward( # ckpt_meta=ckpt_meta, suffix=suffix, reuse=reuse, - ) + ) # [1, all_atom, M1*M2] # self.dout = dout # if self.srtab is not None: @@ -256,7 +260,7 @@ def forward( # atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener # energy_raw = tab_atom_ener + atom_ener # else: - energy_raw = atom_ener + energy_raw = atom_ener # [1, all_atoms] nloc_atom = ( natoms[0] @@ -269,6 +273,9 @@ def forward( energy = paddle.sum(energy_raw, axis=1, name="o_energy" + suffix) force, virial, atom_virial = self.descrpt.prod_force_virial(atom_ener, natoms) + # force: [1, all_atoms*3] + # virial: [1, 9] + # atom_virial: [1, all_atoms*9] # if self.srtab is not None: # sw_force = op_module.soft_min_force( @@ -276,7 +283,7 @@ def forward( # ) # force = force + sw_force + tab_force - force = paddle.reshape(force, [-1, 3 * natoms[1]]) + force = paddle.reshape(force, [-1, 3 * natoms[1]]) # [1, all_atoms*3] if self.spin is not None: # split and concatenate force to compute local atom force and magnetic force judge = paddle.equal(natoms[0], natoms[1]) @@ -311,41 +318,13 @@ def forward( ) model_dict = {} - model_dict["energy"] = energy # [5] - model_dict["force"] = force # [5, 576] - model_dict["virial"] = virial # [5, 9] - model_dict["atom_ener"] = energy_raw # [5, 192] - model_dict["atom_virial"] = atom_virial # [5, 1728] - model_dict["coord"] = coord # [5, 576] - model_dict["atype"] = atype # [5, 192] - - # model_dict["zdebug1"] = self.descrpt.descrpt - # model_dict["zdebug2"] = self.descrpt.descrpt_deriv - # model_dict["zdebug3"] = self.descrpt.rij - # model_dict["zdebug4"] = self.descrpt.nlist - # model_dict["zdebug5"] = self.descrpt.dout - # model_dict["zdebug6"] = self.descrpt.qmat - # model_dict["zdebug7"] = self.descrpt.xyz_scatter_input - # 
model_dict["zdebug8"] = self.descrpt.xyz_scatter_output - - # model_dict["zdebug9"] = self.descrpt.debug_inputs - # model_dict["zdebug99"] = self.descrpt.debug_inputs_i - # model_dict["zdebug999"] = self.descrpt.debug_inputs_reshape - # model_dict["zdebug9999"] = self.descrpt.debug_xyz_scatter - # model_dict["zdebug99999"] = self.descrpt.debug_xyz_scatter_input - # model_dict["zdebug999999"] = self.descrpt.debug_xyz_scatter_output - - # model_dict["z00_hidden1"] = self.descrpt.embedding_nets[0][0].hidden1 - # model_dict["z00_hidden2"] = self.descrpt.embedding_nets[0][0].hidden2 - # model_dict["z00_hidden3"] = self.descrpt.embedding_nets[0][0].hidden3 - # model_dict["z00_xx1"] = self.descrpt.embedding_nets[0][0].xx1 - # model_dict["z00_xx2"] = self.descrpt.embedding_nets[0][0].xx2 - # model_dict["z00_xx3"] = self.descrpt.embedding_nets[0][0].xx3 - # model_dict["z00_xx4"] = self.descrpt.embedding_nets[0][0].xx4 - # model_dict["z00_0"] = self.descrpt.embedding_nets[0][0].weight[0] - # model_dict["z00_1"] = self.descrpt.embedding_nets[0][0].bias[0] - # model_dict["z00_2"] = self.descrpt.embedding_nets[0][0].xx1 - # model_dict["z00_3"] = self.descrpt.embedding_nets[0][0].hidden1 + model_dict["energy"] = energy # [batch_size] + model_dict["force"] = force # [batch_size, 576] + model_dict["virial"] = virial # [batch_size, 9] + model_dict["atom_ener"] = energy_raw # [batch_size, 192] + model_dict["atom_virial"] = atom_virial # [batch_size, 1728] + model_dict["coord"] = coord # [batch_size, 576] + model_dict["atype"] = atype # [batch_size, 192] return model_dict def init_variables( diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 04e159d55e..86b26b996a 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -126,6 +126,8 @@ def _init_param(self, jdata): if descrpt_param["type"] in ["se_e2_a", "se_a", "se_e2_r", "se_r", "hybrid"]: descrpt_param["spin"] = self.spin descrpt_param.pop("type") + print(descrpt_param) + exit() self.descrpt = 
deepmd.descriptor.se_a.DescrptSeA(**descrpt_param) # fitting net diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py index 2d6e12305c..6ae3aafedb 100644 --- a/deepmd/utils/network.py +++ b/deepmd/utils/network.py @@ -329,7 +329,6 @@ def __init__( std=stddev / np.sqrt(in_features + out_features) ), ) - # print(bavg, stddev) self.bias = self.create_parameter( shape=[out_features], dtype=precision, @@ -448,7 +447,7 @@ def __init__( dtype=precision, attr=paddle.ParamAttr(trainable=trainable), default_initializer=paddle.nn.initializer.Normal( - mean=0.1, std=0.001 + mean=1.0, std=0.001 ), ) ) @@ -458,25 +457,56 @@ def __init__( self.idt = paddle.nn.ParameterList(idt) def forward(self, xx): - hidden = nn.functional.tanh( - nn.functional.linear(xx, self.weight[0], self.bias[0]) - ).reshape( - [-1, 25] - ) # 1 - xx = hidden # 7 - - hidden = nn.functional.tanh( - nn.functional.linear(xx, self.weight[1], self.bias[1]) - ).reshape( - [-1, 50] - ) # 1 - xx = paddle.concat([xx, xx], axis=1) + hidden # 6 - - hidden = nn.functional.tanh( - nn.functional.linear(xx, self.weight[2], self.bias[2]) - ).reshape( - [-1, 100] - ) # 1 - xx = paddle.concat([xx, xx], axis=1) + hidden # 6 + outputs_size = self.outputs_size + for ii in range(1, len(outputs_size)): + if self.activation_fn is not None: + hidden = paddle.reshape( + self.activation_fn( + paddle.matmul(xx, self.weight[ii - 1]) + self.bias[ii - 1] + ), + [-1, outputs_size[ii]], + ) + else: + hidden = paddle.reshape( + paddle.matmul(xx, self.weight[ii - 1]) + self.bias[ii - 1], + [-1, outputs_size[ii]], + ) + + if outputs_size[ii] == outputs_size[ii - 1]: + if self.resnet_dt: + xx += hidden * self.idt[ii] + else: + xx += hidden + elif outputs_size[ii] == outputs_size[ii - 1] * 2: + if self.resnet_dt: + xx = paddle.concat([xx, xx], axis=1) + hidden * self.idt[ii] + else: + xx = paddle.concat([xx, xx], axis=1) + hidden + else: + xx = hidden return xx + + # == debug code below ==# + # hidden = nn.functional.tanh( + # 
nn.functional.linear(xx, self.weight[0], self.bias[0]) + # ).reshape( + # [-1, 25] + # ) # 1 + # xx = hidden # 7 + + # hidden = nn.functional.tanh( + # nn.functional.linear(xx, self.weight[1], self.bias[1]) + # ).reshape( + # [-1, 50] + # ) # 1 + # xx = paddle.concat([xx, xx], axis=1) + hidden # 6 + + # hidden = nn.functional.tanh( + # nn.functional.linear(xx, self.weight[2], self.bias[2]) + # ).reshape( + # [-1, 100] + # ) # 1 + # xx = paddle.concat([xx, xx], axis=1) + hidden # 6 + + # return xx