From 17bd1ec7f81cf7687224b976e16670bcd8893dd5 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 28 Feb 2024 23:35:46 -0500 Subject: [PATCH] pt: apply argcheck to pt (#3342) Signed-off-by: Jinzhe Zeng --- deepmd/pt/entrypoints/main.py | 11 + deepmd/pt/model/descriptor/dpa1.py | 26 +- deepmd/pt/model/descriptor/dpa2.py | 4 +- deepmd/pt/model/descriptor/repformer_layer.py | 4 +- deepmd/pt/model/descriptor/repformers.py | 6 +- deepmd/pt/model/descriptor/se_atten.py | 4 +- deepmd/tf/descriptor/se_atten.py | 28 ++ deepmd/utils/argcheck.py | 441 ++++++++++++++++-- examples/water/se_atten/input_torch.json | 8 +- source/tests/common/test_examples.py | 3 + source/tests/pt/model/models/dpa1.json | 2 +- source/tests/pt/model/models/dpa2_hyb.json | 2 +- source/tests/pt/model/test_jit.py | 18 +- source/tests/pt/model/test_permutation.py | 5 +- source/tests/pt/model/water/se_atten.json | 3 +- source/tests/pt/test_training.py | 18 +- 16 files changed, 506 insertions(+), 77 deletions(-) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index a317cea6a9..5583ee0326 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -50,6 +50,12 @@ from deepmd.pt.utils.stat import ( make_stat_input, ) +from deepmd.utils.argcheck import ( + normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) from deepmd.utils.path import ( DPPath, ) @@ -67,6 +73,11 @@ def get_trainer( force_load=False, init_frz_model=None, ): + # argcheck + if "model_dict" not in config.get("model", {}): + config = update_deepmd_input(config, warning=True, dump="input_v2_compat.json") + config = normalize(config) + # Initialize DDP local_rank = os.environ.get("LOCAL_RANK") if local_rank is not None: diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index b616d20cd8..6850c550fe 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -43,7 +43,7 @@ def __init__( post_ln=True, ffn=False, ffn_embed_dim=1024, - activation="tanh", + activation_function="tanh", scaling_factor=1.0, head_num=1, normalize=True, @@ -51,8 +51,30 @@ def __init__( return_rot=False, concat_output_tebd: bool = True, type: Optional[str] = None, + # not implemented + resnet_dt: bool = False, + type_one_side: bool = True, + precision: str = "default", + trainable: bool = True, + exclude_types: Optional[List[List[int]]] = None, + stripped_type_embedding: bool = False, + smooth_type_embdding: bool = False, ): super().__init__() + if resnet_dt: + raise NotImplementedError("resnet_dt is not supported.") + if not type_one_side: + raise NotImplementedError("type_one_side is not supported.") + if precision != "default" and precision != "float64": + raise NotImplementedError("precison is not supported.") + if not trainable: + raise NotImplementedError("trainable == False is not supported.") + if exclude_types is not None and exclude_types != []: + raise NotImplementedError("exclude_types is not supported.") + if stripped_type_embedding: + raise NotImplementedError("stripped_type_embedding is not supported.") + if smooth_type_embdding: + raise NotImplementedError("smooth_type_embdding is not supported.") del type self.se_atten = DescrptBlockSeAtten( rcut, @@ -71,7 +93,7 @@ def __init__( post_ln=post_ln, ffn=ffn, ffn_embed_dim=ffn_embed_dim, - activation=activation, + activation_function=activation_function, scaling_factor=scaling_factor, head_num=head_num, normalize=normalize, diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index b1df56a004..55bb77b366 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -197,7 +197,7 @@ def __init__( tebd_input_mode="concat", # tebd_input_mode='dot_residual_s', set_davg_zero=repinit_set_davg_zero, - activation=repinit_activation, + activation_function=repinit_activation, ) self.repformers = DescrptBlockRepformers( repformer_rcut, @@ -223,7 +223,7 @@ def __init__( attn2_hidden=repformer_attn2_hidden, attn2_nhead=repformer_attn2_nhead, attn2_has_gate=repformer_attn2_has_gate, - activation=repformer_activation, + activation_function=repformer_activation, update_style=repformer_update_style, set_davg_zero=repformer_set_davg_zero, smooth=True, diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index 55a2cba708..08fcb17b09 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -313,7 +313,7 @@ def __init__( attn2_hidden: int = 16, attn2_nhead: int = 4, attn2_has_gate: bool = False, - activation: str = "tanh", + activation_function: str = "tanh", update_style: str = "res_avg", set_davg_zero: bool = True, # TODO smooth: bool = True, @@ -332,7 +332,7 @@ def __init__( self.set_davg_zero = set_davg_zero self.do_bn_mode = do_bn_mode self.bn_momentum = bn_momentum - self.act = get_activation_fn(activation) + self.act = get_activation_fn(activation_function) self.update_g1_has_grrg = update_g1_has_grrg self.update_g1_has_drrd = update_g1_has_drrd self.update_g1_has_conv = update_g1_has_conv diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index ad523bcc2d..2425139e16 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -76,7 +76,7 @@ def __init__( attn2_hidden: int = 16, attn2_nhead: int = 4, attn2_has_gate: bool = False, - activation: str = "tanh", + activation_function: str = "tanh", update_style: str = "res_avg", set_davg_zero: bool = True, # TODO smooth: bool = True, @@ -109,7 +109,7 @@ def __init__( self.set_davg_zero = set_davg_zero self.g1_dim = g1_dim self.g2_dim = g2_dim - self.act = get_activation_fn(activation) + self.act = get_activation_fn(activation_function) self.direct_dist = direct_dist self.add_type_ebd_to_seq = add_type_ebd_to_seq @@ -140,7 +140,7 @@ def __init__( attn2_has_gate=attn2_has_gate, attn2_hidden=attn2_hidden, attn2_nhead=attn2_nhead, - activation=activation, + activation_function=activation_function, update_style=update_style, smooth=smooth, ) diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 0b32bd9341..a2197213ad 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -53,7 +53,7 @@ def __init__( post_ln=True, ffn=False, ffn_embed_dim=1024, - activation="tanh", + activation_function="tanh", scaling_factor=1.0, head_num=1, normalize=True, @@ -86,7 +86,7 @@ def __init__( self.post_ln = post_ln self.ffn = ffn self.ffn_embed_dim = ffn_embed_dim - self.activation = activation + self.activation = activation_function # TODO: To be fixed: precision should be given from inputs self.prec = torch.float64 self.scaling_factor = scaling_factor diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py index 1c3c48e484..35b354c8da 100644 --- a/deepmd/tf/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -152,6 +152,16 @@ def __init__( multi_task: bool = False, stripped_type_embedding: bool = False, smooth_type_embdding: bool = False, + # not implemented + post_ln=True, + ffn=False, + ffn_embed_dim=1024, + scaling_factor=1.0, + head_num=1, + normalize=True, + temperature=None, + return_rot=False, + concat_output_tebd: bool = True, **kwargs, ) -> None: if not set_davg_zero and not (stripped_type_embedding and smooth_type_embdding): @@ -159,6 +169,24 @@ def __init__( "Set 'set_davg_zero' False in descriptor 'se_atten' " "may cause unexpected incontinuity during model inference!" ) + if not post_ln: + raise NotImplementedError("post_ln is not supported.") + if ffn: + raise NotImplementedError("ffn is not supported.") + if ffn_embed_dim != 1024: + raise NotImplementedError("ffn_embed_dim is not supported.") + if scaling_factor != 1.0: + raise NotImplementedError("scaling_factor is not supported.") + if head_num != 1: + raise NotImplementedError("head_num is not supported.") + if not normalize: + raise NotImplementedError("normalize is not supported.") + if temperature is not None: + raise NotImplementedError("temperature is not supported.") + if return_rot: + raise NotImplementedError("return_rot is not supported.") + if not concat_output_tebd: + raise NotImplementedError("concat_output_tebd is not supported.") DescrptSeA.__init__( self, rcut, diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index dbe4881952..8366f7bb38 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -45,6 +45,9 @@ "bfloat16": None, } +doc_only_tf_supported = "(Supported Backend: TensorFlow) " +doc_only_pt_supported = "(Supported Backend: PyTorch) " + def list_to_doc(xx): items = [] @@ -109,7 +112,7 @@ def __init__(self) -> None: self.__plugin = Plugin() def register( - self, name: str, alias: Optional[List[str]] = None + self, name: str, alias: Optional[List[str]] = None, doc: str = "" ) -> Callable[[], List[Argument]]: """Register a descriptor argument plugin. @@ -135,7 +138,7 @@ def descrpt_some_descrpt_args(): # convert alias to hashed item if isinstance(alias, list): alias = tuple(alias) - return self.__plugin.register((name, alias)) + return self.__plugin.register((name, alias, doc)) def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: """Get all arguments. @@ -151,11 +154,11 @@ def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: all arguments """ arguments = [] - for (name, alias), metd in self.__plugin.plugins.items(): + for (name, alias, doc), metd in self.__plugin.plugins.items(): if exclude_hybrid and name == "hybrid": continue arguments.append( - Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias) + Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias, doc=doc) ) return arguments @@ -163,7 +166,7 @@ def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: descrpt_args_plugin = ArgsPlugin() -@descrpt_args_plugin.register("loc_frame") +@descrpt_args_plugin.register("loc_frame", doc=doc_only_tf_supported) def descrpt_local_frame_args(): doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor." doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius." @@ -244,7 +247,9 @@ def descrpt_se_a_args(): ] -@descrpt_args_plugin.register("se_e3", alias=["se_at", "se_a_3be", "se_t"]) +@descrpt_args_plugin.register( + "se_e3", alias=["se_at", "se_a_3be", "se_t"], doc=doc_only_tf_supported +) def descrpt_se_t_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ @@ -283,7 +288,7 @@ def descrpt_se_t_args(): ] -@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"]) +@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"], doc=doc_only_tf_supported) def descrpt_se_a_tpe_args(): doc_type_nchanl = "number of channels for type embedding" doc_type_nlayer = "number of hidden layers of type embedding net" @@ -348,7 +353,7 @@ def descrpt_se_r_args(): ] -@descrpt_args_plugin.register("hybrid") +@descrpt_args_plugin.register("hybrid", doc=doc_only_tf_supported) def descrpt_hybrid_args(): doc_list = "A list of descriptor definitions" @@ -376,12 +381,25 @@ def descrpt_se_atten_common_args(): doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." - doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net is trainable" + doc_resnet_dt = ( + doc_only_tf_supported + 'Whether to use a "Timestep" in the skip connection' + ) + doc_type_one_side = ( + doc_only_tf_supported + + r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." + ) + doc_precision = ( + doc_only_tf_supported + + f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + ) + doc_trainable = ( + doc_only_tf_supported + "If the parameters in the embedding net is trainable" + ) doc_seed = "Random seed for parameter initialization" - doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + doc_exclude_types = ( + doc_only_tf_supported + + "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + ) doc_attn = "The length of hidden vectors in attention layers" doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True" doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates" @@ -432,7 +450,7 @@ def descrpt_se_atten_common_args(): ] -@descrpt_args_plugin.register("se_atten") +@descrpt_args_plugin.register("se_atten", alias=["dpa1"]) def descrpt_se_atten_args(): doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible." doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True." @@ -445,22 +463,60 @@ def descrpt_se_atten_args(): bool, optional=True, default=False, - doc=doc_stripped_type_embedding, + doc=doc_only_tf_supported + doc_stripped_type_embedding, ), Argument( "smooth_type_embdding", bool, optional=True, default=False, - doc=doc_smooth_type_embdding, + doc=doc_only_tf_supported + doc_smooth_type_embdding, ), Argument( "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero ), + # pt only + Argument("tebd_dim", int, optional=True, default=8, doc=doc_only_pt_supported), + Argument( + "tebd_input_mode", + str, + optional=True, + default="concat", + doc=doc_only_pt_supported, + ), + Argument( + "post_ln", bool, optional=True, default=True, doc=doc_only_pt_supported + ), + Argument("ffn", bool, optional=True, default=False, doc=doc_only_pt_supported), + Argument( + "ffn_embed_dim", int, optional=True, default=1024, doc=doc_only_pt_supported + ), + Argument( + "scaling_factor", + float, + optional=True, + default=1.0, + doc=doc_only_pt_supported, + ), + Argument("head_num", int, optional=True, default=1, doc=doc_only_pt_supported), + Argument( + "normalize", bool, optional=True, default=True, doc=doc_only_pt_supported + ), + Argument("temperature", float, optional=True, doc=doc_only_pt_supported), + Argument( + "return_rot", bool, optional=True, default=False, doc=doc_only_pt_supported + ), + Argument( + "concat_output_tebd", + bool, + optional=True, + default=True, + doc=doc_only_pt_supported, + ), ] -@descrpt_args_plugin.register("se_atten_v2") +@descrpt_args_plugin.register("se_atten_v2", doc=doc_only_tf_supported) def descrpt_se_atten_v2_args(): doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used" @@ -472,12 +528,272 @@ def descrpt_se_atten_v2_args(): ] -@descrpt_args_plugin.register("se_a_ebd_v2", alias=["se_a_tpe_v2"]) +@descrpt_args_plugin.register("dpa2", doc=doc_only_pt_supported) +def descrpt_dpa2_args(): + # Generate by GitHub Copilot + doc_repinit_rcut = "The cut-off radius of the repinit block" + doc_repinit_rcut_smth = "From this position the inverse distance smoothly decays to 0 at the cut-off. Use in the repinit block." + doc_repinit_nsel = "Maximally possible number of neighbors for repinit block." + doc_repformer_rcut = "The cut-off radius of the repformer block" + doc_repformer_rcut_smth = "From this position the inverse distance smoothly decays to 0 at the cut-off. Use in the repformer block." + doc_repformer_nsel = "Maximally possible number of neighbors for repformer block." + doc_tebd_dim = "The dimension of atom type embedding" + doc_concat_output_tebd = ( + "Whether to concat type embedding at the output of the descriptor." + ) + doc_repinit_neuron = "repinit block: the number of neurons in the embedding net." + doc_repinit_axis_neuron = ( + "repinit block: the number of dimension of split in the symmetrization op." + ) + doc_repinit_activation = ( + "repinit block: the activation function in the embedding net" + ) + doc_repformer_nlayers = "repformers block: the number of repformer layers" + doc_repformer_g1_dim = "repformers block: the dimension of single-atom rep" + doc_repformer_g2_dim = "repformers block: the dimension of invariant pair-atom rep" + doc_repformer_axis_dim = ( + "repformers block: the number of dimension of split in the symmetrization ops." + ) + doc_repformer_do_bn_mode = "repformers block: do batch norm in the repformer layers" + doc_repformer_bn_momentum = "repformers block: moment in the batch normalization" + doc_repformer_update_g1_has_conv = ( + "repformers block: update the g1 rep with convolution term" + ) + doc_repformer_update_g1_has_drrd = ( + "repformers block: update the g1 rep with the drrd term" + ) + doc_repformer_update_g1_has_grrg = ( + "repformers block: update the g1 rep with the grrg term" + ) + doc_repformer_update_g1_has_attn = ( + "repformers block: update the g1 rep with the localized self-attention" + ) + doc_repformer_update_g2_has_g1g1 = ( + "repformers block: update the g2 rep with the g1xg1 term" + ) + doc_repformer_update_g2_has_attn = ( + "repformers block: update the g2 rep with the gated self-attention" + ) + doc_repformer_update_h2 = "repformers block: update the h2 rep" + doc_repformer_attn1_hidden = ( + "repformers block: the hidden dimension of localized self-attention" + ) + doc_repformer_attn1_nhead = ( + "repformers block: the number of heads in localized self-attention" + ) + doc_repformer_attn2_hidden = ( + "repformers block: the hidden dimension of gated self-attention" + ) + doc_repformer_attn2_nhead = ( + "repformers block: the number of heads in gated self-attention" + ) + doc_repformer_attn2_has_gate = ( + "repformers block: has gate in the gated self-attention" + ) + doc_repformer_activation = "repformers block: the activation function in the MLPs." + doc_repformer_update_style = "repformers block: style of update a rep. can be res_avg or res_incr. res_avg updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) res_incr updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n)" + doc_repformer_set_davg_zero = "repformers block: set the avg to zero in statistics" + doc_repformer_add_type_ebd_to_seq = ( + "repformers block: concatenate the type embedding at the output" + ) + return [ + Argument("repinit_rcut", float, doc=doc_repinit_rcut), + Argument("repinit_rcut_smth", float, doc=doc_repinit_rcut_smth), + Argument("repinit_nsel", int, doc=doc_repinit_nsel), + Argument("repformer_rcut", float, doc=doc_repformer_rcut), + Argument("repformer_rcut_smth", float, doc=doc_repformer_rcut_smth), + Argument("repformer_nsel", int, doc=doc_repformer_nsel), + Argument("tebd_dim", int, optional=True, default=8, doc=doc_tebd_dim), + Argument( + "concat_output_tebd", + bool, + optional=True, + default=True, + doc=doc_concat_output_tebd, + ), + Argument( + "repinit_neuron", + list, + optional=True, + default=[25, 50, 100], + doc=doc_repinit_neuron, + ), + Argument( + "repinit_axis_neuron", + int, + optional=True, + default=16, + doc=doc_repinit_axis_neuron, + ), + Argument("repinit_set_davg_zero", bool, optional=True, default=True), + Argument( + "repinit_activation", + str, + optional=True, + default="tanh", + doc=doc_repinit_activation, + ), + Argument( + "repformer_nlayers", + int, + optional=True, + default=3, + doc=doc_repformer_nlayers, + ), + Argument( + "repformer_g1_dim", + int, + optional=True, + default=128, + doc=doc_repformer_g1_dim, + ), + Argument( + "repformer_g2_dim", int, optional=True, default=16, doc=doc_repformer_g2_dim + ), + Argument( + "repformer_axis_dim", + int, + optional=True, + default=4, + doc=doc_repformer_axis_dim, + ), + Argument( + "repformer_do_bn_mode", + str, + optional=True, + default="no", + doc=doc_repformer_do_bn_mode, + ), + Argument( + "repformer_bn_momentum", + float, + optional=True, + default=0.1, + doc=doc_repformer_bn_momentum, + ), + Argument( + "repformer_update_g1_has_conv", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_conv, + ), + Argument( + "repformer_update_g1_has_drrd", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_drrd, + ), + Argument( + "repformer_update_g1_has_grrg", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_grrg, + ), + Argument( + "repformer_update_g1_has_attn", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_attn, + ), + Argument( + "repformer_update_g2_has_g1g1", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g2_has_g1g1, + ), + Argument( + "repformer_update_g2_has_attn", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g2_has_attn, + ), + Argument( + "repformer_update_h2", + bool, + optional=True, + default=False, + doc=doc_repformer_update_h2, + ), + Argument( + "repformer_attn1_hidden", + int, + optional=True, + default=64, + doc=doc_repformer_attn1_hidden, + ), + Argument( + "repformer_attn1_nhead", + int, + optional=True, + default=4, + doc=doc_repformer_attn1_nhead, + ), + Argument( + "repformer_attn2_hidden", + int, + optional=True, + default=16, + doc=doc_repformer_attn2_hidden, + ), + Argument( + "repformer_attn2_nhead", + int, + optional=True, + default=4, + doc=doc_repformer_attn2_nhead, + ), + Argument( + "repformer_attn2_has_gate", + bool, + optional=True, + default=False, + doc=doc_repformer_attn2_has_gate, + ), + Argument( + "repformer_activation", + str, + optional=True, + default="tanh", + doc=doc_repformer_activation, + ), + Argument( + "repformer_update_style", + str, + optional=True, + default="res_avg", + doc=doc_repformer_update_style, + ), + Argument( + "repformer_set_davg_zero", + bool, + optional=True, + default=True, + doc=doc_repformer_set_davg_zero, + ), + Argument( + "repformer_add_type_ebd_to_seq", + bool, + optional=True, + default=False, + doc=doc_repformer_add_type_ebd_to_seq, + ), + ] + + +@descrpt_args_plugin.register( + "se_a_ebd_v2", alias=["se_a_tpe_v2"], doc=doc_only_tf_supported +) def descrpt_se_a_ebd_v2_args(): return descrpt_se_a_args() -@descrpt_args_plugin.register("se_a_mask") +@descrpt_args_plugin.register("se_a_mask", doc=doc_only_tf_supported) def descrpt_se_a_mask_args(): doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ @@ -637,7 +953,7 @@ def fitting_ener(): ] -@fitting_args_plugin.register("dos") +@fitting_args_plugin.register("dos", doc=doc_only_tf_supported) def fitting_dos(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." @@ -684,7 +1000,7 @@ def fitting_dos(): ] -@fitting_args_plugin.register("polar") +@fitting_args_plugin.register("polar", doc=doc_only_tf_supported) def fitting_polar(): doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' @@ -738,7 +1054,7 @@ def fitting_polar(): # return fitting_polar() -@fitting_args_plugin.register("dipole") +@fitting_args_plugin.register("dipole", doc=doc_only_tf_supported) def fitting_dipole(): doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' @@ -900,16 +1216,30 @@ def model_args(exclude_hybrid=False): default=10, doc=doc_data_bias_nsample, ), - Argument("use_srtab", str, optional=True, doc=doc_use_srtab), - Argument("smin_alpha", float, optional=True, doc=doc_smin_alpha), - Argument("sw_rmin", float, optional=True, doc=doc_sw_rmin), - Argument("sw_rmax", float, optional=True, doc=doc_sw_rmax), + Argument( + "use_srtab", + str, + optional=True, + doc=doc_only_tf_supported + doc_use_srtab, + ), + Argument( + "smin_alpha", + float, + optional=True, + doc=doc_only_tf_supported + doc_smin_alpha, + ), + Argument( + "sw_rmin", float, optional=True, doc=doc_only_tf_supported + doc_sw_rmin + ), + Argument( + "sw_rmax", float, optional=True, doc=doc_only_tf_supported + doc_sw_rmax + ), Argument( "srtab_add_bias", bool, optional=True, default=True, - doc=doc_srtab_add_bias, + doc=doc_only_tf_supported + doc_srtab_add_bias, ), Argument( "type_embedding", @@ -917,7 +1247,7 @@ def model_args(exclude_hybrid=False): type_embedding_args(), [], optional=True, - doc=doc_type_embedding, + doc=doc_only_tf_supported + doc_type_embedding, ), Argument( "modifier", @@ -925,7 +1255,7 @@ def model_args(exclude_hybrid=False): [], [modifier_variant_type_args()], optional=True, - doc=doc_modifier, + doc=doc_only_tf_supported + doc_modifier, ), Argument( "compress", @@ -933,7 +1263,7 @@ def model_args(exclude_hybrid=False): [], [model_compression_type_args()], optional=True, - doc=doc_compress_config, + doc=doc_only_tf_supported + doc_compress_config, fold_subdoc=True, ), Argument("spin", dict, spin_args(), [], optional=True, doc=doc_spin), @@ -997,7 +1327,7 @@ def multi_model_args() -> Argument: ), Argument("fitting_net_dict", dict, doc=doc_fitting_net_dict), ], - doc="Multiple-task model.", + doc=doc_only_tf_supported + "Multiple-task model.", ) return ca @@ -1016,6 +1346,7 @@ def pairwise_dprc() -> Argument: qm_model_args, qmmm_model_args, ], + doc=doc_only_tf_supported, ) return ca @@ -1028,6 +1359,7 @@ def frozen_model_args() -> Argument: [ Argument("model_file", str, optional=False, doc=doc_model_file), ], + doc=doc_only_tf_supported, ) return ca @@ -1047,7 +1379,7 @@ def pairtab_model_args() -> Argument: Argument("rcut", float, optional=False, doc=doc_rcut), Argument("sel", [int, List[int], str], optional=False, doc=doc_sel), ], - doc="Pairwise tabulation energy model.", + doc=doc_only_tf_supported + "Pairwise tabulation energy model.", ) return ca @@ -1076,6 +1408,7 @@ def linear_ener_model_args() -> Argument: doc=doc_weights, ), ], + doc=doc_only_tf_supported, ) return ca @@ -1390,7 +1723,7 @@ def loss_ener_spin(): ] -@loss_args_plugin.register("dos") +@loss_args_plugin.register("dos", doc=doc_only_tf_supported) def loss_dos(): doc_start_pref_dos = start_pref("Density of State (DOS)") doc_limit_pref_dos = limit_pref("Density of State (DOS)") @@ -1465,7 +1798,7 @@ def loss_dos(): # YWolfeee: Modified to support tensor type of loss args. -@loss_args_plugin.register("tensor") +@loss_args_plugin.register("tensor", doc=doc_only_tf_supported) def loss_tensor(): # doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]." # doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well." @@ -1746,13 +2079,19 @@ def training_args(): # ! modified by Ziyao: data configuration isolated. Argument( "time_training", bool, optional=True, default=True, doc=doc_time_training ), - Argument("profiling", bool, optional=True, default=False, doc=doc_profiling), + Argument( + "profiling", + bool, + optional=True, + default=False, + doc=doc_only_tf_supported + doc_profiling, + ), Argument( "profiling_file", str, optional=True, default="timeline.json", - doc=doc_profiling_file, + doc=doc_only_tf_supported + doc_profiling_file, ), Argument( "enable_profiler", @@ -1776,10 +2115,38 @@ def training_args(): # ! modified by Ziyao: data configuration isolated. ), Argument("data_dict", dict, optional=True, doc=doc_data_dict), Argument("fitting_weight", dict, optional=True, doc=doc_fitting_weight), + Argument("warmup_steps", int, optional=True, doc=doc_only_pt_supported), + Argument("gradient_max_norm", float, optional=True, doc=doc_only_pt_supported), + Argument("stat_file", str, optional=True, doc=doc_only_pt_supported), + ] + variants = [ + Variant( + "opt_type", + choices=[ + Argument("Adam", dict, [], [], optional=True), + Argument( + "LKF", + dict, + [ + Argument( + "kf_blocksize", + int, + optional=True, + doc=doc_only_pt_supported, + ), + ], + [], + optional=True, + ), + ], + optional=True, + default_tag="Adam", + doc=doc_only_pt_supported, + ) ] doc_training = "The training options." - return Argument("training", dict, args, [], doc=doc_training) + return Argument("training", dict, args, variants, doc=doc_training) def make_index(keys): diff --git a/examples/water/se_atten/input_torch.json b/examples/water/se_atten/input_torch.json index bc948cc2a0..7e9cf06f35 100644 --- a/examples/water/se_atten/input_torch.json +++ b/examples/water/se_atten/input_torch.json @@ -17,6 +17,7 @@ ], "tebd_dim": 8, "axis_neuron": 16, + "type_one_side": true, "attn": 128, "attn_layer": 2, "attn_dotr": true, @@ -24,7 +25,7 @@ "post_ln": true, "ffn": false, "ffn_embed_dim": 1024, - "activation": "tanh", + "activation_function": "tanh", "scaling_factor": 1.0, "head_num": 1, "normalize": true, @@ -78,11 +79,6 @@ "numb_btch": 3, "_comment": "that's all" }, - "wandb_config": { - "wandb_enabled": false, - "entity": "dp_model_engineering", - "project": "DPA" - }, "numb_steps": 1000000, "seed": 10, "disp_file": "lcurve.out", diff --git a/source/tests/common/test_examples.py b/source/tests/common/test_examples.py index ad06925eab..49abcf2f90 100644 --- a/source/tests/common/test_examples.py +++ b/source/tests/common/test_examples.py @@ -42,6 +42,9 @@ p_examples / "dprc" / "normal" / "input.json", p_examples / "dprc" / "pairwise" / "input.json", p_examples / "dprc" / "generalized_force" / "input.json", + p_examples / "water" / "se_e2_a" / "input_torch.json", + p_examples / "water" / "se_atten" / "input_torch.json", + p_examples / "water" / "dpa2" / "input_torch.json", ) diff --git a/source/tests/pt/model/models/dpa1.json b/source/tests/pt/model/models/dpa1.json index dd838ac692..5d2c65c214 100644 --- a/source/tests/pt/model/models/dpa1.json +++ b/source/tests/pt/model/models/dpa1.json @@ -21,7 +21,7 @@ "post_ln": true, "ffn": false, "ffn_embed_dim": 10, - "activation": "tanh", + "activation_function": "tanh", "scaling_factor": 1.0, "head_num": 1, "normalize": true, diff --git a/source/tests/pt/model/models/dpa2_hyb.json b/source/tests/pt/model/models/dpa2_hyb.json index b5d53b0246..ee69ed4d69 100644 --- a/source/tests/pt/model/models/dpa2_hyb.json +++ b/source/tests/pt/model/models/dpa2_hyb.json @@ -25,7 +25,7 @@ "post_ln": true, "ffn": false, "ffn_embed_dim": 10, - "activation": "tanh", + "activation_function": "tanh", "scaling_factor": 1.0, "head_num": 1, "normalize": true, diff --git a/source/tests/pt/model/test_jit.py b/source/tests/pt/model/test_jit.py index f13dade183..a1aa9658fc 100644 --- a/source/tests/pt/model/test_jit.py +++ b/source/tests/pt/model/test_jit.py @@ -85,15 +85,15 @@ def setUp(self): self.config["training"]["training_data"]["systems"] = data_file self.config["training"]["validation_data"]["systems"] = data_file self.config["model"] = deepcopy(model_dpa2) - self.config["model"]["descriptor"]["rcut"] = self.config["model"]["descriptor"][ - "repinit_rcut" - ] - self.config["model"]["descriptor"]["rcut_smth"] = self.config["model"][ - "descriptor" - ]["repinit_rcut_smth"] - self.config["model"]["descriptor"]["sel"] = self.config["model"]["descriptor"][ - "repinit_nsel" - ] + # self.config["model"]["descriptor"]["rcut"] = self.config["model"]["descriptor"][ + # "repinit_rcut" + # ] + # self.config["model"]["descriptor"]["rcut_smth"] = self.config["model"][ + # "descriptor" + # ]["repinit_rcut_smth"] + # self.config["model"]["descriptor"]["sel"] = self.config["model"]["descriptor"][ + # "repinit_nsel" + # ] self.config["training"]["numb_steps"] = 10 self.config["training"]["save_freq"] = 10 diff --git a/source/tests/pt/model/test_permutation.py b/source/tests/pt/model/test_permutation.py index b97cb349ad..45790bf43d 100644 --- a/source/tests/pt/model/test_permutation.py +++ b/source/tests/pt/model/test_permutation.py @@ -115,12 +115,13 @@ "post_ln": True, "ffn": False, "ffn_embed_dim": 512, - "activation": "tanh", + "activation_function": "tanh", "scaling_factor": 1.0, "head_num": 1, "normalize": False, "temperature": 1.0, "set_davg_zero": True, + "type_one_side": True, }, "fitting_net": { "neuron": [24, 24, 24], @@ -149,7 +150,7 @@ "post_ln": True, "ffn": False, "ffn_embed_dim": 1024, - "activation": "tanh", + "activation_function": "tanh", "scaling_factor": 1.0, "head_num": 1, "normalize": True, diff --git a/source/tests/pt/model/water/se_atten.json b/source/tests/pt/model/water/se_atten.json index 3ed80ae892..6b6fca50d3 100644 --- a/source/tests/pt/model/water/se_atten.json +++ b/source/tests/pt/model/water/se_atten.json @@ -16,6 +16,7 @@ 100 ], "axis_neuron": 16, + "type_one_side": true, "attn": 64, "attn_layer": 2, "attn_dotr": true, @@ -23,7 +24,7 @@ "post_ln": true, "ffn": false, "ffn_embed_dim": 512, - "activation": "tanh", + "activation_function": "tanh", "scaling_factor": 1.0, "head_num": 1, "normalize": false, diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 2186467788..f86691cde6 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -79,15 +79,15 @@ def setUp(self): self.config["training"]["training_data"]["systems"] = data_file self.config["training"]["validation_data"]["systems"] = data_file self.config["model"] = deepcopy(model_dpa2) - self.config["model"]["descriptor"]["rcut"] = self.config["model"]["descriptor"][ - "repinit_rcut" - ] - self.config["model"]["descriptor"]["rcut_smth"] = self.config["model"][ - "descriptor" - ]["repinit_rcut_smth"] - self.config["model"]["descriptor"]["sel"] = self.config["model"]["descriptor"][ - "repinit_nsel" - ] + # self.config["model"]["descriptor"]["rcut"] = self.config["model"]["descriptor"][ + # "repinit_rcut" + # ] + # self.config["model"]["descriptor"]["rcut_smth"] = self.config["model"][ + # "descriptor" + # ]["repinit_rcut_smth"] + # self.config["model"]["descriptor"]["sel"] = self.config["model"]["descriptor"][ + # "repinit_nsel" + # ] self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1