diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 876062cce6..b633bc6807 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -25,6 +25,9 @@ LayerNorm, NativeLayer, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.type_embed import ( TypeEmbedNet, ) @@ -251,7 +254,7 @@ def __init__( use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # consistent with argcheck, not used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: ## seed, uniform_seed, not included. # Ensure compatibility with the deprecated stripped_type_embedding option. @@ -294,6 +297,7 @@ def __init__( env_protection=env_protection, trainable_ln=trainable_ln, ln_eps=ln_eps, + seed=child_seed(seed, 0), ) self.use_econf_tebd = use_econf_tebd self.type_map = type_map @@ -305,6 +309,7 @@ def __init__( precision=precision, use_econf_tebd=use_econf_tebd, type_map=type_map, + seed=child_seed(seed, 1), ) self.tebd_dim = tebd_dim self.concat_output_tebd = concat_output_tebd @@ -625,6 +630,7 @@ def __init__( trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, smooth: bool = True, + seed: Optional[Union[int, List[int]]] = None, ) -> None: self.rcut = rcut self.rcut_smth = rcut_smth @@ -674,6 +680,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=child_seed(seed, 0), ) if self.tebd_input_mode in ["strip"]: self.embeddings_strip = NetworkCollection( @@ -687,6 +694,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=child_seed(seed, 1), ) else: self.embeddings_strip = None @@ -703,6 +711,7 @@ def __init__( ln_eps=self.ln_eps, smooth=self.smooth, precision=self.precision, + seed=child_seed(seed, 2), ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -950,6 +959,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a neighbor-wise attention net.""" super().__init__() @@ -982,8 +992,9 @@ def __init__( ln_eps=ln_eps, smooth=smooth, precision=precision, + seed=child_seed(seed, ii), ) - for _ in range(layer_num) + for ii in range(layer_num) ] def call( @@ -1076,6 +1087,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a neighbor-wise attention layer.""" super().__init__() @@ -1101,9 +1113,14 @@ def __init__( temperature=temperature, smooth=smooth, precision=precision, + seed=child_seed(seed, 0), ) self.attn_layer_norm = LayerNorm( - self.embed_dim, eps=ln_eps, trainable=self.trainable_ln, precision=precision + self.embed_dim, + eps=ln_eps, + trainable=self.trainable_ln, + precision=precision, + seed=child_seed(seed, 1), ) def call( @@ -1176,6 +1193,7 @@ def __init__( bias: bool = True, smooth: bool = True, precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -1204,6 +1222,7 @@ def __init__( bias=bias, use_timestep=False, precision=precision, + seed=child_seed(seed, 0), ) self.out_proj = NativeLayer( hidden_dim, @@ -1211,6 +1230,7 @@ def __init__( bias=bias, use_timestep=False, precision=precision, + seed=child_seed(seed, 1), ) def call(self, query, nei_mask, input_r=None, sw=None, attnw_shift=20.0): diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py 
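A minimal sketch of what the child_seed calls threaded through DescrptDPA1 above buy us: each submodule draws its parameters from its own deterministic stream, so adding or removing one submodule no longer shifts the initialization of the others. Illustrative only, not part of the patch; it assumes the deepmd.dpmodel.utils.seed module added later in this diff.

import numpy as np

from deepmd.dpmodel.utils.seed import child_seed

seed = 42
# index 0 -> the se_atten block, index 1 -> the type embedding, as above
w_attn = np.random.default_rng(child_seed(seed, 0)).normal(size=3)
w_tebd = np.random.default_rng(child_seed(seed, 1)).normal(size=3)
# both streams are reproducible run-to-run, yet independent of each other
assert not np.allclose(w_attn, w_tebd)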
index 766fe19302..d42a9da26e 100644 --- a/deepmd/dpmodel/descriptor/dpa2.py +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -23,6 +23,9 @@ build_multiple_neighbor_list, get_multiple_nlist_key, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.type_embed import ( TypeEmbedNet, ) @@ -325,7 +328,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], env_protection: float = 0.0, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, add_tebd_to_repinit_out: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, @@ -408,6 +411,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, type_one_side=self.repinit_args.type_one_side, + seed=child_seed(seed, 0), ) self.repformers = DescrptBlockRepformers( self.repformer_args.rcut, @@ -442,6 +446,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, trainable_ln=self.repformer_args.trainable_ln, ln_eps=self.repformer_args.ln_eps, + seed=child_seed(seed, 1), ) self.use_econf_tebd = use_econf_tebd self.type_map = type_map @@ -453,6 +458,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, use_econf_tebd=use_econf_tebd, type_map=type_map, + seed=child_seed(seed, 2), ) self.concat_output_tebd = concat_output_tebd self.precision = precision diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py index db11268eca..67c72e8d31 100644 --- a/deepmd/dpmodel/descriptor/repformers.py +++ b/deepmd/dpmodel/descriptor/repformers.py @@ -22,6 +22,9 @@ NativeLayer, get_activation_fn, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.utils.path import ( DPPath, ) @@ -40,6 +43,87 @@ @DescriptorBlock.register("se_repformer") @DescriptorBlock.register("se_uni") class DescrptBlockRepformers(NativeOP, DescriptorBlock): + r""" + The repformer descriptor block. + + Parameters + ---------- + rcut : float + The cut-off radius. + rcut_smth : float + Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth. + sel : int + Maximally possible number of selected neighbors. + ntypes : int + Number of element types + nlayers : int, optional + Number of repformer layers. + g1_dim : int, optional + Dimension of the first graph convolution layer. + g2_dim : int, optional + Dimension of the second graph convolution layer. + axis_neuron : int, optional + Size of the submatrix of G (embedding matrix). + direct_dist : bool, optional + Whether to use direct distance information (1/r term) in the repformer block. + update_g1_has_conv : bool, optional + Whether to update the g1 rep with convolution term. + update_g1_has_drrd : bool, optional + Whether to update the g1 rep with the drrd term. + update_g1_has_grrg : bool, optional + Whether to update the g1 rep with the grrg term. + update_g1_has_attn : bool, optional + Whether to update the g1 rep with the localized self-attention. + update_g2_has_g1g1 : bool, optional + Whether to update the g2 rep with the g1xg1 term. + update_g2_has_attn : bool, optional + Whether to update the g2 rep with the gated self-attention. + update_h2 : bool, optional + Whether to update the h2 rep. + attn1_hidden : int, optional + The hidden dimension of localized self-attention to update the g1 rep. + attn1_nhead : int, optional + The number of heads in localized self-attention to update the g1 rep. 
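The fixed child indices in DescrptDPA2 form a small seed tree, and nesting child_seed (as the repformer layers do further below) extends the same scheme one level down. A hypothetical sketch of the resulting tree:

from deepmd.dpmodel.utils.seed import child_seed

seed = 2024
repinit_seed = child_seed(seed, 0)    # [0, 2024] -> repinit block
repformer_seed = child_seed(seed, 1)  # [1, 2024] -> repformer block
tebd_seed = child_seed(seed, 2)       # [2, 2024] -> type embedding
# nesting yields distinct per-layer seeds inside a block
layer0_seed = child_seed(repformer_seed, 0)  # [0, 1, 2024]
layer1_seed = child_seed(repformer_seed, 1)  # [1, 1, 2024]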
+ attn2_hidden : int, optional + The hidden dimension of gated self-attention to update the g2 rep. + attn2_nhead : int, optional + The number of heads in gated self-attention to update the g2 rep. + attn2_has_gate : bool, optional + Whether to use gate in the gated self-attention to update the g2 rep. + activation_function : str, optional + The activation function in the embedding net. + update_style : str, optional + Style to update a representation. + Supported options are: + -'res_avg': Updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) + -'res_incr': Updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n) + -'res_residual': Updates a rep `u` with: u = u + (r1*u_1 + r2*u_2 + ... + rn*u_n) + where `r1`, `r2` ... `rn` are residual weights defined by `update_residual` + and `update_residual_init`. + update_residual : float, optional + When update using residual mode, the initial std of residual vector weights. + update_residual_init : str, optional + When update using residual mode, the initialization mode of residual vector weights. + set_davg_zero : bool, optional + Set the normalization average to zero. + precision : str, optional + The precision of the embedding net parameters. + smooth : bool, optional + Whether to use smoothness in processes such as attention weights calculation. + exclude_types : List[List[int]], optional + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float, optional + Protection parameter to prevent division by zero errors during environment matrix calculations. + For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. + trainable_ln : bool, optional + Whether to use trainable shift and scale weights in layer normalization. + ln_eps : float, optional + The epsilon value for layer normalization. + seed : int or list of int, optional + The random seed for initialization. + """ + def __init__( self, rcut, @@ -74,85 +158,8 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, + seed: Optional[Union[int, List[int]]] = None, ): - r""" - The repformer descriptor block. - - Parameters - ---------- - rcut : float - The cut-off radius. - rcut_smth : float - Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth. - sel : int - Maximally possible number of selected neighbors. - ntypes : int - Number of element types - nlayers : int, optional - Number of repformer layers. - g1_dim : int, optional - Dimension of the first graph convolution layer. - g2_dim : int, optional - Dimension of the second graph convolution layer. - axis_neuron : int, optional - Size of the submatrix of G (embedding matrix). - direct_dist : bool, optional - Whether to use direct distance information (1/r term) in the repformer block. - update_g1_has_conv : bool, optional - Whether to update the g1 rep with convolution term. - update_g1_has_drrd : bool, optional - Whether to update the g1 rep with the drrd term. - update_g1_has_grrg : bool, optional - Whether to update the g1 rep with the grrg term. - update_g1_has_attn : bool, optional - Whether to update the g1 rep with the localized self-attention. - update_g2_has_g1g1 : bool, optional - Whether to update the g2 rep with the g1xg1 term.
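The three update_style options documented above, written out on toy arrays (hypothetical values, with n = 2 update terms):

import numpy as np

u = np.ones(4)                                # current representation
updates = [np.full(4, 0.1), np.full(4, 0.2)]  # u_1 ... u_n
n = len(updates)

res_avg = (u + sum(updates)) / np.sqrt(n + 1)     # 'res_avg'
res_incr = u + sum(updates) / np.sqrt(n)          # 'res_incr'
r = [np.full(4, 0.05), np.full(4, 0.07)]          # weights from get_residual
res_residual = u + sum(ri * ui for ri, ui in zip(r, updates))  # 'res_residual'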
- update_g2_has_attn : bool, optional - Whether to update the g2 rep with the gated self-attention. - update_h2 : bool, optional - Whether to update the h2 rep. - attn1_hidden : int, optional - The hidden dimension of localized self-attention to update the g1 rep. - attn1_nhead : int, optional - The number of heads in localized self-attention to update the g1 rep. - attn2_hidden : int, optional - The hidden dimension of gated self-attention to update the g2 rep. - attn2_nhead : int, optional - The number of heads in gated self-attention to update the g2 rep. - attn2_has_gate : bool, optional - Whether to use gate in the gated self-attention to update the g2 rep. - activation_function : str, optional - The activation function in the embedding net. - update_style : str, optional - Style to update a representation. - Supported options are: - -'res_avg': Updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) - -'res_incr': Updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n) - -'res_residual': Updates a rep `u` with: u = u + (r1*u_1 + r2*u_2 + ... + r3*u_n) - where `r1`, `r2` ... `r3` are residual weights defined by `update_residual` - and `update_residual_init`. - update_residual : float, optional - When update using residual mode, the initial std of residual vector weights. - update_residual_init : str, optional - When update using residual mode, the initialization mode of residual vector weights. - set_davg_zero : bool, optional - Set the normalization average to zero. - precision : str, optional - The precision of the embedding net parameters. - smooth : bool, optional - Whether to use smoothness in processes such as attention weights calculation. - exclude_types : List[List[int]], optional - The excluded pairs of types which have no interaction with each other. - For example, `[[0, 1]]` means no interaction between type 0 and type 1. - env_protection : float, optional - Protection parameter to prevent division by zero errors during environment matrix calculations. - For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. - trainable_ln : bool, optional - Whether to use trainable shift and scale weights in layer normalization. - ln_eps : float, optional - The epsilon value for layer normalization. - """ super().__init__() self.rcut = rcut self.rcut_smth = rcut_smth @@ -196,7 +203,9 @@ def __init__( self.ln_eps = ln_eps self.epsilon = 1e-4 - self.g2_embd = NativeLayer(1, self.g2_dim, precision=precision) + self.g2_embd = NativeLayer( + 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + ) layers = [] for ii in range(nlayers): layers.append( @@ -229,6 +238,7 @@ def __init__( trainable_ln=self.trainable_ln, ln_eps=self.ln_eps, precision=precision, + seed=child_seed(child_seed(seed, 1), ii), ) ) self.layers = layers @@ -399,6 +409,7 @@ def get_residual( _mode: str = "norm", trainable: bool = True, precision: str = "float64", + seed: Optional[Union[int, List[int]]] = None, ) -> np.ndarray: """ Get residual tensor for one update vector. @@ -419,7 +430,7 @@ def get_residual( The precision of the residual tensor. 
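With the change just below, get_residual seeds its generator explicitly, so the residual weights become reproducible. A quick check of the idea, assuming only numpy:

import numpy as np

_dim, _scale = (8,), 0.001
res_a = np.random.default_rng(seed=[0, 7]).normal(scale=_scale, size=_dim)
res_b = np.random.default_rng(seed=[0, 7]).normal(scale=_scale, size=_dim)
assert np.allclose(res_a, res_b)  # same child seed -> identical residual init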
""" residual = np.zeros(_dim, dtype=PRECISION_DICT[precision]) - rng = np.random.default_rng() + rng = np.random.default_rng(seed=seed) if trainable: if _mode == "norm": residual = rng.normal(scale=_scale, size=_dim).astype( @@ -634,6 +645,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", + seed: Optional[Union[int, List[int]]] = None, ): """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -641,7 +653,11 @@ def __init__( self.hidden_dim = hidden_dim self.head_num = head_num self.mapqk = NativeLayer( - input_dim, hidden_dim * 2 * head_num, bias=False, precision=precision + input_dim, + hidden_dim * 2 * head_num, + bias=False, + precision=precision, + seed=seed, ) self.has_gate = has_gate self.smooth = smooth @@ -747,15 +763,23 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim self.head_num = head_num self.mapv = NativeLayer( - input_dim, input_dim * head_num, bias=False, precision=precision + input_dim, + input_dim * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 0), ) self.head_map = NativeLayer( - input_dim * head_num, input_dim, precision=precision + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 1), ) self.precision = precision @@ -824,11 +848,14 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim self.head_num = head_num - self.head_map = NativeLayer(head_num, 1, bias=False, precision=precision) + self.head_map = NativeLayer( + head_num, 1, bias=False, precision=precision, seed=seed + ) self.precision = precision def call( @@ -894,22 +921,31 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim self.hidden_dim = hidden_dim self.head_num = head_num self.mapq = NativeLayer( - input_dim, hidden_dim * 1 * head_num, bias=False, precision=precision + input_dim, + hidden_dim * 1 * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 0), ) self.mapkv = NativeLayer( input_dim, (hidden_dim + input_dim) * head_num, bias=False, precision=precision, + seed=child_seed(seed, 1), ) self.head_map = NativeLayer( - input_dim * head_num, input_dim, precision=precision + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 2), ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -1044,6 +1080,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -1099,11 +1136,17 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=child_seed(seed, 0), ) ) g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_neuron) - self.linear1 = NativeLayer(g1_in_dim, g1_dim, precision=precision) + self.linear1 = NativeLayer( + g1_in_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 1), + ) self.linear2 = None self.proj_g1g2 = None self.proj_g1g1g2 = None @@ -1114,7 +1157,12 @@ def __init__( self.loc_attn = None if self.update_chnnl_2: - self.linear2 = NativeLayer(g2_dim, g2_dim, precision=precision) + self.linear2 = NativeLayer( + g2_dim, + 
g2_dim, + precision=precision, + seed=child_seed(seed, 2), + ) if self.update_style == "res_residual": self.g2_residual.append( get_residual( @@ -1122,15 +1170,24 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=child_seed(seed, 3), ) ) if self.update_g1_has_conv: self.proj_g1g2 = NativeLayer( - g1_dim, g2_dim, bias=False, precision=precision + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), ) if self.update_g2_has_g1g1: self.proj_g1g1g2 = NativeLayer( - g1_dim, g2_dim, bias=False, precision=precision + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 5), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -1139,6 +1196,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=child_seed(seed, 6), ) ) if self.update_g2_has_attn or self.update_h2: @@ -1149,13 +1207,18 @@ def __init__( attn2_has_gate, self.smooth, precision=precision, + seed=child_seed(seed, 7), ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( - g2_dim, attn2_nhead, precision=precision + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) ) self.attn2_lm = LayerNorm( - g2_dim, eps=ln_eps, trainable=trainable_ln, precision=precision + g2_dim, + eps=ln_eps, + trainable=trainable_ln, + precision=precision, + seed=child_seed(seed, 9), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -1164,12 +1227,13 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=child_seed(seed, 10), ) ) if self.update_h2: self.attn2_ev_apply = Atten2EquiVarApply( - g2_dim, attn2_nhead, precision=precision + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) ) if self.update_style == "res_residual": self.h2_residual.append( @@ -1178,11 +1242,17 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=child_seed(seed, 12), ) ) if self.update_g1_has_attn: self.loc_attn = LocalAtten( - g1_dim, attn1_hidden, attn1_nhead, self.smooth, precision=precision + g1_dim, + attn1_hidden, + attn1_nhead, + self.smooth, + precision=precision, + seed=child_seed(seed, 13), ) if self.update_style == "res_residual": self.g1_residual.append( @@ -1191,6 +1261,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=child_seed(seed, 14), ) ) diff --git a/deepmd/dpmodel/descriptor/se_atten_v2.py b/deepmd/dpmodel/descriptor/se_atten_v2.py index 1375d2265f..037222076e 100644 --- a/deepmd/dpmodel/descriptor/se_atten_v2.py +++ b/deepmd/dpmodel/descriptor/se_atten_v2.py @@ -66,7 +66,7 @@ def __init__( use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # consistent with argcheck, not used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: DescrptDPA1.__init__( self, diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index 504e357aeb..75ac11dbed 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -6,6 +6,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -21,6 +22,9 @@ NetworkCollection, PairExcludeMask, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.update_sel import ( UpdateSel, ) @@ -158,7 +162,7 @@ def __init__( type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input # consistent with argcheck, not 
used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: del ntypes ## seed, uniform_seed, not included. @@ -189,8 +193,8 @@ def __init__( ndim=(1 if self.type_one_side else 2), network_type="embedding_network", ) - for embedding_idx in itertools.product( - range(self.ntypes), repeat=self.embeddings.ndim + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=self.embeddings.ndim) ): self.embeddings[embedding_idx] = EmbeddingNet( in_dim, @@ -198,6 +202,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=child_seed(seed, ii), ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 938826d16c..20a6fe49dd 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -5,6 +5,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -20,6 +21,9 @@ NetworkCollection, PairExcludeMask, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.update_sel import ( UpdateSel, ) @@ -115,7 +119,7 @@ def __init__( type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input # consistent with argcheck, not used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: del ntypes ## seed, uniform_seed, not included. @@ -156,6 +160,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=child_seed(seed, ii), ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py index b91f9a6c6e..72d8a24bd9 100644 --- a/deepmd/dpmodel/descriptor/se_t.py +++ b/deepmd/dpmodel/descriptor/se_t.py @@ -5,6 +5,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -20,6 +21,9 @@ NetworkCollection, PairExcludeMask, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.update_sel import ( UpdateSel, ) @@ -98,7 +102,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], precision: str = DEFAULT_PRECISION, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input ) -> None: @@ -127,8 +131,8 @@ def __init__( ndim=2, network_type="embedding_network", ) - for embedding_idx in itertools.product( - range(self.ntypes), repeat=self.embeddings.ndim + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=self.embeddings.ndim) ): self.embeddings[embedding_idx] = EmbeddingNet( in_dim, @@ -136,6 +140,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=child_seed(self.seed, ii), ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index f922b57367..6932b0d1e2 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -5,6 +5,7 @@ Dict, List, Optional, + Union, ) import numpy as np @@ -108,7 +109,7 @@ def __init__( type_map: Optional[List[str]] = None, old_impl=False, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): # seed, 
uniform_seed are not included if tot_ener_zero: diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py index 2c113c1f7d..5a5eb8840a 100644 --- a/deepmd/dpmodel/fitting/dos_fitting.py +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -46,7 +46,7 @@ def __init__( exclude_types: List[int] = [], type_map: Optional[List[str]] = None, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): if bias_dos is not None: self.bias_dos = bias_dos diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py index 7c262209d9..9e639c1de2 100644 --- a/deepmd/dpmodel/fitting/ener_fitting.py +++ b/deepmd/dpmodel/fitting/ener_fitting.py @@ -5,6 +5,7 @@ Any, List, Optional, + Union, ) from deepmd.dpmodel.common import ( @@ -46,7 +47,7 @@ def __init__( exclude_types: List[int] = [], type_map: Optional[List[str]] = None, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__( var_name="energy", diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 2f0b3c7ac6..6d45d3ac29 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -8,6 +8,7 @@ Dict, List, Optional, + Union, ) import numpy as np @@ -21,6 +22,9 @@ FittingNet, NetworkCollection, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.utils.finetune import ( get_index_between_two_maps, map_atom_exclude_types, @@ -82,6 +86,8 @@ class GeneralFitting(NativeOP, BaseFitting): length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. type_map: List[str], Optional A list of strings. Give the name to each type of atoms. + seed: Optional[Union[int, List[int]]] + Random seed for initializing the network parameters. """ def __init__( @@ -106,6 +112,7 @@ def __init__( exclude_types: List[int] = [], remove_vaccum_contribution: Optional[List[bool]] = None, type_map: Optional[List[str]] = None, + seed: Optional[Union[int, List[int]]] = None, ): self.var_name = var_name self.ntypes = ntypes @@ -166,6 +173,7 @@ def __init__( self.resnet_dt, self.precision, bias_out=True, + seed=child_seed(seed, ii), ) for ii in range(self.ntypes if not self.mixed_types else 1) ], diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 67b4888c67..05f4aa349d 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -5,6 +5,7 @@ Dict, List, Optional, + Union, ) import numpy as np @@ -114,7 +115,7 @@ def __init__( shift_diag: bool = True, type_map: Optional[List[str]] = None, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): # seed, uniform_seed are not included if tot_ener_zero: diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 7a17cc459d..53a170ac4a 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -22,6 +22,9 @@ PRECISION_DICT, NativeOP, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.utils.version import ( check_version_compatibility, ) @@ -61,6 +64,10 @@ class NativeLayer(NativeOP): The activation function of the layer. resnet : bool, optional Whether the layer is a residual layer. + precision : str, optional + The precision of the layer. + seed : int, optional + Random seed. 
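The NativeLayer/LayerNorm signatures above and the EmbeddingNet/FittingNet changes just below hand each layer its own child index; the extra output layer of FittingNet uses index len(neuron), so it can never collide with a hidden layer. A sketch of the resulting indices, assuming the seed module from this diff:

from deepmd.dpmodel.utils.seed import child_seed

seed, neuron = 7, [25, 50, 100]
hidden_seeds = [child_seed(seed, idx) for idx in range(len(neuron))]  # 0, 1, 2
out_seed = child_seed(seed, len(neuron))                              # index 3
assert out_seed not in hidden_seeds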
""" def __init__( @@ -72,7 +79,7 @@ def __init__( activation_function: Optional[str] = None, resnet: bool = False, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: prec = PRECISION_DICT[precision.lower()] self.precision = precision @@ -299,6 +306,12 @@ class LayerNorm(NativeLayer): A small value added to prevent division by zero in calculations. uni_init : bool, optional If initialize the weights to be zeros and ones. + trainable : bool, optional + If the weights are trainable. + precision : str, optional + The precision of the layer. + seed : int, optional + Random seed. """ def __init__( @@ -308,7 +321,7 @@ def __init__( uni_init: bool = True, trainable: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: self.eps = eps self.uni_init = uni_init @@ -556,7 +569,8 @@ class EN(T_Network): Use time step at the resnet architecture. precision Floating point precision for the model paramters. - + seed : int, optional + Random seed. """ def __init__( @@ -566,7 +580,7 @@ def __init__( activation_function: str = "tanh", resnet_dt: bool = False, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): layers = [] i_in = in_dim @@ -581,7 +595,7 @@ def __init__( activation_function=activation_function, resnet=True, precision=precision, - seed=seed, + seed=child_seed(seed, idx), ).serialize() ) i_in = i_ot @@ -656,7 +670,8 @@ class FN(T_EmbeddingNet): Floating point precision for the model paramters. bias_out The last linear layer has bias. - + seed : int, optional + Random seed. """ def __init__( @@ -668,7 +683,7 @@ def __init__( resnet_dt: bool = False, precision: str = DEFAULT_PRECISION, bias_out: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__( in_dim, @@ -688,7 +703,7 @@ def __init__( activation_function=None, resnet=False, precision=precision, - seed=seed, + seed=child_seed(seed, len(neuron)), ) ) self.out_dim = out_dim diff --git a/deepmd/dpmodel/utils/seed.py b/deepmd/dpmodel/utils/seed.py new file mode 100644 index 0000000000..4ceab80066 --- /dev/null +++ b/deepmd/dpmodel/utils/seed.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, + Optional, + Union, + overload, +) + + +@overload +def child_seed(seed: None, idx: int) -> None: ... + + +@overload +def child_seed(seed: Union[int, List[int]], idx: int) -> List[int]: ... + + +def child_seed(seed: Optional[Union[int, List[int]]], idx: int) -> Optional[List[int]]: + """Generate a child seed from a parent seed. + + Parameters + ---------- + seed + The parent seed. + idx + The index of the child seed. + + Returns + ------- + Optional[List[int]] + The child seed. 
+ """ + # See https://numpy.org/doc/stable/reference/random/parallel.html#sequence-of-integer-seeds + if seed is None: + return None + elif isinstance(seed, int): + return [idx, seed] + elif isinstance(seed, list): + return [idx, *seed] + else: + raise TypeError(f"seed must be int or list, not {type(seed)}") diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index 99508ea7b3..0db064fb5e 100644 --- a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -2,6 +2,7 @@ from typing import ( List, Optional, + Union, ) import numpy as np @@ -57,7 +58,7 @@ def __init__( activation_function: str = "tanh", precision: str = "default", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, padding: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, @@ -83,6 +84,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=self.seed, ) def call(self) -> np.ndarray: diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index ff29d14e1d..e6dba7f88f 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -11,6 +11,9 @@ import torch from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.mlp import ( NetworkCollection, ) @@ -236,7 +239,7 @@ def __init__( smooth_type_embedding: bool = True, type_one_side: bool = False, stripped_type_embedding: Optional[bool] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # not implemented @@ -286,7 +289,7 @@ def __init__( env_protection=env_protection, trainable_ln=trainable_ln, ln_eps=ln_eps, - seed=seed, + seed=child_seed(seed, 1), old_impl=old_impl, ) self.use_econf_tebd = use_econf_tebd @@ -295,7 +298,7 @@ def __init__( ntypes, tebd_dim, precision=precision, - seed=seed, + seed=child_seed(seed, 2), use_econf_tebd=use_econf_tebd, type_map=type_map, ) diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index ae8c924e9a..aa5aa11f67 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -15,6 +15,9 @@ RepinitArgs, ) from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.mlp import ( Identity, MLPLayer, @@ -84,7 +87,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], env_protection: float = 0.0, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, add_tebd_to_repinit_out: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, @@ -169,7 +172,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, type_one_side=self.repinit_args.type_one_side, - seed=seed, + seed=child_seed(seed, 0), ) self.repformers = DescrptBlockRepformers( self.repformer_args.rcut, @@ -204,7 +207,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, trainable_ln=self.repformer_args.trainable_ln, ln_eps=self.repformer_args.ln_eps, - seed=seed, + seed=child_seed(seed, 1), old_impl=old_impl, ) self.use_econf_tebd = use_econf_tebd @@ -213,7 +216,7 @@ def init_subclass_params(sub_data, sub_class): ntypes, self.repinit_args.tebd_dim, precision=precision, - seed=seed, + seed=child_seed(seed, 2), 
use_econf_tebd=self.use_econf_tebd, type_map=type_map, ) @@ -234,7 +237,7 @@ def init_subclass_params(sub_data, sub_class): bias=False, precision=precision, init="glorot", - seed=seed, + seed=child_seed(seed, 3), ) self.tebd_transform = None if self.add_tebd_to_repinit_out: @@ -243,7 +246,7 @@ def init_subclass_params(sub_data, sub_class): self.repformers.dim_in, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 4), ) assert self.repinit.rcut > self.repformers.rcut assert self.repinit.sel[0] > self.repformers.sel[0] diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index 3f377f9de5..85a9800c73 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -2,11 +2,15 @@ from typing import ( List, Optional, + Union, ) import torch import torch.nn as nn +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.init import ( constant_, normal_, @@ -40,7 +44,7 @@ def get_residual( _mode: str = "norm", trainable: bool = True, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> torch.Tensor: r""" Get residual tensor for one update vector. @@ -156,7 +160,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -281,7 +285,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -291,10 +295,13 @@ def __init__( input_dim * head_num, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.head_map = MLPLayer( - input_dim * head_num, input_dim, precision=precision, seed=seed + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 1), ) self.precision = precision @@ -363,7 +370,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -436,7 +443,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -447,17 +454,20 @@ def __init__( hidden_dim * 1 * head_num, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.mapkv = MLPLayer( input_dim, (hidden_dim + input_dim) * head_num, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 1), ) self.head_map = MLPLayer( - input_dim * head_num, input_dim, precision=precision, seed=seed + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 2), ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -589,7 +599,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -646,12 +656,17 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) ) g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, 
self.axis_neuron) - self.linear1 = MLPLayer(g1_in_dim, g1_dim, precision=precision, seed=seed) + self.linear1 = MLPLayer( + g1_in_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 1), + ) self.linear2 = None self.proj_g1g2 = None self.proj_g1g1g2 = None @@ -662,7 +677,12 @@ def __init__( self.loc_attn = None if self.update_chnnl_2: - self.linear2 = MLPLayer(g2_dim, g2_dim, precision=precision, seed=seed) + self.linear2 = MLPLayer( + g2_dim, + g2_dim, + precision=precision, + seed=child_seed(seed, 2), + ) if self.update_style == "res_residual": self.g2_residual.append( get_residual( @@ -670,16 +690,24 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 3), ) ) if self.update_g1_has_conv: self.proj_g1g2 = MLPLayer( - g1_dim, g2_dim, bias=False, precision=precision, seed=seed + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), ) if self.update_g2_has_g1g1: self.proj_g1g1g2 = MLPLayer( - g1_dim, g2_dim, bias=False, precision=precision, seed=seed + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 5), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -688,7 +716,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 6), ) ) if self.update_g2_has_attn or self.update_h2: @@ -699,18 +727,18 @@ def __init__( attn2_has_gate, self.smooth, precision=precision, - seed=seed, + seed=child_seed(seed, 7), ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( - g2_dim, attn2_nhead, precision=precision, seed=seed + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) ) self.attn2_lm = LayerNorm( g2_dim, eps=ln_eps, trainable=trainable_ln, precision=precision, - seed=seed, + seed=child_seed(seed, 9), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -719,13 +747,13 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 10), ) ) if self.update_h2: self.attn2_ev_apply = Atten2EquiVarApply( - g2_dim, attn2_nhead, precision=precision, seed=seed + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) ) if self.update_style == "res_residual": self.h2_residual.append( @@ -734,7 +762,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 12), ) ) if self.update_g1_has_attn: @@ -744,7 +772,7 @@ def __init__( attn1_nhead, self.smooth, precision=precision, - seed=seed, + seed=child_seed(seed, 13), ) if self.update_style == "res_residual": self.g1_residual.append( @@ -753,7 +781,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 14), ) ) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 54fbc2d216..8653d79140 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -10,6 +10,9 @@ import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor.descriptor import ( DescriptorBlock, ) @@ -101,7 +104,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, old_impl: bool = False, ): r""" @@ -229,7 +232,9 @@ def __init__( self.seed = seed self.old_impl = 
old_impl - self.g2_embd = MLPLayer(1, self.g2_dim, precision=precision, seed=seed) + self.g2_embd = MLPLayer( + 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + ) layers = [] for ii in range(nlayers): if self.old_impl: @@ -291,7 +296,7 @@ def __init__( trainable_ln=self.trainable_ln, ln_eps=self.ln_eps, precision=precision, - seed=seed, + seed=child_seed(child_seed(seed, 1), ii), ) ) self.layers = torch.nn.ModuleList(layers) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index e771c03e52..8e51b03fc2 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -13,6 +13,9 @@ import numpy as np import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor import ( DescriptorBlock, prod_env_mat, @@ -86,7 +89,7 @@ def __init__( old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ntypes: Optional[int] = None, # to be compat with input type_map: Optional[List[str]] = None, # not implemented @@ -384,7 +387,7 @@ def __init__( old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, **kwargs, ): """Construct an embedding net of type `se_a`. @@ -448,14 +451,16 @@ def __init__( filter_layers = NetworkCollection( ndim=ndim, ntypes=len(sel), network_type="embedding_network" ) - for embedding_idx in itertools.product(range(self.ntypes), repeat=ndim): + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=ndim) + ): filter_layers[embedding_idx] = EmbeddingNet( 1, self.filter_neuron, activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=child_seed(self.seed, ii), ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index a2ab5dd9cf..a30869f24a 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -12,6 +12,9 @@ import torch.nn as nn import torch.nn.functional as torch_func +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor.descriptor import ( DescriptorBlock, ) @@ -83,7 +86,7 @@ def __init__( env_protection: float = 0.0, trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type: Optional[str] = None, old_impl: bool = False, ): @@ -227,7 +230,7 @@ def __init__( ln_eps=self.ln_eps, smooth=self.smooth, precision=self.precision, - seed=self.seed, + seed=child_seed(self.seed, 0), ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -271,7 +274,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=child_seed(self.seed, 1), ) self.filter_layers = filter_layers if self.tebd_input_mode in ["strip"]: @@ -284,7 +287,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=child_seed(self.seed, 2), ) self.filter_layers_strip = filter_layers_strip self.stats = None @@ -605,7 +608,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a 
neighbor-wise attention net.""" super().__init__() @@ -640,7 +643,7 @@ def __init__( ln_eps=ln_eps, smooth=smooth, precision=precision, - seed=seed, + seed=child_seed(seed, i), ) ) self.attention_layers = nn.ModuleList(attention_layers) @@ -748,7 +751,7 @@ def __init__( trainable_ln: bool = True, ln_eps: float = 1e-5, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a neighbor-wise attention layer.""" super().__init__() @@ -775,14 +778,14 @@ def __init__( temperature=temperature, smooth=smooth, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.attn_layer_norm = LayerNorm( self.embed_dim, eps=ln_eps, trainable=trainable_ln, precision=precision, - seed=seed, + seed=child_seed(seed, 1), ) def forward( @@ -855,7 +858,7 @@ def __init__( bias: bool = True, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -887,7 +890,7 @@ def __init__( bavg=0.0, stddev=1.0, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.out_proj = MLPLayer( hidden_dim, @@ -897,7 +900,7 @@ def __init__( bavg=0.0, stddev=1.0, precision=precision, - seed=seed, + seed=child_seed(seed, 1), ) def forward( diff --git a/deepmd/pt/model/descriptor/se_atten_v2.py b/deepmd/pt/model/descriptor/se_atten_v2.py index 3b350ded98..8fc6271665 100644 --- a/deepmd/pt/model/descriptor/se_atten_v2.py +++ b/deepmd/pt/model/descriptor/se_atten_v2.py @@ -66,7 +66,7 @@ def __init__( ln_eps: Optional[float] = 1e-5, type_one_side: bool = False, stripped_type_embedding: Optional[bool] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # not implemented diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index e6ebe53c26..d27aba5725 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -12,6 +12,9 @@ import torch from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor import ( prod_env_mat, ) @@ -70,7 +73,7 @@ def __init__( env_protection: float = 0.0, old_impl: bool = False, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, **kwargs, ): @@ -120,7 +123,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=child_seed(self.seed, ii), ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_t.py b/deepmd/pt/model/descriptor/se_t.py index caa4c9ce45..2fe5c16059 100644 --- a/deepmd/pt/model/descriptor/se_t.py +++ b/deepmd/pt/model/descriptor/se_t.py @@ -13,6 +13,9 @@ import numpy as np import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor import ( DescriptorBlock, prod_env_mat, @@ -118,7 +121,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], precision: str = "float64", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input # not implemented @@ -406,7 +409,7 @@ def __init__( exclude_types: List[Tuple[int, int]] 
= [], precision: str = "float64", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): r"""Construct an embedding net of type `se_e3`. @@ -474,14 +477,16 @@ def __init__( filter_layers = NetworkCollection( ndim=ndim, ntypes=len(sel), network_type="embedding_network" ) - for embedding_idx in itertools.product(range(self.ntypes), repeat=ndim): + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=ndim) + ): filter_layers[embedding_idx] = EmbeddingNet( 1, self.filter_neuron, activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=child_seed(self.seed, ii), ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/network/layernorm.py b/deepmd/pt/model/network/layernorm.py index 385bbaf270..f5cd6b965f 100644 --- a/deepmd/pt/model/network/layernorm.py +++ b/deepmd/pt/model/network/layernorm.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + List, Optional, + Union, ) import numpy as np @@ -43,7 +45,7 @@ def __init__( stddev: float = 1.0, precision: str = DEFAULT_PRECISION, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.eps = eps diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index dada7ed1b8..090d64fbcf 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -2,7 +2,9 @@ from typing import ( ClassVar, Dict, + List, Optional, + Union, ) import numpy as np @@ -81,7 +83,7 @@ def __init__( stddev: float = 1.0, precision: str = DEFAULT_PRECISION, init: str = "default", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() # only use_timestep when skip connection is established. 
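The enumerate(itertools.product(...)) pattern used in se_a/se_t above assigns every (type_i, type_j) embedding net a distinct child index; a sketch with toy sizes:

import itertools

from deepmd.dpmodel.utils.seed import child_seed

ntypes, ndim, seed = 3, 2, 7
seeds = {
    embedding_idx: child_seed(seed, ii)
    for ii, embedding_idx in enumerate(
        itertools.product(range(ntypes), repeat=ndim)
    )
}
assert len(seeds) == ntypes**ndim  # 9 nets -> child indices 0..8
assert seeds[(0, 0)] == [0, 7]
assert seeds[(2, 2)] == [8, 7]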
diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py index 0475c35750..0879daf6ec 100644 --- a/deepmd/pt/model/network/network.py +++ b/deepmd/pt/model/network/network.py @@ -2,6 +2,7 @@ from typing import ( List, Optional, + Union, ) import numpy as np @@ -570,7 +571,7 @@ def __init__( bavg=0.0, stddev=1.0, precision="default", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, use_econf_tebd=False, type_map=None, ): @@ -667,7 +668,7 @@ def __init__( activation_function: str = "tanh", precision: str = "default", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, padding: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 917af1bdcc..782af05d3c 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -87,7 +87,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], r_differentiable: bool = True, c_differentiable: bool = True, diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py index c6a533ce7e..c78b589939 100644 --- a/deepmd/pt/model/task/dos.py +++ b/deepmd/pt/model/task/dos.py @@ -52,7 +52,7 @@ def __init__( rcond: Optional[float] = None, bias_dos: Optional[torch.Tensor] = None, trainable: Union[bool, List[bool]] = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, exclude_types: List[int] = [], diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 6db937f72c..e129221a2a 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -5,6 +5,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -55,7 +56,7 @@ def __init__( activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, **kwargs, ): diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 0ca2c5c896..95242eb67c 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -13,6 +13,9 @@ import numpy as np import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.mlp import ( FittingNet, NetworkCollection, @@ -143,7 +146,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], trainable: Union[bool, List[bool]] = True, remove_vaccum_contribution: Optional[List[bool]] = None, @@ -238,7 +241,7 @@ def __init__( self.resnet_dt, self.precision, bias_out=True, - seed=seed, + seed=child_seed(self.seed, ii), ) for ii in range(self.ntypes if not self.mixed_types else 1) ], diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index 2a8aab9734..ac0c4d59e5 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -4,6 +4,7 @@ from typing import ( List, Optional, + Union, ) import torch @@ -95,7 +96,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, 
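In GeneralFitting above, the number of fitting nets, and hence of child seeds, depends on mixed_types; a sketch of the indexing:

from deepmd.dpmodel.utils.seed import child_seed

seed, ntypes, mixed_types = 11, 2, False
seeds = [child_seed(seed, ii) for ii in range(ntypes if not mixed_types else 1)]
assert seeds == [[0, 11], [1, 11]]  # one net (and one seed) per atom type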
rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], atom_ener: Optional[List[Optional[torch.Tensor]]] = None, type_map: Optional[List[str]] = None, diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 66120a1523..a4592a8890 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -91,7 +91,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], fit_diag: bool = True, scale: Optional[Union[List[float], float]] = None, diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index 86cede347a..9ccdbfdb5d 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + List, Optional, + Union, overload, ) @@ -113,8 +115,48 @@ def dict_to_device(sample_dict): sample_dict[key] = sample_dict[key].to(DEVICE) -def get_generator(seed: Optional[int] = None) -> Optional[torch.Generator]: +# https://github.com/numpy/numpy/blob/a4cddb60489f821a1a4dffc16cd5c69755d43bdb/numpy/random/bit_generator.pyx#L58-L63 +INIT_A = 0x43B0D7E5 +MULT_A = 0x931E8875 +MIX_MULT_L = 0xCA01F9DD +MIX_MULT_R = 0x4973F715 +XSHIFT = 16 + + +def hashmix(value: int, hash_const: List[int]): + value ^= INIT_A + hash_const[0] *= MULT_A + value *= INIT_A + # prevent overflow + hash_const[0] &= 0xFFFF_FFFF_FFFF_FFFF + value &= 0xFFFF_FFFF_FFFF_FFFF + value ^= value >> XSHIFT + return value + + +def mix(x: int, y: int): + result = MIX_MULT_L * x - MIX_MULT_R * y + # prevent overflow + result &= 0xFFFF_FFFF_FFFF_FFFF + result ^= result >> XSHIFT + return result + + +def mix_entropy(entropy_array: List[int]) -> int: + # https://github.com/numpy/numpy/blob/a4cddb60489f821a1a4dffc16cd5c69755d43bdb/numpy/random/bit_generator.pyx#L341-L374 + hash_const = [INIT_A] + mixer = hashmix(entropy_array[0], hash_const) + for i_src in range(1, len(entropy_array)): + mixer = mix(mixer, hashmix(entropy_array[i_src], hash_const)) + return mixer + + +def get_generator( + seed: Optional[Union[int, List[int]]] = None, +) -> Optional[torch.Generator]: if seed is not None: + if isinstance(seed, list): + seed = mix_entropy(seed) generator = torch.Generator(device=DEVICE) generator.manual_seed(seed) return generator diff --git a/deepmd/tf/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py index babec2d68e..1248e3cfc8 100644 --- a/deepmd/tf/descriptor/se_a.py +++ b/deepmd/tf/descriptor/se_a.py @@ -1036,6 +1036,8 @@ def _filter_lower( mixed_prec=self.mixed_prec, ) net_output = tf.nn.embedding_lookup(net_output, idx) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift net_output = tf.reshape(net_output, [-1, self.filter_neuron[-1]]) else: xyz_scatter = self._concat_type_embedding( @@ -1047,7 +1049,7 @@ def _filter_lower( ) # natom x 4 x outputs_size if nvnmd_cfg.enable: - return filter_lower_R42GR( + oo = filter_lower_R42GR( type_i, type_input, inputs_i, @@ -1065,6 +1067,9 @@ def _filter_lower( self.filter_resnet_dt, self.embedding_net_variables, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift + return oo if self.compress and (not is_exclude): if self.stripped_type_embedding: net_output = tf.nn.embedding_lookup( diff --git a/deepmd/tf/descriptor/se_atten.py 
b/deepmd/tf/descriptor/se_atten.py index 312a7481ba..de652bc91d 100644 --- a/deepmd/tf/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -63,6 +63,7 @@ ) from deepmd.tf.utils.network import ( embedding_net, + embedding_net_rand_seed_shift, layernorm, one_layer, ) @@ -997,6 +998,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 K_c = one_layer( input_xyz, self.att_n, @@ -1010,6 +1013,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 V_c = one_layer( input_xyz, self.att_n, @@ -1023,6 +1028,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 # # natom x nei_type_i x out_size # xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1])) # natom x nei_type_i x att_n @@ -1055,6 +1062,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 input_xyz = layernorm( input_xyz, outputs_size[-1], @@ -1068,6 +1077,8 @@ def _attention_layers( eps=self.ln_eps, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 return input_xyz def _filter_lower( @@ -1125,6 +1136,8 @@ def _filter_lower( initial_variables=self.embedding_net_variables, mixed_prec=self.mixed_prec, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift else: if self.attn_layer == 0: log.info( @@ -1164,6 +1177,8 @@ def _filter_lower( initial_variables=self.embedding_net_variables, mixed_prec=self.mixed_prec, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift else: net = "filter_net" info = [ @@ -1221,6 +1236,8 @@ def _filter_lower( initial_variables=self.two_side_embeeding_net_variables, mixed_prec=self.mixed_prec, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift two_embd = tf.nn.embedding_lookup( embedding_of_two_side_type_embedding, index_of_two_side ) @@ -1239,8 +1256,6 @@ def _filter_lower( is_sorted=len(self.exclude_types) == 0, ) - if (not self.uniform_seed) and (self.seed is not None): - self.seed += self.seed_shift input_r = tf.slice( tf.reshape(inputs_i, (-1, shape_i[1] // 4, 4)), [0, 0, 1], [-1, -1, 3] ) @@ -2163,6 +2178,7 @@ def __init__( use_econf_tebd=use_econf_tebd, type_map=type_map, # precision=precision, + seed=seed, ) self.concat_output_tebd = concat_output_tebd if self.tebd_input_mode in ["concat"]: @@ -2185,6 +2201,8 @@ def build( suffix: str = "", ) -> tf.Tensor: type_embedding = self.type_embedding.build(self.ntypes, suffix=suffix) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += embedding_net_rand_seed_shift([self.tebd_dim]) input_dict["type_embedding"] = type_embedding # nf x nloc x out_dim diff --git a/source/tests/tf/test_model_se_a_ebd_v2.py b/source/tests/tf/test_model_se_a_ebd_v2.py index 1aa4fdb92b..ffa558da35 100644 --- a/source/tests/tf/test_model_se_a_ebd_v2.py +++ b/source/tests/tf/test_model_se_a_ebd_v2.py @@ -139,37 +139,37 @@ def test_model(self): f = f.reshape([-1]) v = v.reshape([-1]) - refe = [5.435394596262052014e-01] + refe = 
[6.100037044296185e-01] reff = [ - 6.583728125594628944e-02, - 7.228993116083935744e-02, - 1.971543579114074483e-03, - 6.567474563776359853e-02, - 7.809421727465599983e-02, - -4.866958849094786890e-03, - -8.670511901715304004e-02, - 3.525374157021862048e-02, - 1.415748959800727487e-03, - 6.375813001810648473e-02, - -1.139053242798149790e-01, - -4.178593754384440744e-03, - -1.471737787218250215e-01, - 4.189712704724830872e-02, - 7.011731363309440038e-03, - 3.860874082716164030e-02, - -1.136296927731473005e-01, - -1.353471298745012206e-03, + 8.448651008616304e-02, + 8.613568658155157e-02, + 4.377711655236228e-03, + 9.264613309788312e-02, + 9.351200240060925e-02, + -6.743918515275118e-03, + -1.268078358219972e-01, + 4.855965861982662e-02, + 1.361334787979757e-04, + 4.193213089916692e-02, + -1.324120032345251e-01, + -4.507320444374342e-03, + -1.314595297986654e-01, + 4.120567370248839e-02, + 7.896917575801866e-03, + 3.920259153744955e-02, + -1.370010180699507e-01, + -1.159523750186610e-03, ] refv = [ - -4.243979601186427253e-01, - 1.097173849143971286e-01, - 1.227299373463585502e-02, - 1.097173849143970314e-01, - -2.462891443164323124e-01, - -5.711664180530139426e-03, - 1.227299373463585502e-02, - -5.711664180530143763e-03, - -6.217348853341628408e-04, + -0.277134219204478, + 0.088897922530779, + 0.008633318264458, + 0.088897922530779, + -0.292191560546969, + -0.005709595520904, + 0.008633318264458, + -0.005709595520904, + -0.000682136341924, ] refe = np.reshape(refe, [-1]) reff = np.reshape(reff, [-1]) diff --git a/source/tests/tf/test_pairwise_dprc.py b/source/tests/tf/test_pairwise_dprc.py index 3a5836ce45..22c73beb3b 100644 --- a/source/tests/tf/test_pairwise_dprc.py +++ b/source/tests/tf/test_pairwise_dprc.py @@ -522,8 +522,8 @@ def test_model_ener(self): # the model is pairwise! self.assertAllClose(e[1] + e[2] + e[3] - 3 * e[0], e[4] - e[0]) self.assertAllClose(f[1] + f[2] + f[3] - 3 * f[0], f[4] - f[0]) - self.assertAllClose(e[0], 0.189075, 1e-6) - self.assertAllClose(f[0, 0], 0.060047, 1e-6) + self.assertAllClose(e[0], 4.82969, 1e-6) + self.assertAllClose(f[0, 0], -0.104339, 1e-6) # test input requirement for the model self.assertCountEqual(
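Because torch.Generator.manual_seed accepts only a single integer, the get_generator above folds a list seed down with numpy's entropy-mixing scheme before seeding. A quick check, assuming a working deepmd.pt installation:

from deepmd.pt.utils.utils import get_generator, mix_entropy

g1 = get_generator([0, 42])
g2 = get_generator([0, 42])
assert g1.initial_seed() == g2.initial_seed()  # same list seed -> same state
# different child indices mix to different seeds (with overwhelming probability)
assert get_generator([1, 42]).initial_seed() != g1.initial_seed()
assert isinstance(mix_entropy([0, 42]), int)   # a single 64-bit integer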