From a6b61b9b9913eef4ffec2d047e55015dd811b259 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 28 Nov 2024 17:12:12 +0800 Subject: [PATCH] feat(pt/dp): support case embedding and sharable fitting (#4417) ## Summary by CodeRabbit - **New Features** - Introduced a `set_case_embd` method across multiple atomic model classes to enhance case embedding functionality. - Added a `dim_case_embd` parameter in various fitting classes to support case-specific embedding dimensions. - Updated serialization methods to include `dim_case_embd` in the output. - Added a comprehensive JSON configuration for multitask models in water simulations. - Introduced a new function to validate case embedding dimensions in multi-task training. - Updated the `share_params` method in the `DescrptDPA2` class to streamline parameter sharing logic. - **Bug Fixes** - Improved version compatibility checks in deserialization methods across several classes. - **Documentation** - Enhanced documentation for multi-task training, emphasizing the transition to PyTorch and detailing configuration changes. - **Tests** - Updated test cases to incorporate new parameters and configurations related to case embeddings. - Introduced new tests for multitask learning configurations. --------- Signed-off-by: Duo <50307526+iProzd@users.noreply.github.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .../dpmodel/atomic_model/dp_atomic_model.py | 7 + .../atomic_model/linear_atomic_model.py | 16 ++ .../atomic_model/make_base_atomic_model.py | 8 + .../atomic_model/pairtab_atomic_model.py | 9 + deepmd/dpmodel/fitting/dipole_fitting.py | 4 +- deepmd/dpmodel/fitting/dos_fitting.py | 4 +- deepmd/dpmodel/fitting/ener_fitting.py | 4 +- deepmd/dpmodel/fitting/general_fitting.py | 35 +++- deepmd/dpmodel/fitting/invar_fitting.py | 4 +- .../dpmodel/fitting/polarizability_fitting.py | 6 +- deepmd/dpmodel/fitting/property_fitting.py | 4 +- deepmd/dpmodel/model/make_model.py | 3 + .../pd/model/atomic_model/dp_atomic_model.py | 7 + deepmd/pd/model/model/make_model.py | 3 + deepmd/pd/model/task/ener.py | 4 +- deepmd/pd/model/task/fitting.py | 23 ++- deepmd/pd/model/task/invar_fitting.py | 7 +- .../pt/model/atomic_model/dp_atomic_model.py | 7 + .../model/atomic_model/linear_atomic_model.py | 16 ++ .../atomic_model/pairtab_atomic_model.py | 9 + deepmd/pt/model/descriptor/dpa2.py | 19 +-- deepmd/pt/model/model/make_model.py | 3 + deepmd/pt/model/task/dipole.py | 6 +- deepmd/pt/model/task/dos.py | 4 +- deepmd/pt/model/task/ener.py | 4 +- deepmd/pt/model/task/fitting.py | 53 +++++- deepmd/pt/model/task/invar_fitting.py | 6 +- deepmd/pt/model/task/polarizability.py | 8 +- deepmd/pt/model/task/property.py | 6 +- deepmd/pt/train/training.py | 32 +++- deepmd/pt/train/wrapper.py | 12 +- deepmd/tf/fit/dipole.py | 11 +- deepmd/tf/fit/dos.py | 12 +- deepmd/tf/fit/ener.py | 12 +- deepmd/tf/fit/polar.py | 11 +- deepmd/utils/argcheck.py | 40 +++++ doc/train/multi-task-training.md | 21 ++- .../pytorch_example/input_torch_sharefit.json | 155 ++++++++++++++++++ source/tests/common/test_examples.py | 1 + source/tests/pt/model/water/multitask.json | 3 +- .../pt/model/water/multitask_sharefit.json | 134 +++++++++++++++ source/tests/pt/test_multitask.py | 55 ++++++- .../universal/dpmodel/fitting/test_fitting.py | 15 +- 43 files changed, 732 insertions(+), 71 deletions(-) create mode 100644 examples/water_multi_task/pytorch_example/input_torch_sharefit.json create mode 100644 
source/tests/pt/model/water/multitask_sharefit.json diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py index 749fe6bbf9..2fa072cc78 100644 --- a/deepmd/dpmodel/atomic_model/dp_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py @@ -65,6 +65,13 @@ def get_sel(self) -> list[int]: """Get the neighbor selection.""" return self.descriptor.get_sel() + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + self.fitting.set_case_embd(case_idx) + def mixed_types(self) -> bool: """If true, the model 1. assumes total number of atoms aligned across frames; diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py index 9676b34bfd..8108292bd2 100644 --- a/deepmd/dpmodel/atomic_model/linear_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/linear_atomic_model.py @@ -134,6 +134,14 @@ def get_model_rcuts(self) -> list[float]: def get_sel(self) -> list[int]: return [max([model.get_nsel() for model in self.models])] + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + for model in self.models: + model.set_case_embd(case_idx) + def get_model_nsels(self) -> list[int]: """Get the processed sels for each individual models. Not distinguishing types.""" return [model.get_nsel() for model in self.models] @@ -428,6 +436,14 @@ def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": data.pop("type", None) return super().deserialize(data) + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + # only set case_idx for dpmodel + self.models[0].set_case_embd(case_idx) + def _compute_weight( self, extended_coord: np.ndarray, diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py index a4c38518a3..01caa7cd64 100644 --- a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py @@ -68,6 +68,14 @@ def get_sel(self) -> list[int]: """Returns the number of selected atoms for each type.""" pass + @abstractmethod + def set_case_embd(self, case_idx: int) -> None: + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + pass + def get_nsel(self) -> int: """Returns the total number of selected neighboring atoms in the cut-off radius.""" return sum(self.get_sel()) diff --git a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py index a4bffe508d..0c35320e7f 100644 --- a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py @@ -120,6 +120,15 @@ def get_type_map(self) -> list[str]: def get_sel(self) -> list[int]: return [self.sel] + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. 
+ """ + raise NotImplementedError( + "Case identification not supported for PairTabAtomicModel!" + ) + def get_nsel(self) -> int: return self.sel diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index c872ef0555..fcaea43338 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -95,6 +95,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, trainable: Optional[list[bool]] = None, @@ -130,6 +131,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, rcond=rcond, tot_ener_zero=tot_ener_zero, trainable=trainable, @@ -159,7 +161,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) var_name = data.pop("var_name", None) assert var_name == "dipole" return super().deserialize(data) diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py index b4b1ee3cb2..2f6df77eac 100644 --- a/deepmd/dpmodel/fitting/dos_fitting.py +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -36,6 +36,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, bias_dos: Optional[np.ndarray] = None, rcond: Optional[float] = None, trainable: Union[bool, list[bool]] = True, @@ -60,6 +61,7 @@ def __init__( bias_atom=bias_dos, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, rcond=rcond, trainable=trainable, activation_function=activation_function, @@ -73,7 +75,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data["numb_dos"] = data.pop("dim_out") data.pop("tot_ener_zero", None) data.pop("var_name", None) diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py index 53bedb4cec..6435b6468f 100644 --- a/deepmd/dpmodel/fitting/ener_fitting.py +++ b/deepmd/dpmodel/fitting/ener_fitting.py @@ -32,6 +32,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, trainable: Optional[list[bool]] = None, @@ -55,6 +56,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, rcond=rcond, tot_ener_zero=tot_ener_zero, trainable=trainable, @@ -73,7 +75,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 2958a7d18d..c05d84c4a1 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -105,6 +105,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, bias_atom_e: Optional[np.ndarray] = None, rcond: Optional[float] = None, 
tot_ener_zero: bool = False, @@ -127,6 +128,7 @@ def __init__( self.resnet_dt = resnet_dt self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.dim_case_embd = dim_case_embd self.rcond = rcond self.tot_ener_zero = tot_ener_zero self.trainable = trainable @@ -171,11 +173,16 @@ def __init__( self.aparam_inv_std = np.ones(self.numb_aparam, dtype=self.prec) else: self.aparam_avg, self.aparam_inv_std = None, None + if self.dim_case_embd > 0: + self.case_embd = np.zeros(self.dim_case_embd, dtype=self.prec) + else: + self.case_embd = None # init networks in_dim = ( self.dim_descrpt + self.numb_fparam + (0 if self.use_aparam_as_mask else self.numb_aparam) + + self.dim_case_embd ) self.nets = NetworkCollection( 1 if not self.mixed_types else 0, @@ -222,6 +229,13 @@ def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this fitting net by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + self.case_embd = np.eye(self.dim_case_embd, dtype=self.prec)[case_idx] + def change_type_map( self, type_map: list[str], model_with_new_type_stat=None ) -> None: @@ -255,6 +269,8 @@ def __setitem__(self, key, value) -> None: self.aparam_avg = value elif key in ["aparam_inv_std"]: self.aparam_inv_std = value + elif key in ["case_embd"]: + self.case_embd = value elif key in ["scale"]: self.scale = value else: @@ -271,6 +287,8 @@ def __getitem__(self, key): return self.aparam_avg elif key in ["aparam_inv_std"]: return self.aparam_inv_std + elif key in ["case_embd"]: + return self.case_embd elif key in ["scale"]: return self.scale else: @@ -287,7 +305,7 @@ def serialize(self) -> dict: """Serialize the fitting to dict.""" return { "@class": "Fitting", - "@version": 2, + "@version": 3, "var_name": self.var_name, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, @@ -295,6 +313,7 @@ def serialize(self) -> dict: "resnet_dt": self.resnet_dt, "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, + "dim_case_embd": self.dim_case_embd, "rcond": self.rcond, "activation_function": self.activation_function, "precision": self.precision, @@ -303,6 +322,7 @@ def serialize(self) -> dict: "nets": self.nets.serialize(), "@variables": { "bias_atom_e": to_numpy_array(self.bias_atom_e), + "case_embd": to_numpy_array(self.case_embd), "fparam_avg": to_numpy_array(self.fparam_avg), "fparam_inv_std": to_numpy_array(self.fparam_inv_std), "aparam_avg": to_numpy_array(self.aparam_avg), @@ -423,6 +443,19 @@ def _call_common( axis=-1, ) + if self.dim_case_embd > 0: + assert self.case_embd is not None + case_embd = xp.tile(xp.reshape(self.case_embd, [1, 1, -1]), [nf, nloc, 1]) + xx = xp.concat( + [xx, case_embd], + axis=-1, + ) + if xx_zeros is not None: + xx_zeros = xp.concat( + [xx_zeros, case_embd], + axis=-1, + ) + # calculate the prediction if not self.mixed_types: outs = xp.zeros( diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py index 219589d9ee..b5d3a02d86 100644 --- a/deepmd/dpmodel/fitting/invar_fitting.py +++ b/deepmd/dpmodel/fitting/invar_fitting.py @@ -123,6 +123,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, bias_atom: Optional[np.ndarray] = None, rcond: Optional[float] = None, tot_ener_zero: bool = False, @@ -155,6 +156,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, 
+ dim_case_embd=dim_case_embd, rcond=rcond, bias_atom_e=bias_atom, tot_ener_zero=tot_ener_zero, @@ -183,7 +185,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) return super().deserialize(data) def _net_out_dim(self): diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 021359a96e..0db6a23377 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -101,6 +101,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, trainable: Optional[list[bool]] = None, @@ -150,6 +151,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, rcond=rcond, tot_ener_zero=tot_ener_zero, trainable=trainable, @@ -187,7 +189,7 @@ def __getitem__(self, key): def serialize(self) -> dict: data = super().serialize() data["type"] = "polar" - data["@version"] = 3 + data["@version"] = 4 data["embedding_width"] = self.embedding_width data["fit_diag"] = self.fit_diag data["shift_diag"] = self.shift_diag @@ -198,7 +200,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) var_name = data.pop("var_name", None) assert var_name == "polar" return super().deserialize(data) diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py index 18a56e3bf9..8b903af00e 100644 --- a/deepmd/dpmodel/fitting/property_fitting.py +++ b/deepmd/dpmodel/fitting/property_fitting.py @@ -78,6 +78,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -99,6 +100,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, rcond=rcond, trainable=trainable, activation_function=activation_function, @@ -111,7 +113,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "PropertyFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version"), 2, 1) + check_version_compatibility(data.pop("@version"), 3, 1) data.pop("dim_out") data.pop("var_name") data.pop("tot_ener_zero") diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index 70ddbe09b8..ccad72c6a5 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -552,6 +552,9 @@ def serialize(self) -> dict: def deserialize(cls, data) -> "CM": return cls(atomic_model_=T_AtomicModel.deserialize(data)) + def set_case_embd(self, case_idx: int): + self.atomic_model.set_case_embd(case_idx) + def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" return self.atomic_model.get_dim_fparam() diff --git a/deepmd/pd/model/atomic_model/dp_atomic_model.py b/deepmd/pd/model/atomic_model/dp_atomic_model.py index 47b881e0cc..25a0f89d77 100644 --- a/deepmd/pd/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pd/model/atomic_model/dp_atomic_model.py 
@@ -139,6 +139,13 @@ def get_sel(self) -> list[int]: """Get the neighbor selection.""" return self.sel + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + self.fitting_net.set_case_embd(case_idx) + def mixed_types(self) -> bool: """If true, the model 1. assumes total number of atoms aligned across frames; diff --git a/deepmd/pd/model/model/make_model.py b/deepmd/pd/model/model/make_model.py index 67b46d4d87..d5c5c6bd41 100644 --- a/deepmd/pd/model/model/make_model.py +++ b/deepmd/pd/model/model/make_model.py @@ -516,6 +516,9 @@ def serialize(self) -> dict: def deserialize(cls, data) -> "CM": return cls(atomic_model_=T_AtomicModel.deserialize(data)) + def set_case_embd(self, case_idx: int): + self.atomic_model.set_case_embd(case_idx) + def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" return self.atomic_model.get_dim_fparam() diff --git a/deepmd/pd/model/task/ener.py b/deepmd/pd/model/task/ener.py index ed0cfac69d..789ef75066 100644 --- a/deepmd/pd/model/task/ener.py +++ b/deepmd/pd/model/task/ener.py @@ -42,6 +42,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -59,6 +60,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -70,7 +72,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) diff --git a/deepmd/pd/model/task/fitting.py b/deepmd/pd/model/task/fitting.py index 9008ef8af3..375cf834cc 100644 --- a/deepmd/pd/model/task/fitting.py +++ b/deepmd/pd/model/task/fitting.py @@ -103,6 +103,9 @@ class GeneralFitting(Fitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + dim_case_embd : int + (Not supported yet) + Dimension of case specific embedding. activation_function : str Activation function. 
precision : str @@ -140,6 +143,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -161,6 +165,10 @@ def __init__( self.resnet_dt = resnet_dt self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.dim_case_embd = dim_case_embd + if dim_case_embd > 0: + raise ValueError("dim_case_embd is not supported yet in PaddlePaddle.") + self.case_embd = None self.activation_function = activation_function self.precision = precision self.prec = PRECISION_DICT[self.precision] @@ -274,7 +282,7 @@ def serialize(self) -> dict: """Serialize the fitting to dict.""" return { "@class": "Fitting", - "@version": 2, + "@version": 3, "var_name": self.var_name, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, @@ -282,6 +290,7 @@ def serialize(self) -> dict: "resnet_dt": self.resnet_dt, "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, + "dim_case_embd": self.dim_case_embd, "activation_function": self.activation_function, "precision": self.precision, "mixed_types": self.mixed_types, @@ -290,6 +299,7 @@ def serialize(self) -> dict: "exclude_types": self.exclude_types, "@variables": { "bias_atom_e": to_numpy_array(self.bias_atom_e), + "case_embd": None, "fparam_avg": to_numpy_array(self.fparam_avg), "fparam_inv_std": to_numpy_array(self.fparam_inv_std), "aparam_avg": to_numpy_array(self.aparam_avg), @@ -349,6 +359,13 @@ def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this fitting net by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + raise NotImplementedError("set_case_embd is not supported yet in PaddlePaddle.") + def __setitem__(self, key, value): if key in ["bias_atom_e"]: value = value.reshape([self.ntypes, self._net_out_dim()]) @@ -361,6 +378,8 @@ def __setitem__(self, key, value): self.aparam_avg = value elif key in ["aparam_inv_std"]: self.aparam_inv_std = value + elif key in ["case_embd"]: + self.case_embd = value elif key in ["scale"]: self.scale = value else: @@ -377,6 +396,8 @@ def __getitem__(self, key): return self.aparam_avg elif key in ["aparam_inv_std"]: return self.aparam_inv_std + elif key in ["case_embd"]: + return self.case_embd elif key in ["scale"]: return self.scale else: diff --git a/deepmd/pd/model/task/invar_fitting.py b/deepmd/pd/model/task/invar_fitting.py index b366fc1d2e..b92c862dc8 100644 --- a/deepmd/pd/model/task/invar_fitting.py +++ b/deepmd/pd/model/task/invar_fitting.py @@ -57,6 +57,9 @@ class InvarFitting(GeneralFitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + dim_case_embd : int + (Not supported yet) + Dimension of case specific embedding. activation_function : str Activation function. 
precision : str @@ -92,6 +95,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -114,6 +118,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -142,7 +147,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) return super().deserialize(data) def output_def(self) -> FittingOutputDef: diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 2cdc97f934..c988d63213 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -93,6 +93,13 @@ def get_sel(self) -> list[int]: """Get the neighbor selection.""" return self.sel + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + self.fitting_net.set_case_embd(case_idx) + def mixed_types(self) -> bool: """If true, the model 1. assumes total number of atoms aligned across frames; diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index 3a6abccdf6..36c636ddfb 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -158,6 +158,14 @@ def get_model_rcuts(self) -> list[float]: def get_sel(self) -> list[int]: return [max([model.get_nsel() for model in self.models])] + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + for model in self.models: + model.set_case_embd(case_idx) + def get_model_nsels(self) -> list[int]: """Get the processed sels for each individual models. Not distinguishing types.""" return [model.get_nsel() for model in self.models] @@ -561,6 +569,14 @@ def serialize(self) -> dict: ) return dd + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + # only set case_idx for dpmodel + self.models[0].set_case_embd(case_idx) + @classmethod def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": data = data.copy() diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 0d3b2c0c41..62b47afb32 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -141,6 +141,15 @@ def get_type_map(self) -> list[str]: def get_sel(self) -> list[int]: return [self.sel] + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this atomic model by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. + """ + raise NotImplementedError( + "Case identification not supported for PairTabAtomicModel!" 
+ ) + def get_nsel(self) -> int: return self.sel diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index ebad588e32..c8e430960b 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -403,25 +403,8 @@ def share_params(self, base_class, shared_level, resume=False) -> None: ] self.repformers.share_params(base_class.repformers, 0, resume=resume) # shared_level: 1 - # share all parameters in type_embedding and repinit - elif shared_level == 1: - self._modules["type_embedding"] = base_class._modules["type_embedding"] - self.repinit.share_params(base_class.repinit, 0, resume=resume) - if self.use_three_body: - self.repinit_three_body.share_params( - base_class.repinit_three_body, 0, resume=resume - ) - # shared_level: 2 - # share all parameters in type_embedding and repformers - elif shared_level == 2: - self._modules["type_embedding"] = base_class._modules["type_embedding"] - self._modules["g1_shape_tranform"] = base_class._modules[ - "g1_shape_tranform" - ] - self.repformers.share_params(base_class.repformers, 0, resume=resume) - # shared_level: 3 # share all parameters in type_embedding - elif shared_level == 3: + elif shared_level == 1: self._modules["type_embedding"] = base_class._modules["type_embedding"] # Other shared levels else: diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index 83abf9ee4a..472eae5329 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -514,6 +514,9 @@ def serialize(self) -> dict: def deserialize(cls, data) -> "CM": return cls(atomic_model_=T_AtomicModel.deserialize(data)) + def set_case_embd(self, case_idx: int): + self.atomic_model.set_case_embd(case_idx) + @torch.jit.export def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index c2db53288a..65b64220ae 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -51,6 +51,8 @@ class DipoleFittingNet(GeneralFitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + dim_case_embd : int + Dimension of case specific embedding. activation_function : str Activation function. 
precision : str @@ -81,6 +83,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -103,6 +106,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -128,7 +132,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data.pop("var_name", None) return super().deserialize(data) diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py index a71117e587..568ef81c92 100644 --- a/deepmd/pt/model/task/dos.py +++ b/deepmd/pt/model/task/dos.py @@ -47,6 +47,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, rcond: Optional[float] = None, bias_dos: Optional[torch.Tensor] = None, trainable: Union[bool, list[bool]] = True, @@ -73,6 +74,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -99,7 +101,7 @@ def output_def(self) -> FittingOutputDef: @classmethod def deserialize(cls, data: dict) -> "DOSFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data.pop("@class", None) data.pop("var_name", None) data.pop("tot_ener_zero", None) diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 543d987e31..07351b33f6 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -50,6 +50,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -67,6 +68,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -78,7 +80,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index fb0954979e..2486ab576f 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -64,14 +64,7 @@ def share_params(self, base_class, shared_level, resume=False) -> None: self.__class__ == base_class.__class__ ), "Only fitting nets of the same type can share params!" if shared_level == 0: - # link buffers - if hasattr(self, "bias_atom_e"): - self.bias_atom_e = base_class.bias_atom_e - # the following will successfully link all the params except buffers, which need manually link. 
- for item in self._modules: - self._modules[item] = base_class._modules[item] - elif shared_level == 1: - # only not share the bias_atom_e + # only not share the bias_atom_e and the case_embd # the following will successfully link all the params except buffers, which need manually link. for item in self._modules: self._modules[item] = base_class._modules[item] @@ -102,6 +95,8 @@ class GeneralFitting(Fitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + dim_case_embd : int + Dimension of case specific embedding. activation_function : str Activation function. precision : str @@ -139,6 +134,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -160,6 +156,7 @@ def __init__( self.resnet_dt = resnet_dt self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.dim_case_embd = dim_case_embd self.activation_function = activation_function self.precision = precision self.prec = PRECISION_DICT[self.precision] @@ -211,10 +208,20 @@ def __init__( else: self.aparam_avg, self.aparam_inv_std = None, None + if self.dim_case_embd > 0: + self.register_buffer( + "case_embd", + torch.zeros(self.dim_case_embd, dtype=self.prec, device=device), + # torch.eye(self.dim_case_embd, dtype=self.prec, device=device)[0], + ) + else: + self.case_embd = None + in_dim = ( self.dim_descrpt + self.numb_fparam + (0 if self.use_aparam_as_mask else self.numb_aparam) + + self.dim_case_embd ) self.filter_layers = NetworkCollection( @@ -274,7 +281,7 @@ def serialize(self) -> dict: """Serialize the fitting to dict.""" return { "@class": "Fitting", - "@version": 2, + "@version": 3, "var_name": self.var_name, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, @@ -282,6 +289,7 @@ def serialize(self) -> dict: "resnet_dt": self.resnet_dt, "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, + "dim_case_embd": self.dim_case_embd, "activation_function": self.activation_function, "precision": self.precision, "mixed_types": self.mixed_types, @@ -290,6 +298,7 @@ def serialize(self) -> dict: "exclude_types": self.exclude_types, "@variables": { "bias_atom_e": to_numpy_array(self.bias_atom_e), + "case_embd": to_numpy_array(self.case_embd), "fparam_avg": to_numpy_array(self.fparam_avg), "fparam_inv_std": to_numpy_array(self.fparam_inv_std), "aparam_avg": to_numpy_array(self.aparam_avg), @@ -349,6 +358,15 @@ def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" return self.type_map + def set_case_embd(self, case_idx: int): + """ + Set the case embedding of this fitting net by the given case_idx, + typically concatenated with the output of the descriptor and fed into the fitting net. 
+ """ + self.case_embd = torch.eye(self.dim_case_embd, dtype=self.prec, device=device)[ + case_idx + ] + def __setitem__(self, key, value) -> None: if key in ["bias_atom_e"]: value = value.view([self.ntypes, self._net_out_dim()]) @@ -361,6 +379,8 @@ def __setitem__(self, key, value) -> None: self.aparam_avg = value elif key in ["aparam_inv_std"]: self.aparam_inv_std = value + elif key in ["case_embd"]: + self.case_embd = value elif key in ["scale"]: self.scale = value else: @@ -377,6 +397,8 @@ def __getitem__(self, key): return self.aparam_avg elif key in ["aparam_inv_std"]: return self.aparam_inv_std + elif key in ["case_embd"]: + return self.case_embd elif key in ["scale"]: return self.scale else: @@ -475,6 +497,19 @@ def _forward_common( dim=-1, ) + if self.dim_case_embd > 0: + assert self.case_embd is not None + case_embd = torch.tile(self.case_embd.reshape([1, 1, -1]), [nf, nloc, 1]) + xx = torch.cat( + [xx, case_embd], + dim=-1, + ) + if xx_zeros is not None: + xx_zeros = torch.cat( + [xx_zeros, case_embd], + dim=-1, + ) + outs = torch.zeros( (nf, nloc, net_dim_out), dtype=self.prec, diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index 2579f5b9da..b1599eac60 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -56,6 +56,8 @@ class InvarFitting(GeneralFitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + dim_case_embd : int + Dimension of case specific embedding. activation_function : str Activation function. precision : str @@ -91,6 +93,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -113,6 +116,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -141,7 +145,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) return super().deserialize(data) def output_def(self) -> FittingOutputDef: diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 8e07896e38..d9a421d635 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -53,6 +53,8 @@ class PolarFittingNet(GeneralFitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + dim_case_embd : int + Dimension of case specific embedding. activation_function : str Activation function. 
precision : str @@ -85,6 +87,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -128,6 +131,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -191,7 +195,7 @@ def change_type_map( def serialize(self) -> dict: data = super().serialize() data["type"] = "polar" - data["@version"] = 3 + data["@version"] = 4 data["embedding_width"] = self.embedding_width data["fit_diag"] = self.fit_diag data["shift_diag"] = self.shift_diag @@ -202,7 +206,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data.pop("var_name", None) return super().deserialize(data) diff --git a/deepmd/pt/model/task/property.py b/deepmd/pt/model/task/property.py index 1c2b9e7c9c..dec0f1447b 100644 --- a/deepmd/pt/model/task/property.py +++ b/deepmd/pt/model/task/property.py @@ -60,6 +60,8 @@ class PropertyFittingNet(InvarFitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + dim_case_embd : int + Dimension of case specific embedding. activation_function : str Activation function. precision : str @@ -83,6 +85,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, @@ -102,6 +105,7 @@ def __init__( resnet_dt=resnet_dt, numb_fparam=numb_fparam, numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, activation_function=activation_function, precision=precision, mixed_types=mixed_types, @@ -129,7 +133,7 @@ def output_def(self) -> FittingOutputDef: @classmethod def deserialize(cls, data: dict) -> "PropertyFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data.pop("dim_out") data.pop("var_name") obj = super().deserialize(data) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index af6e48191d..61683fd857 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -265,7 +265,7 @@ def get_lr(lr_params): self.opt_type, self.opt_param = get_opt_param(training_params) # Model - self.model = get_model_for_wrapper(model_params) + self.model = get_model_for_wrapper(model_params, resuming=resuming) # Loss if not self.multi_task: @@ -1267,7 +1267,7 @@ def get_single_model( return model -def get_model_for_wrapper(_model_params): +def get_model_for_wrapper(_model_params, resuming=False): if "model_dict" not in _model_params: _model = get_single_model( _model_params, @@ -1275,13 +1275,41 @@ def get_model_for_wrapper(_model_params): else: _model = {} model_keys = list(_model_params["model_dict"]) + do_case_embd, case_embd_index = get_case_embd_config(_model_params) for _model_key in model_keys: _model[_model_key] = get_single_model( _model_params["model_dict"][_model_key], ) + if do_case_embd and not resuming: + # only set case_embd when from scratch multitask training + _model[_model_key].set_case_embd(case_embd_index[_model_key]) return _model +def get_case_embd_config(_model_params): + assert ( + "model_dict" in 
_model_params + ), "Only support setting case embedding for multi-task model!" + model_keys = list(_model_params["model_dict"]) + sorted_model_keys = sorted(model_keys) + numb_case_embd_list = [ + _model_params["model_dict"][model_key] + .get("fitting_net", {}) + .get("dim_case_embd", 0) + for model_key in sorted_model_keys + ] + if not all(item == numb_case_embd_list[0] for item in numb_case_embd_list): + raise ValueError( + f"All models must have the same dimension of case embedding, while the settings are: {numb_case_embd_list}" + ) + if numb_case_embd_list[0] == 0: + return False, {} + case_embd_index = { + model_key: idx for idx, model_key in enumerate(sorted_model_keys) + } + return True, case_embd_index + + def model_change_out_bias( _model, _sample_func, diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 48119caf19..f0253c283e 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -112,8 +112,10 @@ def share_params(self, shared_links, resume=False) -> None: f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!" ) else: - if hasattr(self.model[model_key_base], class_type_base): - base_class = self.model[model_key_base].__getattr__(class_type_base) + if hasattr(self.model[model_key_base].atomic_model, class_type_base): + base_class = self.model[model_key_base].atomic_model.__getattr__( + class_type_base + ) for link_item in shared_links[shared_item]["links"][1:]: class_type_link = link_item["shared_type"] model_key_link = link_item["model_key"] @@ -124,9 +126,9 @@ def share_params(self, shared_links, resume=False) -> None: assert ( class_type_base == class_type_link ), f"Class type mismatched: {class_type_base} vs {class_type_link}!" - link_class = self.model[model_key_link].__getattr__( - class_type_link - ) + link_class = self.model[ + model_key_link + ].atomic_model.__getattr__(class_type_link) link_class.share_params( base_class, shared_level_link, resume=resume ) diff --git a/deepmd/tf/fit/dipole.py b/deepmd/tf/fit/dipole.py index c05fa4b525..4428d06536 100644 --- a/deepmd/tf/fit/dipole.py +++ b/deepmd/tf/fit/dipole.py @@ -58,6 +58,8 @@ class DipoleFittingSeA(Fitting): Number of frame parameters numb_aparam Number of atomic parameters + dim_case_embd + Dimension of case specific embedding. sel_type : list[int] The atom types selected to have an atomic dipole prediction. If is None, all atoms are selected. 
seed : int @@ -84,6 +86,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, sel_type: Optional[list[int]] = None, seed: Optional[int] = None, activation_function: str = "tanh", @@ -119,10 +122,13 @@ def __init__( self.type_map = type_map self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.dim_case_embd = dim_case_embd if numb_fparam > 0: raise ValueError("numb_fparam is not supported in the dipole fitting") if numb_aparam > 0: raise ValueError("numb_aparam is not supported in the dipole fitting") + if dim_case_embd > 0: + raise ValueError("dim_case_embd is not supported in TensorFlow.") self.fparam_avg = None self.fparam_std = None self.fparam_inv_std = None @@ -385,7 +391,7 @@ def serialize(self, suffix: str) -> dict: data = { "@class": "Fitting", "type": "dipole", - "@version": 2, + "@version": 3, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, "embedding_width": self.dim_rot_mat_1, @@ -395,6 +401,7 @@ def serialize(self, suffix: str) -> dict: "resnet_dt": self.resnet_dt, "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, + "dim_case_embd": self.dim_case_embd, "activation_function": self.activation_function_name, "precision": self.fitting_precision.name, "exclude_types": [], @@ -428,7 +435,7 @@ def deserialize(cls, data: dict, suffix: str): The deserialized model """ data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) fitting = cls(**data) fitting.fitting_net_variables = cls.deserialize_network( data["nets"], diff --git a/deepmd/tf/fit/dos.py b/deepmd/tf/fit/dos.py index 099cba0d12..1da0e55a92 100644 --- a/deepmd/tf/fit/dos.py +++ b/deepmd/tf/fit/dos.py @@ -74,6 +74,8 @@ class DOSFitting(Fitting): Number of frame parameter numb_aparam Number of atomic parameter + dim_case_embd + Dimension of case specific embedding. ! 
numb_dos (added) Number of gridpoints on which the DOS is evaluated (NEDOS in VASP) rcond @@ -111,6 +113,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, numb_dos: int = 300, rcond: Optional[float] = None, trainable: Optional[list[bool]] = None, @@ -132,6 +135,9 @@ def __init__( self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.dim_case_embd = dim_case_embd + if dim_case_embd > 0: + raise ValueError("dim_case_embd is not supported in TensorFlow.") self.numb_dos = numb_dos @@ -672,7 +678,7 @@ def deserialize(cls, data: dict, suffix: str = ""): The deserialized model """ data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) data["numb_dos"] = data.pop("dim_out") fitting = cls(**data) fitting.fitting_net_variables = cls.deserialize_network( @@ -699,7 +705,7 @@ def serialize(self, suffix: str = "") -> dict: data = { "@class": "Fitting", "type": "dos", - "@version": 2, + "@version": 3, "var_name": "dos", "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, @@ -709,6 +715,7 @@ def serialize(self, suffix: str = "") -> dict: "resnet_dt": self.resnet_dt, "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, + "dim_case_embd": self.dim_case_embd, "rcond": self.rcond, "trainable": self.trainable, "activation_function": self.activation_function, @@ -731,6 +738,7 @@ def serialize(self, suffix: str = "") -> dict: "fparam_inv_std": self.fparam_inv_std, "aparam_avg": self.aparam_avg, "aparam_inv_std": self.aparam_inv_std, + "case_embd": None, }, "type_map": self.type_map, } diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py index 7a3ee8eade..068d3d8e35 100644 --- a/deepmd/tf/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -117,6 +117,8 @@ class EnerFitting(Fitting): Number of frame parameter numb_aparam Number of atomic parameter + dim_case_embd + Dimension of case specific embedding. rcond The condition number for the regression of atomic energy. 
tot_ener_zero @@ -156,6 +158,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, rcond: Optional[float] = None, tot_ener_zero: bool = False, trainable: Optional[list[bool]] = None, @@ -190,6 +193,9 @@ def __init__( # .add("trainable", [list, bool], default = True) self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.dim_case_embd = dim_case_embd + if dim_case_embd > 0: + raise ValueError("dim_case_embd is not supported in TensorFlow.") self.n_neuron = neuron self.resnet_dt = resnet_dt self.rcond = rcond @@ -878,7 +884,7 @@ def deserialize(cls, data: dict, suffix: str = ""): The deserialized model """ data = data.copy() - check_version_compatibility(data.pop("@version", 1), 2, 1) + check_version_compatibility(data.pop("@version", 1), 3, 1) fitting = cls(**data) fitting.fitting_net_variables = cls.deserialize_network( data["nets"], @@ -904,7 +910,7 @@ def serialize(self, suffix: str = "") -> dict: data = { "@class": "Fitting", "type": "ener", - "@version": 2, + "@version": 3, "var_name": "energy", "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt + self.tebd_dim, @@ -914,6 +920,7 @@ def serialize(self, suffix: str = "") -> dict: "resnet_dt": self.resnet_dt, "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, + "dim_case_embd": self.dim_case_embd, "rcond": self.rcond, "tot_ener_zero": self.tot_ener_zero, "trainable": self.trainable, @@ -945,6 +952,7 @@ def serialize(self, suffix: str = "") -> dict: "fparam_inv_std": self.fparam_inv_std, "aparam_avg": self.aparam_avg, "aparam_inv_std": self.aparam_inv_std, + "case_embd": None, }, "type_map": self.type_map, } diff --git a/deepmd/tf/fit/polar.py b/deepmd/tf/fit/polar.py index 2f1400e697..14dd6ee092 100644 --- a/deepmd/tf/fit/polar.py +++ b/deepmd/tf/fit/polar.py @@ -63,6 +63,8 @@ class PolarFittingSeA(Fitting): Number of frame parameters numb_aparam Number of atomic parameters + dim_case_embd + Dimension of case specific embedding. sel_type : list[int] The atom types selected to have an atomic polarizability prediction. If is None, all atoms are selected. 
fit_diag : bool @@ -95,6 +97,7 @@ def __init__( resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, + dim_case_embd: int = 0, sel_type: Optional[list[int]] = None, fit_diag: bool = True, scale: Optional[list[float]] = None, @@ -162,10 +165,13 @@ def __init__( self.type_map = type_map self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.dim_case_embd = dim_case_embd if numb_fparam > 0: raise ValueError("numb_fparam is not supported in the dipole fitting") if numb_aparam > 0: raise ValueError("numb_aparam is not supported in the dipole fitting") + if dim_case_embd > 0: + raise ValueError("dim_case_embd is not supported in TensorFlow.") self.fparam_avg = None self.fparam_std = None self.fparam_inv_std = None @@ -578,7 +584,7 @@ def serialize(self, suffix: str) -> dict: data = { "@class": "Fitting", "type": "polar", - "@version": 3, + "@version": 4, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, "embedding_width": self.dim_rot_mat_1, @@ -588,6 +594,7 @@ "resnet_dt": self.resnet_dt, "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, + "dim_case_embd": self.dim_case_embd, "activation_function": self.activation_function_name, "precision": self.fitting_precision.name, "exclude_types": [], @@ -625,7 +632,7 @@ def deserialize(cls, data: dict, suffix: str): """ data = data.copy() check_version_compatibility( - data.pop("@version", 1), 3, 1 + data.pop("@version", 1), 4, 1 ) # to allow PT version. fitting = cls(**data) fitting.fitting_net_variables = cls.deserialize_network( diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index d5419a38cd..5b57f15979 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1433,6 +1433,7 @@ def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant: def fitting_ener(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_dim_case_embd = "The dimension of the case embedding. When training or fine-tuning a multitask model with case embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." @@ -1459,6 +1460,13 @@ return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "dim_case_embd", + int, + optional=True, + default=0, + doc=doc_only_pt_supported + doc_dim_case_embd, + ), Argument( "neuron", list[int], @@ -1509,6 +1517,7 @@ def fitting_ener(): def fitting_dos(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_dim_case_embd = "The dimension of the case embedding. When training or fine-tuning a multitask model with case embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." @@ -1525,6 +1534,13 @@ return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "dim_case_embd", + int, + optional=True, + default=0, + doc=doc_only_pt_supported + doc_dim_case_embd, + ), Argument( "neuron", list[int], optional=True, default=[120, 120, 120], doc=doc_neuron ), @@ -1556,6 +1572,7 @@ def fitting_dos(): def fitting_property(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_dim_case_embd = "The dimension of the case embedding. When training or fine-tuning a multitask model with case embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built" doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' @@ -1567,6 +1584,13 @@ return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "dim_case_embd", + int, + optional=True, + default=0, + doc=doc_only_pt_supported + doc_dim_case_embd, + ), Argument( "neuron", list[int], @@ -1597,6 +1621,7 @@ def fitting_property(): def fitting_polar(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_dim_case_embd = "The dimension of the case embedding. When training or fine-tuning a multitask model with case embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. 
When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' @@ -1625,6 +1650,13 @@ def fitting_polar(): default=0, doc=doc_only_pt_supported + doc_numb_aparam, ), + Argument( + "dim_case_embd", + int, + optional=True, + default=0, + doc=doc_only_pt_supported + doc_dim_case_embd, + ), Argument( "neuron", list[int], @@ -1667,6 +1699,7 @@ def fitting_polar(): def fitting_dipole(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' @@ -1688,6 +1721,13 @@ def fitting_dipole(): default=0, doc=doc_only_pt_supported + doc_numb_aparam, ), + Argument( + "dim_case_embd", + int, + optional=True, + default=0, + doc=doc_only_pt_supported + doc_dim_case_embd, + ), Argument( "neuron", list[int], diff --git a/doc/train/multi-task-training.md b/doc/train/multi-task-training.md index 9d5b71592e..51dffcc5f5 100644 --- a/doc/train/multi-task-training.md +++ b/doc/train/multi-task-training.md @@ -48,14 +48,27 @@ Specifically, there are several parts that need to be modified: - {ref}`model/model_dict `: The core definition of the model part and the explanation of sharing rules, starting with user-defined model name keys `model_key`, such as `my_model_1`. Each model part needs to align with the components of the single-task training {ref}`model `, but with the following sharing rules: -- - If you want to share the current model component with other tasks, which should be part of the {ref}`model/shared_dict `, + + - If you want to share the current model component with other tasks, which should be part of the {ref}`model/shared_dict `, you can directly fill in the corresponding `part_key`, such as `"descriptor": "my_descriptor", ` to replace the previous detailed parameters. Here, you can also specify the shared_level, such as `"descriptor": "my_descriptor:shared_level", ` - and use the user-defined integer `shared_level` in the code to share the corresponding module to varying degrees - (default is to share all parameters, i.e., `shared_level`=0). - The parts that are exclusive to each model can be written following the previous definition. + and use the user-defined integer `shared_level` in the code to share the corresponding module to varying degrees. 
+    - For descriptors, `shared_level` can be set as follows:
+      - Valid `shared_level` values are 0-1, depending on the descriptor type
+      - Each level enables different sharing behaviors:
+        - Level 0: Shares all parameters (default)
+        - Level 1: Shares type embedding only
+      - Not all descriptors support all levels (e.g., se_a only supports level 0)
+    - For fitting nets, we only support the default `shared_level`=0, where all parameters will be shared except for `bias_atom_e` and `case_embd`.
+    - To conduct multitask training, there are two typical approaches:
+      1. **Descriptor sharing only**: Share the descriptor with `shared_level`=0. See [here](../../examples/water_multi_task/pytorch_example/input_torch.json) for an example.
+      2. **Descriptor and fitting network sharing with data identification**:
+         - Share the descriptor and the fitting network with `shared_level`=0.
+         - {ref}`dim_case_embd ` must be set to the number of model branches, which will distinguish different data tasks using a one-hot embedding.
+         - See [here](../../examples/water_multi_task/pytorch_example/input_torch_sharefit.json) for an example.
+  - The parts that are exclusive to each model can be written following the previous definition.
 
 - {ref}`loss_dict `: The loss settings corresponding to each task model, specified by the `model_key`.
   Each {ref}`loss_dict/model_key ` contains the corresponding loss settings,
diff --git a/examples/water_multi_task/pytorch_example/input_torch_sharefit.json b/examples/water_multi_task/pytorch_example/input_torch_sharefit.json
new file mode 100644
index 0000000000..2fc23007c6
--- /dev/null
+++ b/examples/water_multi_task/pytorch_example/input_torch_sharefit.json
@@ -0,0 +1,155 @@
+{
+    "_comment": "that's all",
+    "model": {
+        "shared_dict": {
+            "type_map_all": [
+                "O",
+                "H"
+            ],
+            "dpa2_descriptor": {
+                "type": "dpa2",
+                "repinit": {
+                    "tebd_dim": 8,
+                    "rcut": 6.0,
+                    "rcut_smth": 0.5,
+                    "nsel": 120,
+                    "neuron": [
+                        25,
+                        50,
+                        100
+                    ],
+                    "axis_neuron": 12,
+                    "activation_function": "tanh",
+                    "three_body_sel": 48,
+                    "three_body_rcut": 4.0,
+                    "three_body_rcut_smth": 3.5,
+                    "use_three_body": true
+                },
+                "repformer": {
+                    "rcut": 4.0,
+                    "rcut_smth": 3.5,
+                    "nsel": 48,
+                    "nlayers": 6,
+                    "g1_dim": 128,
+                    "g2_dim": 32,
+                    "attn2_hidden": 32,
+                    "attn2_nhead": 4,
+                    "attn1_hidden": 128,
+                    "attn1_nhead": 4,
+                    "axis_neuron": 4,
+                    "update_h2": false,
+                    "update_g1_has_conv": true,
+                    "update_g1_has_grrg": true,
+                    "update_g1_has_drrd": true,
+                    "update_g1_has_attn": false,
+                    "update_g2_has_g1g1": false,
+                    "update_g2_has_attn": true,
+                    "update_style": "res_residual",
+                    "update_residual": 0.01,
+                    "update_residual_init": "norm",
+                    "attn2_has_gate": true,
+                    "use_sqrt_nnei": true,
+                    "g1_out_conv": true,
+                    "g1_out_mlp": true
+                },
+                "precision": "float64",
+                "add_tebd_to_repinit_out": false,
+                "_comment": " that's all"
+            },
+            "shared_fit_with_id": {
+                "neuron": [
+                    240,
+                    240,
+                    240
+                ],
+                "resnet_dt": true,
+                "seed": 1,
+                "dim_case_embd": 2,
+                "_comment": " that's all"
+            },
+            "_comment": "that's all"
+        },
+        "model_dict": {
+            "water_1": {
+                "type_map": "type_map_all",
+                "descriptor": "dpa2_descriptor",
+                "fitting_net": "shared_fit_with_id"
+            },
+            "water_2": {
+                "type_map": "type_map_all",
+                "descriptor": "dpa2_descriptor",
+                "fitting_net": "shared_fit_with_id"
+            }
+        }
+    },
+    "learning_rate": {
+        "type": "exp",
+        "decay_steps": 5000,
+        "start_lr": 0.001,
+        "stop_lr": 3.51e-08,
+        "_comment": "that's all"
+    },
+    "loss_dict": {
+        "water_1": {
+            "type": "ener",
+            "start_pref_e": 0.02,
+            "limit_pref_e": 1,
+            "start_pref_f": 1000,
+            "limit_pref_f": 1,
+            "start_pref_v": 0,
+            "limit_pref_v": 0
+        },
+        "water_2": {
+            "type": "ener",
+            "start_pref_e": 0.02,
+            "limit_pref_e": 1,
+            "start_pref_f": 1000,
+            "limit_pref_f": 1,
+            "start_pref_v": 0,
+            "limit_pref_v": 0
+        }
+    },
+    "training": {
+        "model_prob": {
+            "water_1": 0.5,
+            "water_2": 0.5
+        },
+        "data_dict": {
+            "water_1": {
+                "training_data": {
+                    "systems": [
+                        "../../water/data/data_0/",
+                        "../../water/data/data_1/",
+                        "../../water/data/data_2/"
+                    ],
+                    "batch_size": 1,
+                    "_comment": "that's all"
+                },
+                "validation_data": {
+                    "systems": [
+                        "../../water/data/data_3/"
+                    ],
+                    "batch_size": 1,
+                    "_comment": "that's all"
+                }
+            },
+            "water_2": {
+                "training_data": {
+                    "systems": [
+                        "../../water/data/data_0/",
+                        "../../water/data/data_1/",
+                        "../../water/data/data_2/"
+                    ],
+                    "batch_size": 1,
+                    "_comment": "that's all"
+                }
+            }
+        },
+        "numb_steps": 100000,
+        "seed": 10,
+        "disp_file": "lcurve.out",
+        "disp_freq": 100,
+        "save_freq": 100,
+        "_comment": "that's all"
+    }
+}
diff --git a/source/tests/common/test_examples.py b/source/tests/common/test_examples.py
index 068a91709c..1ddbb50db9 100644
--- a/source/tests/common/test_examples.py
+++ b/source/tests/common/test_examples.py
@@ -64,6 +64,7 @@
 input_files_multi = (
     p_examples / "water_multi_task" / "pytorch_example" / "input_torch.json",
+    p_examples / "water_multi_task" / "pytorch_example" / "input_torch_sharefit.json",
 )
diff --git a/source/tests/pt/model/water/multitask.json b/source/tests/pt/model/water/multitask.json
index 06a4f88e55..e8d998e6f1 100644
--- a/source/tests/pt/model/water/multitask.json
+++ b/source/tests/pt/model/water/multitask.json
@@ -10,7 +10,8 @@
             "type": "se_e2_a",
             "sel": [
                 46,
-                92
+                92,
+                4
             ],
             "rcut_smth": 0.50,
             "rcut": 6.00,
diff --git a/source/tests/pt/model/water/multitask_sharefit.json b/source/tests/pt/model/water/multitask_sharefit.json
new file mode 100644
index 0000000000..246b5992f7
--- /dev/null
+++ b/source/tests/pt/model/water/multitask_sharefit.json
@@ -0,0 +1,134 @@
+{
+    "model": {
+        "shared_dict": {
+            "my_type_map": [
+                "O",
+                "H",
+                "B"
+            ],
+            "my_descriptor": {
+                "type": "se_e2_a",
+                "sel": [
+                    46,
+                    92,
+                    4
+                ],
+                "rcut_smth": 0.50,
+                "rcut": 6.00,
+                "neuron": [
+                    25,
+                    50,
+                    100
+                ],
+                "resnet_dt": false,
+                "axis_neuron": 16,
+                "seed": 1,
+                "_comment": " that's all"
+            },
+            "my_fitting": {
+                "dim_case_embd": 2,
+                "neuron": [
+                    240,
+                    240,
+                    240
+                ],
+                "resnet_dt": true,
+                "seed": 1,
+                "_comment": " that's all"
+            },
+            "_comment": "that's all"
+        },
+        "model_dict": {
+            "model_1": {
+                "type_map": "my_type_map",
+                "descriptor": "my_descriptor",
+                "fitting_net": "my_fitting",
+                "data_stat_nbatch": 1
+            },
+            "model_2": {
+                "type_map": "my_type_map",
+                "descriptor": "my_descriptor",
+                "fitting_net": "my_fitting",
+                "data_stat_nbatch": 1
+            }
+        }
+    },
+    "learning_rate": {
+        "type": "exp",
+        "decay_steps": 5000,
+        "start_lr": 0.0002,
+        "decay_rate": 0.98,
+        "stop_lr": 3.51e-08,
+        "_comment": "that's all"
+    },
+    "loss_dict": {
+        "model_1": {
+            "type": "ener",
+            "start_pref_e": 0.02,
+            "limit_pref_e": 1,
+            "start_pref_f": 1000,
+            "limit_pref_f": 1,
+            "start_pref_v": 0,
+            "limit_pref_v": 0
+        },
+        "model_2": {
+            "type": "ener",
+            "start_pref_e": 0.02,
+            "limit_pref_e": 1,
+            "start_pref_f": 1000,
+            "limit_pref_f": 1,
+            "start_pref_v": 0,
+            "limit_pref_v": 0
+        }
+    },
+    "training": {
+        "model_prob": {
+            "model_1": 0.5,
+            "model_2": 0.5
+        },
+        "data_dict": {
+            "model_1": {
+                "stat_file": "./stat_files/model_1.hdf5",
+                "training_data": {
+                    "systems": [
+                        "pt/water/data/data_0"
+                    ],
+                    "batch_size": 1,
+                    "_comment": "that's all"
+                },
+                "validation_data": {
+                    "systems": [
+                        "pt/water/data/data_0"
+                    ],
+                    "batch_size": 1,
+                    "_comment": "that's all"
+                }
+            },
+            "model_2": {
+                "stat_file": "./stat_files/model_2.hdf5",
+                "training_data": {
+                    "systems": [
+                        "pt/water/data/data_0"
+                    ],
+                    "batch_size": 1,
+                    "_comment": "that's all"
+                },
+                "validation_data": {
+                    "systems": [
+                        "pt/water/data/data_0"
+                    ],
+                    "batch_size": 1,
+                    "_comment": "that's all"
+                }
+            }
+        },
+        "numb_steps": 100000,
+        "warmup_steps": 0,
+        "gradient_max_norm": 5.0,
+        "seed": 10,
+        "disp_file": "lcurve.out",
+        "disp_freq": 100,
+        "save_freq": 100,
+        "_comment": "that's all"
+    }
+}
diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py
index a59d6f8e54..62964abad3 100644
--- a/source/tests/pt/test_multitask.py
+++ b/source/tests/pt/test_multitask.py
@@ -42,12 +42,20 @@ def setUpModule() -> None:
     with open(multitask_template_json) as f:
         multitask_template = json.load(f)
 
+    global multitask_sharefit_template
+    multitask_sharefit_template_json = str(
+        Path(__file__).parent / "water/multitask_sharefit.json"
+    )
+    with open(multitask_sharefit_template_json) as f:
+        multitask_sharefit_template = json.load(f)
+
 
 class MultiTaskTrainTest:
     def test_multitask_train(self) -> None:
         # test multitask training
         self.config = update_deepmd_input(self.config, warning=True)
         self.config = normalize(self.config, multi_task=True)
+        self.share_fitting = getattr(self, "share_fitting", False)
         trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links)
         trainer.run()
         # check model keys
@@ -62,7 +70,12 @@ def test_multitask_train(self) -> None:
                 self.assertIn(state_key.replace("model_1", "model_2"), multi_state_dict)
             if "model_2" in state_key:
                 self.assertIn(state_key.replace("model_2", "model_1"), multi_state_dict)
-            if "model_1.descriptor" in state_key:
+            if ("model_1.atomic_model.descriptor" in state_key) or (
+                self.share_fitting
+                and "model_1.atomic_model.fitting_net" in state_key
+                and "fitting_net.bias_atom_e" not in state_key
+                and "fitting_net.case_embd" not in state_key
+            ):
                 torch.testing.assert_close(
                     multi_state_dict[state_key],
                     multi_state_dict[state_key.replace("model_1", "model_2")],
@@ -223,6 +236,46 @@ def tearDown(self) -> None:
         MultiTaskTrainTest.tearDown(self)
 
 
+class TestMultiTaskSeASharefit(unittest.TestCase, MultiTaskTrainTest):
+    def setUp(self) -> None:
+        multitask_se_e2_a = deepcopy(multitask_sharefit_template)
+        multitask_se_e2_a["model"]["shared_dict"]["my_descriptor"] = model_se_e2_a[
+            "descriptor"
+        ]
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.stat_files = "se_e2_a_share_fit"
+        os.makedirs(self.stat_files, exist_ok=True)
+        self.config = multitask_se_e2_a
+        self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_1"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_1"]["stat_file"] = (
+            f"{self.stat_files}/model_1"
+        )
+        self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_2"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_2"]["stat_file"] = (
+            f"{self.stat_files}/model_2"
+        )
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.origin_config = deepcopy(self.config)
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
+        self.share_fitting = True
+
+    def tearDown(self) -> None:
+        MultiTaskTrainTest.tearDown(self)
+
+
 class TestMultiTaskDPA1(unittest.TestCase, MultiTaskTrainTest):
     def setUp(self) -> None:
         multitask_DPA1 = deepcopy(multitask_template)
diff --git a/source/tests/universal/dpmodel/fitting/test_fitting.py b/source/tests/universal/dpmodel/fitting/test_fitting.py
index fe6ffd2e09..db199c02a3 100644
--- a/source/tests/universal/dpmodel/fitting/test_fitting.py
+++ b/source/tests/universal/dpmodel/fitting/test_fitting.py
@@ -39,7 +39,7 @@ def FittingParamEnergy(
     exclude_types=[],
     precision="float64",
     embedding_width=None,
-    numb_param=0,  # test numb_fparam and numb_aparam together
+    numb_param=0,  # test numb_fparam, numb_aparam and dim_case_embd together
 ):
     input_dict = {
         "ntypes": ntypes,
@@ -51,6 +51,7 @@ def FittingParamEnergy(
         "precision": precision,
         "numb_fparam": numb_param,
         "numb_aparam": numb_param,
+        "dim_case_embd": numb_param,
     }
     return input_dict
 
@@ -77,7 +78,7 @@ def FittingParamDos(
     exclude_types=[],
     precision="float64",
     embedding_width=None,
-    numb_param=0,  # test numb_fparam and numb_aparam together
+    numb_param=0,  # test numb_fparam, numb_aparam and dim_case_embd together
 ):
     input_dict = {
         "ntypes": ntypes,
@@ -89,6 +90,7 @@ def FittingParamDos(
         "precision": precision,
         "numb_fparam": numb_param,
         "numb_aparam": numb_param,
+        "dim_case_embd": numb_param,
     }
     return input_dict
 
@@ -115,7 +117,7 @@ def FittingParamDipole(
     exclude_types=[],
     precision="float64",
     embedding_width=None,
-    numb_param=0,  # test numb_fparam and numb_aparam together
+    numb_param=0,  # test numb_fparam, numb_aparam and dim_case_embd together
 ):
     assert (
         embedding_width is not None
@@ -131,6 +133,7 @@ def FittingParamDipole(
         "precision": precision,
         "numb_fparam": numb_param,
         "numb_aparam": numb_param,
+        "dim_case_embd": numb_param,
     }
     return input_dict
 
@@ -157,7 +160,7 @@ def FittingParamPolar(
     exclude_types=[],
     precision="float64",
    embedding_width=None,
-    numb_param=0,  # test numb_fparam and numb_aparam together
+    numb_param=0,  # test numb_fparam, numb_aparam and dim_case_embd together
 ):
     assert embedding_width is not None, "embedding_width for polar fitting is required."
     input_dict = {
@@ -171,6 +174,7 @@ def FittingParamPolar(
         "precision": precision,
         "numb_fparam": numb_param,
         "numb_aparam": numb_param,
+        "dim_case_embd": numb_param,
     }
     return input_dict
 
@@ -197,7 +201,7 @@ def FittingParamProperty(
     exclude_types=[],
     precision="float64",
     embedding_width=None,
-    numb_param=0,  # test numb_fparam and numb_aparam together
+    numb_param=0,  # test numb_fparam, numb_aparam and dim_case_embd together
 ):
     input_dict = {
         "ntypes": ntypes,
@@ -209,6 +213,7 @@ def FittingParamProperty(
         "precision": precision,
         "numb_fparam": numb_param,
         "numb_aparam": numb_param,
+        "dim_case_embd": numb_param,
     }
     return input_dict
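To make the mechanism behind `dim_case_embd` concrete, the following is a minimal NumPy sketch, not deepmd-kit code: `make_case_embd` and `fitting_input` are hypothetical helpers, and the shapes are illustrative. It shows why both JSON configurations above set `dim_case_embd` to 2 (the number of model branches): each branch receives a one-hot case vector that is appended per atom to the descriptor output before the shared fitting net, so the same network can tell the two data tasks apart.

import numpy as np

def make_case_embd(n_branches: int, case_idx: int) -> np.ndarray:
    # One-hot task identifier; dim_case_embd equals the number of branches.
    assert 0 <= case_idx < n_branches
    case_embd = np.zeros(n_branches)
    case_embd[case_idx] = 1.0
    return case_embd

def fitting_input(descriptor_out: np.ndarray, case_embd: np.ndarray) -> np.ndarray:
    # Tile the case vector over frames and atoms, then append it to each
    # atom's descriptor, widening the fitting-net input by dim_case_embd.
    nframes, nloc, _ = descriptor_out.shape
    tiled = np.broadcast_to(case_embd, (nframes, nloc, case_embd.size))
    return np.concatenate([descriptor_out, tiled], axis=-1)

# Two branches sharing one fitting net, e.g. water_1 -> 0, water_2 -> 1:
desc = np.random.rand(2, 192, 128)                 # (nframes, nloc, dim_descrpt)
inp_1 = fitting_input(desc, make_case_embd(2, 0))  # frames from the water_1 task
inp_2 = fitting_input(desc, make_case_embd(2, 1))  # frames from the water_2 task
assert inp_1.shape == (2, 192, 128 + 2)

Because the case vector (not a trained weight tied to one branch) is what separates the tasks, `case_embd`, like `bias_atom_e`, is deliberately excluded from the parameter-identity check in `test_multitask_train` above: those entries are expected to differ between `model_1` and `model_2` even when every other fitting-net parameter is shared.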