From 67b838d31bb3568576de143520f98df1a1258a78 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Sun, 17 Mar 2024 23:45:38 +0800 Subject: [PATCH 01/14] pt:support multitask finetune --- deepmd/pt/entrypoints/main.py | 15 +- .../model/atomic_model/linear_atomic_model.py | 4 - .../atomic_model/pairtab_atomic_model.py | 4 - deepmd/pt/model/model/dipole_model.py | 5 + deepmd/pt/model/model/dos_model.py | 5 + deepmd/pt/model/model/ener_model.py | 71 +++++ deepmd/pt/model/model/polar_model.py | 5 + deepmd/pt/model/model/spin_model.py | 5 + deepmd/pt/model/task/fitting.py | 78 ------ deepmd/pt/train/training.py | 170 ++++++++---- deepmd/pt/utils/finetune.py | 244 ++++++++++++------ deepmd/pt/utils/stat.py | 79 +++++- source/tests/pt/test_finetune.py | 151 +++++------ 13 files changed, 512 insertions(+), 324 deletions(-) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index 46d284a395..5ee4d28166 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -90,13 +90,13 @@ def get_trainer( dist.init_process_group(backend="nccl") ckpt = init_model if init_model is not None else restart_model - config["model"] = change_finetune_model_params( - ckpt, - finetune_model, - config["model"], - multi_task=multi_task, - model_branch=model_branch, - ) + finetune_links = None + if finetune_model is not None: + config["model"], finetune_links = change_finetune_model_params( + finetune_model, + config["model"], + model_branch=model_branch, + ) config["model"]["resuming"] = (finetune_model is not None) or (ckpt is not None) def prepare_trainer_input_single( @@ -194,6 +194,7 @@ def prepare_trainer_input_single( finetune_model=finetune_model, force_load=force_load, shared_links=shared_links, + finetune_links=finetune_links, init_frz_model=init_frz_model, ) return trainer diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index f7216f46ef..482972b5d5 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -390,10 +390,6 @@ def compute_or_load_stat( self.models[0].compute_or_load_stat(sampled_func, stat_file_path) self.models[1].compute_or_load_stat(sampled_func, stat_file_path) - def change_energy_bias(self): - # need to implement - pass - def serialize(self) -> dict: dd = BaseAtomicModel.serialize(self) dd.update( diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index a4aa43ede1..cd139cdead 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -234,10 +234,6 @@ def compute_or_load_stat( torch.tensor(bias_atom_e, device=env.DEVICE).view([self.ntypes, 1]) ) - def change_energy_bias(self) -> None: - # need to implement - pass - def forward_atomic( self, extended_coord: torch.Tensor, diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py index 45b120771b..231ddf89a4 100644 --- a/deepmd/pt/model/model/dipole_model.py +++ b/deepmd/pt/model/model/dipole_model.py @@ -92,3 +92,8 @@ def forward_lower( else: model_predict = model_ret return model_predict + + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + raise NotImplementedError diff --git a/deepmd/pt/model/model/dos_model.py b/deepmd/pt/model/model/dos_model.py index 680eac41f5..147354d32e 100644 --- 
a/deepmd/pt/model/model/dos_model.py +++ b/deepmd/pt/model/model/dos_model.py @@ -78,3 +78,8 @@ def forward_lower( else: model_predict = model_ret return model_predict + + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + raise NotImplementedError diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index 5217293623..4338297eac 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -1,15 +1,31 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import logging +import os +import tempfile from typing import ( Dict, Optional, ) +import numpy as np import torch +from deepmd.infer.deep_eval import ( + DeepEval, +) +from deepmd.pt.utils.stat import ( + compute_output_stats, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) + from .dp_model import ( DPModel, ) +log = logging.getLogger(__name__) + class EnergyModel(DPModel): model_type = "ener" @@ -97,3 +113,58 @@ def forward_lower( else: model_predict = model_ret return model_predict + + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + """Change the energy bias according to the input data and the pretrained model. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + origin_type_map : List[str] + The original type_map in dataset, they are targets to change the energy bias. + full_type_map : List[str] + The full type_map in pre-trained model + bias_shift : str + The mode for changing energy bias : ['delta', 'statistic'] + 'delta' : perform predictions on energies of target dataset, + and do least sqaure on the errors to obtain the target shift as bias. + 'statistic' : directly use the statistic energy bias in the target dataset. + """ + sorter = np.argsort(full_type_map) + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] + original_bias = self.get_fitting_net()["bias_atom_e"] + if bias_shift == "delta": + tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") + model = torch.jit.script(self) + torch.jit.save(model, tmp_model.name) + dp = DeepEval(tmp_model.name) + os.unlink(tmp_model.name) + delta_bias_e = compute_output_stats( + merged, + self.atomic_model.get_ntypes(), + model=dp, + ) + bias_atom_e = delta_bias_e + original_bias + elif bias_shift == "statistic": + bias_atom_e = compute_output_stats( + merged, + self.atomic_model.get_ntypes(), + ) + else: + raise RuntimeError("Unknown bias_shift mode: " + bias_shift) + log.info( + f"Change energy bias of {origin_type_map!s} " + f"from {to_numpy_array(original_bias[idx_type_map]).reshape(-1)!s} " + f"to {to_numpy_array(bias_atom_e[idx_type_map]).reshape(-1)!s}." 
+ ) + self.get_fitting_net()["bias_atom_e"] = bias_atom_e diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py index 403058aa47..ec669583b0 100644 --- a/deepmd/pt/model/model/polar_model.py +++ b/deepmd/pt/model/model/polar_model.py @@ -76,3 +76,8 @@ def forward_lower( else: model_predict = model_ret return model_predict + + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + raise NotImplementedError diff --git a/deepmd/pt/model/model/spin_model.py b/deepmd/pt/model/model/spin_model.py index df2f48e2e4..75ecee7601 100644 --- a/deepmd/pt/model/model/spin_model.py +++ b/deepmd/pt/model/model/spin_model.py @@ -558,3 +558,8 @@ def forward_lower( ].squeeze(-2) # not support virial by far return model_predict + + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + raise NotImplementedError diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 4637178318..128c6e378c 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import copy import logging -import os -import tempfile from abc import ( abstractmethod, ) @@ -15,9 +13,6 @@ import numpy as np import torch -from deepmd.infer.deep_eval import ( - DeepEval, -) from deepmd.pt.model.network.mlp import ( FittingNet, NetworkCollection, @@ -33,7 +28,6 @@ ) from deepmd.pt.utils.env import ( DEFAULT_PRECISION, - DEVICE, PRECISION_DICT, ) from deepmd.pt.utils.exclude_mask import ( @@ -43,12 +37,6 @@ to_numpy_array, to_torch_tensor, ) -from deepmd.utils.data_system import ( - DeepmdDataSystem, -) -from deepmd.utils.finetune import ( - change_energy_bias_lower, -) dtype = env.GLOBAL_PT_FLOAT_PRECISION device = env.DEVICE @@ -88,72 +76,6 @@ def share_params(self, base_class, shared_level, resume=False): else: raise NotImplementedError - def change_energy_bias( - self, - config, - model, - old_type_map: List[str], - new_type_map: List[str], - bias_shift="delta", - ntest=10, - ): - """Change the energy bias according to the input data and the pretrained model. - - Parameters - ---------- - config : Dict - The configuration. - model : EnergyModel - Energy model loaded pre-trained model. - new_type_map : List[str] - The original type_map in dataset, they are targets to change the energy bias. - old_type_map : List[str] - The full type_map in pretrained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, - and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. - ntest : int - The number of test samples in a system to change the energy bias. - """ - log.info( - f"Changing energy bias in pretrained model for types {new_type_map!s}... 
" - "(this step may take long time)" - ) - # data - systems = config["training"]["training_data"]["systems"] - finetune_data = DeepmdDataSystem( - systems=systems, - batch_size=config["training"]["training_data"].get("batch_size", "auto"), - test_size=1, - ) - finetune_data.add("energy", ndof=1, atomic=False, must=True, high_prec=True) - model = torch.jit.script(model) - if model.get_dim_fparam() > 0: - finetune_data.add("fparam", model.get_dim_fparam(), atomic=False, must=True) - if model.get_dim_aparam() > 0: - finetune_data.add("aparam", model.get_dim_aparam(), atomic=True, must=True) - tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") - torch.jit.save(model, tmp_model.name) - dp = DeepEval(tmp_model.name) - os.unlink(tmp_model.name) - bias = change_energy_bias_lower( - finetune_data, - dp, - new_type_map, - old_type_map, - self.bias_atom_e.detach().cpu().numpy().reshape(-1), - bias_shift=bias_shift, - ntest=ntest, - ) - self.bias_atom_e = ( - torch.from_numpy(bias) - .type_as(self.bias_atom_e) - .reshape(self.bias_atom_e.shape) - .to(DEVICE) - ) - class GeneralFitting(Fitting): """Construct a general fitting net. diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index fc293f70ec..032aa993b0 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -30,7 +30,7 @@ TensorLoss, ) from deepmd.pt.model.model import ( - DPZBLModel, + EnergyModel, get_model, get_zbl_model, ) @@ -96,6 +96,7 @@ def __init__( finetune_model=None, force_load=False, shared_links=None, + finetune_links=None, init_frz_model=None, ): """Construct a DeePMD trainer. @@ -116,8 +117,9 @@ def __init__( model_params = config["model"] training_params = config["training"] self.multi_task = "model_dict" in model_params - self.finetune_multi_task = model_params.pop( - "finetune_multi_task", False + self.finetune_links = finetune_links + self.finetune_from_multi_task = model_params.pop( + "finetune_from_multi_task", False ) # should use pop for next finetune self.model_keys = ( list(model_params["model_dict"]) if self.multi_task else ["Default"] @@ -234,23 +236,24 @@ def single_model_stat( _training_data.add_data_requirement(_data_requirement) if _validation_data is not None: _validation_data.add_data_requirement(_data_requirement) - if not resuming and self.rank == 0: - @functools.lru_cache - def get_sample(): - sampled = make_stat_input( - _training_data.systems, - _training_data.dataloaders, - _data_stat_nbatch, - ) - return sampled + @functools.lru_cache + def get_sample(): + sampled = make_stat_input( + _training_data.systems, + _training_data.dataloaders, + _data_stat_nbatch, + ) + return sampled + if not resuming and self.rank == 0: _model.compute_or_load_stat( sampled_func=get_sample, stat_file_path=_stat_file_path, ) if isinstance(_stat_file_path, DPH5Path): _stat_file_path.root.close() + return get_sample def get_single_model( _model_params, @@ -355,7 +358,7 @@ def get_loss(loss_params, start_lr, _ntypes, _model): # Data dp_random.seed(training_params["seed"]) if not self.multi_task: - single_model_stat( + self.get_sample_func = single_model_stat( self.model, model_params.get("data_stat_nbatch", 10), training_data, @@ -385,9 +388,10 @@ def get_loss(loss_params, start_lr, _ntypes, _model): self.validation_dataloader, self.validation_data, self.valid_numb_batch, - ) = {}, {}, {}, {}, {} + self.get_sample_func, + ) = {}, {}, {}, {}, {}, {} for model_key in self.model_keys: - single_model_stat( + self.get_sample_func[model_key] = single_model_stat( 
self.model[model_key], model_params["model_dict"][model_key].get("data_stat_nbatch", 10), training_data[model_key], @@ -486,60 +490,114 @@ def get_loss(loss_params, start_lr, _ntypes, _model): log.warning( f"Force load mode allowed! These keys are not in ckpt and will re-init: {slim_keys}" ) - elif self.finetune_multi_task: + + if finetune_model is not None: new_state_dict = {} - model_branch_chosen = model_params.pop("model_branch_chosen") - new_fitting = model_params.pop("new_fitting", False) target_state_dict = self.wrapper.state_dict() - target_keys = [ - i for i in target_state_dict.keys() if i != "_extra_state" - ] - for item_key in target_keys: - if new_fitting and ".fitting_net." in item_key: - # print(f'Keep {item_key} in old model!') - new_state_dict[item_key] = ( - target_state_dict[item_key].clone().detach() - ) - else: - new_key = item_key.replace( - ".Default.", f".{model_branch_chosen}." - ) - # print(f'Replace {item_key} with {new_key} in pretrained_model!') - new_state_dict[item_key] = ( - state_dict[new_key].clone().detach() + + def update_single_finetune_params( + _model_key, + _model_key_from, + _new_state_dict, + _origin_state_dict, + _random_state_dict, + _new_fitting=False, + ): + target_keys = [ + i + for i in _random_state_dict.keys() + if i != "_extra_state" and f".{_model_key}." in i + ] + for item_key in target_keys: + if _new_fitting and ".fitting_net." in item_key: + # print(f'Keep {item_key} in old model!') + _new_state_dict[item_key] = ( + _random_state_dict[item_key].clone().detach() + ) + else: + new_key = item_key.replace( + f".{_model_key}.", f".{_model_key_from}." + ) + # print(f'Replace {item_key} with {new_key} in pretrained_model!') + _new_state_dict[item_key] = ( + _origin_state_dict[new_key].clone().detach() + ) + + if not self.multi_task: + model_key = "Default" + model_key_from = self.finetune_links[model_key] + new_fitting = model_params.pop("new_fitting", False) + update_single_finetune_params( + model_key, + model_key_from, + new_state_dict, + state_dict, + target_state_dict, + _new_fitting=new_fitting, + ) + else: + for model_key in self.model_keys: + if model_key in self.finetune_links: + model_key_from = self.finetune_links[model_key] + new_fitting = model_params["model_dict"][model_key].pop( + "new_fitting", False + ) + else: + model_key_from = model_key + new_fitting = False + update_single_finetune_params( + model_key, + model_key_from, + new_state_dict, + state_dict, + target_state_dict, + _new_fitting=new_fitting, ) state_dict = new_state_dict - if finetune_model is not None: state_dict["_extra_state"] = self.wrapper.state_dict()[ "_extra_state" ] - self.wrapper.load_state_dict(state_dict) - # finetune - if finetune_model is not None and model_params["fitting_net"].get( - "type", "ener" - ) in ["ener", "direct_force_ener", "atten_vec_lcc"]: + + def single_model_finetune( + _model, + _model_params, + _sample_func, + ): old_type_map, new_type_map = ( - model_params["type_map"], - model_params["new_type_map"], + _model_params["type_map"], + _model_params["new_type_map"], ) - # TODO: need an interface instead of fetching fitting_net!!!!!!!!! 
- if hasattr(self.model, "atomic_model") and hasattr( - self.model.atomic_model, "fitting_net" - ): - self.model.atomic_model.fitting_net.change_energy_bias( - config, - self.model, - old_type_map, - new_type_map, - ntest=ntest, - bias_shift=model_params.get("bias_shift", "delta"), + if isinstance(_model, EnergyModel): + _model.change_out_bias( + _sample_func, + bias_shift=_model_params.get("bias_shift", "delta"), + origin_type_map=new_type_map, + full_type_map=old_type_map, ) - elif isinstance(self.model, DPZBLModel): - # need to updated - self.model.atomic_model.change_energy_bias() else: + # need to updated raise NotImplementedError + + # finetune + if not self.multi_task: + single_model_finetune( + self.model, model_params, self.get_sample_func + ) + else: + for model_key in self.model_keys: + if model_key in self.finetune_links: + log.info( + f"Model branch {model_key} will be fine-tuned. This may take a long time..." + ) + single_model_finetune( + self.model[model_key], + model_params["model_dict"][model_key], + self.get_sample_func[model_key], + ) + else: + log.info(f"Model branch {model_key} will resume training.") + if init_frz_model is not None: frz_model = torch.jit.load(init_frz_model, map_location=DEVICE) self.model.load_state_dict(frz_model.state_dict()) diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py index d555478af4..05720720ea 100644 --- a/deepmd/pt/utils/finetune.py +++ b/deepmd/pt/utils/finetune.py @@ -1,5 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging +from copy import ( + deepcopy, +) import torch @@ -10,88 +13,173 @@ log = logging.getLogger(__name__) -def change_finetune_model_params( - ckpt, finetune_model, model_config, multi_task=False, model_branch="" +def change_finetune_model_params_single( + _single_param_target, + _model_param_pretrained, + from_multitask=False, + model_branch="Default", + model_branch_from="", ): - """Load model_params according to the pretrained one. + single_config = deepcopy(_single_param_target) + trainable_param = { + "descriptor": True, + "fitting_net": True, + } + for net_type in trainable_param: + if net_type in single_config: + trainable_param[net_type] = single_config[net_type].get("trainable", True) + if not from_multitask: + old_type_map, new_type_map = ( + _model_param_pretrained["type_map"], + single_config["type_map"], + ) + assert set(new_type_map).issubset( + old_type_map + ), "Only support for smaller type map when finetuning or resuming." + single_config = deepcopy(_model_param_pretrained) + log.info( + f"Change the '{model_branch}' model configurations according to the pretrained one..." + ) + single_config["new_type_map"] = new_type_map + else: + single_config["finetune_from_multi_task"] = from_multitask + model_dict_params = _model_param_pretrained["model_dict"] + new_fitting = False + if model_branch_from == "": + model_branch_chosen = next(iter(model_dict_params.keys())) + new_fitting = True + single_config["bias_shift"] = "statistic" # fitting net re-init + log.warning( + "The fitting net will be re-init instead of using that in the pretrained model! " + "The bias_shift will be statistic!" + ) + else: + model_branch_chosen = model_branch_from + assert model_branch_chosen in model_dict_params, ( + f"No model branch named '{model_branch_chosen}'! " + f"Available ones are {list(model_dict_params.keys())}." 
+ ) + single_config_chosen = deepcopy(model_dict_params[model_branch_chosen]) + old_type_map, new_type_map = ( + single_config_chosen["type_map"], + single_config["type_map"], + ) + assert set(new_type_map).issubset( + old_type_map + ), "Only support for smaller type map when finetuning or resuming." + for key_item in ["type_map", "descriptor"]: + if key_item in single_config_chosen: + single_config[key_item] = single_config_chosen[key_item] + if not new_fitting: + single_config["fitting_net"] = single_config_chosen["fitting_net"] + log.info( + f"Change the '{model_branch}' model configurations according to the model branch " + f"'{model_branch_chosen}' in the pretrained one..." + ) + single_config["new_type_map"] = new_type_map + single_config["model_branch_chosen"] = model_branch_chosen + single_config["new_fitting"] = new_fitting + for net_type in trainable_param: + if net_type in single_config: + single_config[net_type]["trainable"] = trainable_param[net_type] + else: + single_config[net_type] = {"trainable": trainable_param[net_type]} + return single_config - Args: - - ckpt & finetune_model: origin model. - - config: Read from json file. + +def change_finetune_model_params(finetune_model, model_config, model_branch=""): """ - # TODO need support for multitask mode - if finetune_model is not None: - state_dict = torch.load(finetune_model, map_location=env.DEVICE) - if "model" in state_dict: - state_dict = state_dict["model"] - last_model_params = state_dict["_extra_state"]["model_params"] - finetune_multi_task = "model_dict" in last_model_params - trainable_param = { - "descriptor": True, - "fitting_net": True, - } - for net_type in trainable_param: - if net_type in model_config: - trainable_param[net_type] = model_config[net_type].get( - "trainable", True - ) - if not finetune_multi_task: - old_type_map, new_type_map = ( - last_model_params["type_map"], - model_config["type_map"], - ) - assert set(new_type_map).issubset( - old_type_map - ), "Only support for smaller type map when finetuning or resuming." - model_config = last_model_params - log.info( - "Change the model configurations according to the pretrained one..." - ) - model_config["new_type_map"] = new_type_map + Load model_params according to the pretrained one. + + Parameters + ---------- + finetune_model: + The pretrained model. + model_config + The fine-tuning input parameters. + model_branch + The model branch chosen in command-line mode, only for single-task fine-tuning. + This function modifies the fine-tuning input in different modes as follows: + 1. Single-task fine-tuning from a single-task pretrained model: + - Updates the model parameters based on the pretrained model. + 2. Single-task fine-tuning from a multi-task pretrained model: + - Updates the model parameters based on the selected branch in the pretrained model. + - The chosen branch can be defined from the command-line or `finetune_head` input parameter. + - If not defined, model parameters in the fitting network will be randomly initialized. + 3. Multi-task fine-tuning from a single-task pretrained model: + - Updates model parameters in each branch based on the single branch ('Default') in the pretrained model. + - If `finetune_head` is not set to 'Default', + model parameters in the fitting network of the branch will be randomly initialized. + 4. Multi-task fine-tuning from a multi-task pretrained model: + - Updates model parameters in each branch based on the selected branch in the pretrained model. 
+ - The chosen branches can be defined from the `finetune_head` input parameter of each model. + - If `finetune_head` is not defined and the model_key is the same as in the pretrained model, + it will resume from the model_key branch without fine-tuning. + - If `finetune_head` is not defined and a new model_key is used, + model parameters in the fitting network of the branch will be randomly initialized. + + Returns + ------- + model_config: + Updated model parameters. + finetune_links: + Fine-tuning rules in a dict format, with `model_branch`: `model_branch_from` pairs. + If `model_key` is not in this dict, it will do just resuming instead of fine-tuning. + """ + multi_task = "model_dict" in model_config + state_dict = torch.load(finetune_model, map_location=env.DEVICE) + if "model" in state_dict: + state_dict = state_dict["model"] + last_model_params = state_dict["_extra_state"]["model_params"] + finetune_from_multi_task = "model_dict" in last_model_params + finetune_links = {} + if not multi_task: + # use command-line first + if model_branch == "" and "finetune_head" in model_config: + model_branch = model_config["finetune_head"] + model_config = change_finetune_model_params_single( + model_config, + last_model_params, + from_multitask=finetune_from_multi_task, + model_branch="Default", + model_branch_from=model_branch, + ) + finetune_links["Default"] = ( + model_branch if finetune_from_multi_task else "Default" + ) + else: + assert model_branch == "", ( + "Multi-task fine-tuning does not support command-line branches chosen!" + "Please define the 'finetune_head' in each model params!" + ) + target_keys = model_config["model_dict"].keys() + if not finetune_from_multi_task: + pretrained_keys = ["Default"] else: - model_config["finetune_multi_task"] = finetune_multi_task - model_dict_params = last_model_params["model_dict"] - new_fitting = False - if model_branch == "": - model_branch_chosen = next(iter(model_dict_params.keys())) - new_fitting = True - model_config["bias_shift"] = "statistic" # fitting net re-init - log.warning( - "The fitting net will be re-init instead of using that in the pretrained model! " - "The bias_shift will be statistic!" + pretrained_keys = last_model_params["model_dict"].keys() + for model_key in target_keys: + if "finetune_head" in model_config["model_dict"][model_key]: + pretrained_key = model_config["model_dict"][model_key]["finetune_head"] + assert pretrained_key in pretrained_keys, ( + f"'{pretrained_key}' head chosen to finetune not exist in the pretrained model!" + f"Available heads are: {list(pretrained_keys)}" ) + model_branch_from = pretrained_key + finetune_links[model_key] = model_branch_from + elif model_key in pretrained_keys: + # not do anything if not defined "finetune_head" in heads that exist in the pretrained model + # this will just do resuming + model_branch_from = model_key + pass else: - model_branch_chosen = model_branch - assert model_branch_chosen in model_dict_params, ( - f"No model branch named '{model_branch_chosen}'! " - f"Available ones are {list(model_dict_params.keys())}." 
+ # if not defined "finetune_head" in new heads, the fitting net will bre randomly initialized + model_branch_from = "" + finetune_links[model_key] = next(iter(pretrained_keys)) + model_config["model_dict"][model_key] = change_finetune_model_params_single( + model_config["model_dict"][model_key], + last_model_params, + from_multitask=finetune_from_multi_task, + model_branch=model_key, + model_branch_from=model_branch_from, ) - old_type_map, new_type_map = ( - model_dict_params[model_branch_chosen]["type_map"], - model_config["type_map"], - ) - assert set(new_type_map).issubset( - old_type_map - ), "Only support for smaller type map when finetuning or resuming." - for key_item in ["type_map", "descriptor"]: - if key_item in model_dict_params[model_branch_chosen]: - model_config[key_item] = model_dict_params[model_branch_chosen][ - key_item - ] - if not new_fitting: - model_config["fitting_net"] = model_dict_params[model_branch_chosen][ - "fitting_net" - ] - log.info( - f"Change the model configurations according to the model branch " - f"{model_branch_chosen} in the pretrained one..." - ) - model_config["new_type_map"] = new_type_map - model_config["model_branch_chosen"] = model_branch_chosen - model_config["new_fitting"] = new_fitting - for net_type in trainable_param: - if net_type in model_config: - model_config[net_type]["trainable"] = trainable_param[net_type] - else: - model_config[net_type] = {"trainable": trainable_param[net_type]} - return model_config + return model_config, finetune_links diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 5e631d9412..de75740c22 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -10,13 +10,16 @@ import numpy as np import torch +from deepmd.infer.deep_eval import ( + DeepEval, +) from deepmd.pt.utils import ( AtomExcludeMask, - env, ) from deepmd.pt.utils.utils import ( dict_to_device, to_numpy_array, + to_torch_tensor, ) from deepmd.utils.out_stat import ( compute_stats_from_redu, @@ -76,6 +79,7 @@ def compute_output_stats( stat_file_path: Optional[DPPath] = None, rcond: Optional[float] = None, atom_ener: Optional[List[float]] = None, + model: Optional[DeepEval] = None, ): """ Compute the output statistics (e.g. energy bias) for the fitting net from packed data. @@ -97,7 +101,10 @@ def compute_output_stats( The condition number for the regression of atomic energy. atom_ener : List[float], optional Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. - + model : DeepEval, optional + If not None, the model will be utilized to generate the original energy prediction, + which will be subtracted from the energy label of the data. + The difference will then be used to calculate the delta complement energy bias for each type. 
""" if stat_file_path is not None: stat_file_path = stat_file_path / "bias_atom_e" @@ -129,13 +136,67 @@ def compute_output_stats( ) else: assigned_atom_ener = None - bias_atom_e, _ = compute_stats_from_redu( - merged_energy, - merged_natoms, - assigned_bias=assigned_atom_ener, - rcond=rcond, - ) + if model is None: + # only use statistics result + bias_atom_e, _ = compute_stats_from_redu( + merged_energy, + merged_natoms, + assigned_bias=assigned_atom_ener, + rcond=rcond, + ) + else: + # subtract the model bias and output the delta bias + energy_predict = [] + for system in sampled: + nframes = system["coord"].shape[0] + coord = to_numpy_array(system["coord"]).reshape(nframes, -1) + box = ( + to_numpy_array(system["box"]).reshape(nframes, -1) + if system["box"] is not None + else None + ) + if data_mixed_type: + atype = to_numpy_array(system["atype"]).reshape(nframes, -1) + else: + atype = to_numpy_array(system["atype"]).reshape(nframes, -1)[0] + fparam = ( + to_numpy_array(system["fparam"]) + if "fparam" in system is not None + else None + ) + aparam = ( + to_numpy_array(system["aparam"]) + if "aparam" in system is not None + else None + ) + ret = model.eval( + coord, + box, + atype, + mixed_type=data_mixed_type, + fparam=fparam, + aparam=aparam, + ) + energy_predict.append(ret[0].reshape([nframes, 1])) + energy_predict = np.concatenate(energy_predict) + bias_diff = merged_energy - energy_predict + bias_atom_e, _ = compute_stats_from_redu( + bias_diff, + merged_natoms, + assigned_bias=assigned_atom_ener, + rcond=rcond, + ) + unbias_e = energy_predict + merged_natoms @ bias_atom_e + atom_numbs = merged_natoms.sum(-1) + rmse_ae = np.sqrt( + np.mean( + np.square((unbias_e.ravel() - merged_energy.ravel()) / atom_numbs) + ) + ) + log.info( + f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom." 
+ ) if stat_file_path is not None: stat_file_path.save_numpy(bias_atom_e) assert all(x is not None for x in [bias_atom_e]) - return torch.tensor(bias_atom_e, device=env.DEVICE) + return to_torch_tensor(bias_atom_e) diff --git a/source/tests/pt/test_finetune.py b/source/tests/pt/test_finetune.py index dd72eb4718..e77063c2ac 100644 --- a/source/tests/pt/test_finetune.py +++ b/source/tests/pt/test_finetune.py @@ -1,6 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import os -import shutil import tempfile import unittest from copy import ( @@ -17,16 +15,16 @@ DeepEval, ) from deepmd.pt.model.model import ( - DPZBLModel, - EnergyModel, get_model, - get_zbl_model, ) -from deepmd.utils.data_system import ( - DeepmdDataSystem, +from deepmd.pt.utils.dataloader import ( + DpLoaderSet, ) -from deepmd.utils.finetune import ( - change_energy_bias_lower, +from deepmd.pt.utils.stat import ( + make_stat_input, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, ) from .model.test_permutation import ( @@ -34,63 +32,38 @@ model_se_e2_a, model_zbl, ) +from .test_stat import ( + energy_data_requirement, +) class FinetuneTest: - def test_finetune_change_energy_bias(self): + def test_finetune_change_out_bias(self): # get model - if "use_srtab" in self.model_config: - model = get_zbl_model(self.model_config) - else: - model = get_model(self.model_config) - if isinstance(model, EnergyModel): - model.get_fitting_net().bias_atom_e = torch.rand_like( - model.get_fitting_net().bias_atom_e - ) - energy_bias_before = deepcopy( - model.get_fitting_net().bias_atom_e.detach().cpu().numpy().reshape(-1) - ) - bias_atom_e_input = deepcopy( - model.get_fitting_net().bias_atom_e.detach().cpu().numpy().reshape(-1) - ) - elif isinstance(model, DPZBLModel): - model.dp_model.get_fitting_net().bias_atom_e = torch.rand_like( - model.dp_model.get_fitting_net().bias_atom_e - ) - energy_bias_before = deepcopy( - model.dp_model.get_fitting_net() - .bias_atom_e.detach() - .cpu() - .numpy() - .reshape(-1) - ) - bias_atom_e_input = deepcopy( - model.dp_model.get_fitting_net() - .bias_atom_e.detach() - .cpu() - .numpy() - .reshape(-1) - ) - else: - bias_atom_e_input = None - - model = torch.jit.script(model) + model = get_model(self.model_config) + fitting_net = model.get_fitting_net() + fitting_net["bias_atom_e"] = torch.rand_like(fitting_net["bias_atom_e"]) + energy_bias_before = deepcopy( + to_numpy_array(fitting_net["bias_atom_e"]).reshape(-1) + ) + + # prepare original model for test + dp = torch.jit.script(model) tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") - torch.jit.save(model, tmp_model.name) + torch.jit.save(dp, tmp_model.name) dp = DeepEval(tmp_model.name) - ntest = 10 origin_type_map = ["O", "H"] full_type_map = ["O", "H", "B"] # change energy bias - energy_bias_after = change_energy_bias_lower( - self.data, - dp, + model.change_out_bias( + self.sampled, + bias_shift="delta", origin_type_map=origin_type_map, full_type_map=full_type_map, - bias_atom_e=bias_atom_e_input, - bias_shift="delta", - ntest=ntest, + ) + energy_bias_after = deepcopy( + to_numpy_array(fitting_net["bias_atom_e"]).reshape(-1) ) # get ground-truth energy bias change @@ -98,12 +71,17 @@ def test_finetune_change_energy_bias(self): idx_type_map = sorter[ np.searchsorted(full_type_map, origin_type_map, sorter=sorter) ] - test_data = self.data.get_test() - atom_nums = np.tile(np.bincount(test_data["type"][0])[idx_type_map], (ntest, 1)) + ntest = 1 + atom_nums = np.tile( + 
np.bincount(to_numpy_array(self.sampled[0]["atype"][0]))[idx_type_map], + (ntest, 1), + ) energy = dp.eval( - test_data["coord"][:ntest], test_data["box"][:ntest], test_data["type"][0] + to_numpy_array(self.sampled[0]["coord"][:ntest]), + to_numpy_array(self.sampled[0]["box"][:ntest]), + to_numpy_array(self.sampled[0]["atype"][0]), )[0] - energy_diff = test_data["energy"][:ntest] - energy + energy_diff = to_numpy_array(self.sampled[0]["energy"][:ntest]) - energy finetune_shift = ( energy_bias_after[idx_type_map] - energy_bias_before[idx_type_map] ) @@ -114,60 +92,57 @@ def test_finetune_change_energy_bias(self): # check values np.testing.assert_almost_equal(finetune_shift, ground_truth_shift, decimal=10) - def tearDown(self): - for f in os.listdir("."): - if f.startswith("model") and f.endswith(".pt"): - os.remove(f) - if f in ["lcurve.out"]: - os.remove(f) - if f in ["stat_files"]: - shutil.rmtree(f) - class TestEnergyModelSeA(unittest.TestCase, FinetuneTest): def setUp(self): self.data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.data = DeepmdDataSystem( + self.model_config = model_se_e2_a + self.data = DpLoaderSet( self.data_file, batch_size=1, - test_size=1, + type_map=self.model_config["type_map"], + ) + self.data.add_data_requirement(energy_data_requirement) + self.sampled = make_stat_input( + self.data.systems, + self.data.dataloaders, + nbatches=1, ) - self.data.add("energy", ndof=1, atomic=False, must=True, high_prec=True) - self.model_config = model_se_e2_a - - def tearDown(self) -> None: - FinetuneTest.tearDown(self) @unittest.skip("change bias not implemented yet.") class TestEnergyZBLModelSeA(unittest.TestCase, FinetuneTest): def setUp(self): self.data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.data = DeepmdDataSystem( + self.model_config = model_zbl + self.data = DpLoaderSet( self.data_file, batch_size=1, - test_size=1, + type_map=self.model_config["type_map"], + ) + self.data.add_data_requirement(energy_data_requirement) + self.sampled = make_stat_input( + self.data.systems, + self.data.dataloaders, + nbatches=1, ) - self.data.add("energy", ndof=1, atomic=False, must=True, high_prec=True) - self.model_config = model_zbl - - def tearDown(self) -> None: - FinetuneTest.tearDown(self) class TestEnergyModelDPA2(unittest.TestCase, FinetuneTest): def setUp(self): self.data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.data = DeepmdDataSystem( + self.model_config = model_dpa2 + self.data = DpLoaderSet( self.data_file, batch_size=1, - test_size=1, + type_map=self.model_config["type_map"], + ) + self.data.add_data_requirement(energy_data_requirement) + self.sampled = make_stat_input( + self.data.systems, + self.data.dataloaders, + nbatches=1, ) - self.data.add("energy", ndof=1, atomic=False, must=True, high_prec=True) - self.model_config = model_dpa2 - - def tearDown(self) -> None: - FinetuneTest.tearDown(self) if __name__ == "__main__": From 13e339875a7b92ca74a21b80f845aca00c87acd6 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 18 Mar 2024 00:03:21 +0800 Subject: [PATCH 02/14] Update finetune.py --- deepmd/pt/utils/finetune.py | 1 - 1 file changed, 1 deletion(-) diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py index 05720720ea..c4f1abe6bf 100644 --- a/deepmd/pt/utils/finetune.py +++ b/deepmd/pt/utils/finetune.py @@ -170,7 +170,6 @@ def change_finetune_model_params(finetune_model, model_config, model_branch=""): # not do anything if not defined "finetune_head" 
in heads that exist in the pretrained model # this will just do resuming model_branch_from = model_key - pass else: # if not defined "finetune_head" in new heads, the fitting net will bre randomly initialized model_branch_from = "" From 385764217da45519591917792484bbe3b6d7f72b Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 18 Mar 2024 00:54:30 +0800 Subject: [PATCH 03/14] Add ut for multitask finetune --- source/tests/pt/model/water/multitask.json | 2 + source/tests/pt/test_multitask.py | 97 ++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/source/tests/pt/model/water/multitask.json b/source/tests/pt/model/water/multitask.json index 6baddd672b..26771ee476 100644 --- a/source/tests/pt/model/water/multitask.json +++ b/source/tests/pt/model/water/multitask.json @@ -38,6 +38,7 @@ ], "resnet_dt": true, "seed": 1, + "data_stat_nbatch": 1, "_comment": " that's all" } }, @@ -52,6 +53,7 @@ ], "resnet_dt": true, "seed": 1, + "data_stat_nbatch": 1, "_comment": " that's all" } } diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py index e959e9a128..71df16e8c4 100644 --- a/source/tests/pt/test_multitask.py +++ b/source/tests/pt/test_multitask.py @@ -32,6 +32,7 @@ class MultiTaskTrainTest: def test_multitask_train(self): + # test multitask training trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links) trainer.run() # check model keys @@ -51,6 +52,99 @@ def test_multitask_train(self): multi_state_dict[state_key], multi_state_dict[state_key.replace("model_1", "model_2")], ) + + # test multitask fine-tuning + # add model_3 + self.origin_config["model"]["model_dict"]["model_3"] = deepcopy( + self.origin_config["model"]["model_dict"]["model_2"] + ) + self.origin_config["loss_dict"]["model_3"] = deepcopy( + self.origin_config["loss_dict"]["model_2"] + ) + self.origin_config["training"]["model_prob"]["model_3"] = deepcopy( + self.origin_config["training"]["model_prob"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_3"] = deepcopy( + self.origin_config["training"]["data_dict"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_3"]["stat_file"] = ( + self.origin_config[ + "training" + ]["data_dict"]["model_3"]["stat_file"].replace("model_2", "model_3") + ) + + # add model_4 + self.origin_config["model"]["model_dict"]["model_4"] = deepcopy( + self.origin_config["model"]["model_dict"]["model_2"] + ) + self.origin_config["loss_dict"]["model_4"] = deepcopy( + self.origin_config["loss_dict"]["model_2"] + ) + self.origin_config["training"]["model_prob"]["model_4"] = deepcopy( + self.origin_config["training"]["model_prob"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_4"] = deepcopy( + self.origin_config["training"]["data_dict"]["model_2"] + ) + self.origin_config["training"]["data_dict"]["model_4"]["stat_file"] = ( + self.origin_config[ + "training" + ]["data_dict"]["model_4"]["stat_file"].replace("model_2", "model_4") + ) + + # set finetune rules + # model_1 resuming from model_1 + # pass + + # model_2 fine-tuning from model_2 + self.origin_config["model"]["model_dict"]["model_2"]["finetune_head"] = ( + "model_2" + ) + + # new model_3 fine-tuning from model_2 + self.origin_config["model"]["model_dict"]["model_3"]["finetune_head"] = ( + "model_2" + ) + + # new model_4 fine-tuning with randomly initialized fitting net + # pass + + self.origin_config["model"], shared_links_finetune = preprocess_shared_params( + self.origin_config["model"] + ) + + 
trainer_finetune = get_trainer( + deepcopy(self.origin_config), + finetune_model=self.config["training"].get("save_ckpt", "model.ckpt") + + ".pt", + ) + + # check parameters + multi_state_dict_finetuned = trainer_finetune.wrapper.model.state_dict() + for state_key in multi_state_dict_finetuned: + if "model_1" in state_key: + torch.testing.assert_close( + multi_state_dict[state_key], + multi_state_dict_finetuned[state_key], + ) + elif "model_2" in state_key and "bias_atom_e" not in state_key: + torch.testing.assert_close( + multi_state_dict[state_key], + multi_state_dict_finetuned[state_key], + ) + elif "model_3" in state_key and "bias_atom_e" not in state_key: + torch.testing.assert_close( + multi_state_dict[state_key.replace("model_3", "model_2")], + multi_state_dict_finetuned[state_key], + ) + elif "model_4" in state_key and "fitting_net" not in state_key: + torch.testing.assert_close( + multi_state_dict[state_key.replace("model_4", "model_2")], + multi_state_dict_finetuned[state_key], + ) + + # check running + trainer_finetune.run() self.tearDown() def tearDown(self): @@ -93,6 +187,7 @@ def setUp(self): ) self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) self.config["model"], self.shared_links = preprocess_shared_params( self.config["model"] ) @@ -131,6 +226,7 @@ def setUp(self): ) self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) self.config["model"], self.shared_links = preprocess_shared_params( self.config["model"] ) @@ -169,6 +265,7 @@ def setUp(self): ) self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) self.config["model"], self.shared_links = preprocess_shared_params( self.config["model"] ) From 9829bad6bb5e47479ab99535fa638f082b27adcb Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 18 Mar 2024 01:01:19 +0800 Subject: [PATCH 04/14] delete unused param `finetune_from_multi_task` --- deepmd/pt/train/training.py | 3 --- deepmd/pt/utils/finetune.py | 1 - 2 files changed, 4 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 032aa993b0..d79051516f 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -118,9 +118,6 @@ def __init__( training_params = config["training"] self.multi_task = "model_dict" in model_params self.finetune_links = finetune_links - self.finetune_from_multi_task = model_params.pop( - "finetune_from_multi_task", False - ) # should use pop for next finetune self.model_keys = ( list(model_params["model_dict"]) if self.multi_task else ["Default"] ) diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py index c4f1abe6bf..e178e856bb 100644 --- a/deepmd/pt/utils/finetune.py +++ b/deepmd/pt/utils/finetune.py @@ -42,7 +42,6 @@ def change_finetune_model_params_single( ) single_config["new_type_map"] = new_type_map else: - single_config["finetune_from_multi_task"] = from_multitask model_dict_params = _model_param_pretrained["model_dict"] new_fitting = False if model_branch_from == "": From 7c84f524650280489bdd8aea197734fa3eb42a79 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 18 Mar 2024 01:03:38 +0800 Subject: [PATCH 05/14] Update training.py --- deepmd/pt/train/training.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 
d79051516f..08f349bacf 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -575,6 +575,7 @@ def single_model_finetune( else: # need to updated raise NotImplementedError + pass # finetune if not self.multi_task: From 1dd180a77f3917f8d61798bb366a670d93817ca3 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 18 Mar 2024 01:04:03 +0800 Subject: [PATCH 06/14] Update training.py --- deepmd/pt/train/training.py | 1 - 1 file changed, 1 deletion(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 08f349bacf..93b0e9dac1 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -574,7 +574,6 @@ def single_model_finetune( ) else: # need to updated - raise NotImplementedError pass # finetune From 7cfe43eaf7d7801c5a3342e2a0f7d3292fd67f4e Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 18 Mar 2024 01:07:57 +0800 Subject: [PATCH 07/14] Update test_multitask.py --- source/tests/pt/test_multitask.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py index 71df16e8c4..8bdb42df52 100644 --- a/source/tests/pt/test_multitask.py +++ b/source/tests/pt/test_multitask.py @@ -117,6 +117,7 @@ def test_multitask_train(self): deepcopy(self.origin_config), finetune_model=self.config["training"].get("save_ckpt", "model.ckpt") + ".pt", + shared_links=shared_links_finetune, ) # check parameters From 398d26e742bd62ee90e467531e8b3f697de2be5c Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:12:54 +0800 Subject: [PATCH 08/14] Move `change_out_bias` to atomic model --- .../pt/model/atomic_model/dp_atomic_model.py | 90 +++++++++++++++++++ .../model/atomic_model/linear_atomic_model.py | 6 ++ .../atomic_model/pairtab_atomic_model.py | 6 ++ deepmd/pt/model/model/dipole_model.py | 5 -- deepmd/pt/model/model/dos_model.py | 5 -- deepmd/pt/model/model/ener_model.py | 68 -------------- deepmd/pt/model/model/polar_model.py | 5 -- deepmd/pt/model/model/spin_model.py | 5 -- deepmd/pt/train/training.py | 2 +- deepmd/pt/utils/auto_batch_size.py | 77 ++++++++++++++++ deepmd/pt/utils/finetune.py | 18 ++-- deepmd/pt/utils/stat.py | 68 +++++++------- source/tests/pt/model/water/multitask.json | 8 +- source/tests/pt/test_finetune.py | 2 +- 14 files changed, 228 insertions(+), 137 deletions(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 6aa8df7aee..bcb4c13b01 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -8,6 +8,7 @@ Optional, ) +import numpy as np import torch from deepmd.dpmodel import ( @@ -19,6 +20,15 @@ from deepmd.pt.model.task.base_fitting import ( BaseFitting, ) +from deepmd.pt.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pt.utils.stat import ( + compute_output_stats, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) from deepmd.utils.path import ( DPPath, ) @@ -223,6 +233,86 @@ def wrapped_sampler(): if self.fitting_net is not None: self.fitting_net.compute_output_stats(wrapped_sampler, stat_file_path) + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + """Change the energy bias according to the input data and the pretrained model. 
+ + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + origin_type_map : List[str] + The original type_map in dataset, they are targets to change the energy bias. + full_type_map : List[str] + The full type_map in pre-trained model + bias_shift : str + The mode for changing energy bias : ['delta', 'statistic'] + 'delta' : perform predictions on energies of target dataset, + and do least sqaure on the errors to obtain the target shift as bias. + 'statistic' : directly use the statistic energy bias in the target dataset. + """ + sorter = np.argsort(full_type_map) + missing_types = [t for t in origin_type_map if t not in full_type_map] + assert ( + not missing_types + ), f"Some types are not in the pre-trained model: {list(missing_types)} !" + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] + original_bias = self.fitting_net["bias_atom_e"] + if bias_shift == "delta": + + def model_forward(coord, atype, box, fparam=None, aparam=None): + with torch.no_grad(): # it's essential for pure torch forward function to use auto_batchsize + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + self.get_rcut(), + self.get_sel(), + mixed_types=self.mixed_types(), + box=box, + ) + atomic_ret = self.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + return atomic_ret["energy"] + + delta_bias_e = compute_output_stats( + merged, + self.get_ntypes(), + model_forward=model_forward, + ) + bias_atom_e = delta_bias_e + original_bias + elif bias_shift == "statistic": + bias_atom_e = compute_output_stats( + merged, + self.get_ntypes(), + ) + else: + raise RuntimeError("Unknown bias_shift mode: " + bias_shift) + log.info( + f"Change energy bias of {origin_type_map!s} " + f"from {to_numpy_array(original_bias[idx_type_map]).reshape(-1)!s} " + f"to {to_numpy_array(bias_atom_e[idx_type_map]).reshape(-1)!s}." 
+ ) + self.fitting_net["bias_atom_e"] = bias_atom_e + def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" return self.fitting_net.get_dim_fparam() diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index 482972b5d5..f878ca7fc6 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -390,6 +390,12 @@ def compute_or_load_stat( self.models[0].compute_or_load_stat(sampled_func, stat_file_path) self.models[1].compute_or_load_stat(sampled_func, stat_file_path) + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + # need to implement + pass + def serialize(self) -> dict: dd = BaseAtomicModel.serialize(self) dd.update( diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 0d82670792..a71a3f6816 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -234,6 +234,12 @@ def compute_or_load_stat( torch.tensor(bias_atom_e, device=env.DEVICE).view([self.ntypes, 1]) ) + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + # need to implement + pass + def forward_atomic( self, extended_coord: torch.Tensor, diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py index 231ddf89a4..45b120771b 100644 --- a/deepmd/pt/model/model/dipole_model.py +++ b/deepmd/pt/model/model/dipole_model.py @@ -92,8 +92,3 @@ def forward_lower( else: model_predict = model_ret return model_predict - - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - raise NotImplementedError diff --git a/deepmd/pt/model/model/dos_model.py b/deepmd/pt/model/model/dos_model.py index 147354d32e..680eac41f5 100644 --- a/deepmd/pt/model/model/dos_model.py +++ b/deepmd/pt/model/model/dos_model.py @@ -78,8 +78,3 @@ def forward_lower( else: model_predict = model_ret return model_predict - - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - raise NotImplementedError diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index 4338297eac..2577b685d3 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -1,25 +1,12 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging -import os -import tempfile from typing import ( Dict, Optional, ) -import numpy as np import torch -from deepmd.infer.deep_eval import ( - DeepEval, -) -from deepmd.pt.utils.stat import ( - compute_output_stats, -) -from deepmd.pt.utils.utils import ( - to_numpy_array, -) - from .dp_model import ( DPModel, ) @@ -113,58 +100,3 @@ def forward_lower( else: model_predict = model_ret return model_predict - - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - """Change the energy bias according to the input data and the pretrained model. - - Parameters - ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. - Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` - originating from the `i`-th data system. 
- - Callable[[], List[dict]]: A lazy function that returns data samples in the above format - only when needed. Since the sampling process can be slow and memory-intensive, - the lazy function helps by only sampling once. - origin_type_map : List[str] - The original type_map in dataset, they are targets to change the energy bias. - full_type_map : List[str] - The full type_map in pre-trained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, - and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. - """ - sorter = np.argsort(full_type_map) - idx_type_map = sorter[ - np.searchsorted(full_type_map, origin_type_map, sorter=sorter) - ] - original_bias = self.get_fitting_net()["bias_atom_e"] - if bias_shift == "delta": - tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") - model = torch.jit.script(self) - torch.jit.save(model, tmp_model.name) - dp = DeepEval(tmp_model.name) - os.unlink(tmp_model.name) - delta_bias_e = compute_output_stats( - merged, - self.atomic_model.get_ntypes(), - model=dp, - ) - bias_atom_e = delta_bias_e + original_bias - elif bias_shift == "statistic": - bias_atom_e = compute_output_stats( - merged, - self.atomic_model.get_ntypes(), - ) - else: - raise RuntimeError("Unknown bias_shift mode: " + bias_shift) - log.info( - f"Change energy bias of {origin_type_map!s} " - f"from {to_numpy_array(original_bias[idx_type_map]).reshape(-1)!s} " - f"to {to_numpy_array(bias_atom_e[idx_type_map]).reshape(-1)!s}." - ) - self.get_fitting_net()["bias_atom_e"] = bias_atom_e diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py index ec669583b0..403058aa47 100644 --- a/deepmd/pt/model/model/polar_model.py +++ b/deepmd/pt/model/model/polar_model.py @@ -76,8 +76,3 @@ def forward_lower( else: model_predict = model_ret return model_predict - - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - raise NotImplementedError diff --git a/deepmd/pt/model/model/spin_model.py b/deepmd/pt/model/model/spin_model.py index 75ecee7601..df2f48e2e4 100644 --- a/deepmd/pt/model/model/spin_model.py +++ b/deepmd/pt/model/model/spin_model.py @@ -558,8 +558,3 @@ def forward_lower( ].squeeze(-2) # not support virial by far return model_predict - - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - raise NotImplementedError diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 93b0e9dac1..0372c50cc9 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -566,7 +566,7 @@ def single_model_finetune( _model_params["new_type_map"], ) if isinstance(_model, EnergyModel): - _model.change_out_bias( + _model.atomic_model.change_out_bias( _sample_func, bias_shift=_model_params.get("bias_shift", "delta"), origin_type_map=new_type_map, diff --git a/deepmd/pt/utils/auto_batch_size.py b/deepmd/pt/utils/auto_batch_size.py index 5af7760e2a..181d56f2f4 100644 --- a/deepmd/pt/utils/auto_batch_size.py +++ b/deepmd/pt/utils/auto_batch_size.py @@ -1,4 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Tuple, + Union, +) + +import numpy as np import torch from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase @@ -24,3 +31,73 @@ def is_oom_error(self, e: Exception) -> bool: Exception """ return 
isinstance(e, RuntimeError) and "CUDA out of memory." in e.args[0] + + def execute_all( + self, callable: Callable, total_size: int, natoms: int, *args, **kwargs + ) -> Tuple[Union[np.ndarray, torch.Tensor]]: + """Excuate a method with all given data. + + Parameters + ---------- + callable : Callable + The method should accept *args and **kwargs as input and return the similiar array. + total_size : int + Total size + natoms : int + The number of atoms + *args + Variable length argument list. + **kwargs + If 2D np.ndarray or torch.Tensor, assume the first axis is batch; otherwise do nothing. + """ + + def execute_with_batch_size( + batch_size: int, start_index: int + ) -> Tuple[int, Tuple[torch.Tensor]]: + end_index = start_index + batch_size + end_index = min(end_index, total_size) + return (end_index - start_index), callable( + *[ + ( + vv[start_index:end_index] + if (isinstance(vv, np.ndarray) or isinstance(vv, torch.Tensor)) + and vv.ndim > 1 + else vv + ) + for vv in args + ], + **{ + kk: ( + vv[start_index:end_index] + if (isinstance(vv, np.ndarray) or isinstance(vv, torch.Tensor)) + and vv.ndim > 1 + else vv + ) + for kk, vv in kwargs.items() + }, + ) + + index = 0 + results = [] + while index < total_size: + n_batch, result = self.execute(execute_with_batch_size, index, natoms) + if not isinstance(result, tuple): + result = (result,) + index += n_batch + if n_batch: + for rr in result: + rr.reshape((n_batch, -1)) + results.append(result) + r_list = [] + for r in zip(*results): + if isinstance(r[0], np.ndarray): + r_list.append(np.concatenate(r, axis=0)) + elif isinstance(r[0], torch.Tensor): + r_list.append(torch.cat(r, dim=0)) + else: + raise RuntimeError(f"Unexpected result type {type(r[0])}") + r = tuple(r_list) + if len(r) == 1: + # avoid returning tuple if callable doesn't return tuple + r = r[0] + return r diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py index e178e856bb..55bab5f0ad 100644 --- a/deepmd/pt/utils/finetune.py +++ b/deepmd/pt/utils/finetune.py @@ -89,15 +89,6 @@ def change_finetune_model_params_single( def change_finetune_model_params(finetune_model, model_config, model_branch=""): """ Load model_params according to the pretrained one. - - Parameters - ---------- - finetune_model: - The pretrained model. - model_config - The fine-tuning input parameters. - model_branch - The model branch chosen in command-line mode, only for single-task fine-tuning. This function modifies the fine-tuning input in different modes as follows: 1. Single-task fine-tuning from a single-task pretrained model: - Updates the model parameters based on the pretrained model. @@ -117,6 +108,15 @@ def change_finetune_model_params(finetune_model, model_config, model_branch=""): - If `finetune_head` is not defined and a new model_key is used, model parameters in the fitting network of the branch will be randomly initialized. + Parameters + ---------- + finetune_model + The pretrained model. + model_config + The fine-tuning input parameters. + model_branch + The model branch chosen in command-line mode, only for single-task fine-tuning. 
+ Returns ------- model_config: diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index de75740c22..8061677695 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -10,12 +10,12 @@ import numpy as np import torch -from deepmd.infer.deep_eval import ( - DeepEval, -) from deepmd.pt.utils import ( AtomExcludeMask, ) +from deepmd.pt.utils.auto_batch_size import ( + AutoBatchSize, +) from deepmd.pt.utils.utils import ( dict_to_device, to_numpy_array, @@ -79,7 +79,7 @@ def compute_output_stats( stat_file_path: Optional[DPPath] = None, rcond: Optional[float] = None, atom_ener: Optional[List[float]] = None, - model: Optional[DeepEval] = None, + model_forward: Optional[Callable[..., torch.Tensor]] = None, ): """ Compute the output statistics (e.g. energy bias) for the fitting net from packed data. @@ -101,7 +101,8 @@ def compute_output_stats( The condition number for the regression of atomic energy. atom_ener : List[float], optional Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. - model : DeepEval, optional + model_forward : Callable, optional + The wrapped forward function of atomic model. If not None, the model will be utilized to generate the original energy prediction, which will be subtracted from the energy label of the data. The difference will then be used to calculate the delta complement energy bias for each type. @@ -136,7 +137,7 @@ def compute_output_stats( ) else: assigned_atom_ener = None - if model is None: + if model_forward is None: # only use statistics result bias_atom_e, _ = compute_stats_from_redu( merged_energy, @@ -146,38 +147,37 @@ def compute_output_stats( ) else: # subtract the model bias and output the delta bias + auto_batch_size = AutoBatchSize() energy_predict = [] for system in sampled: nframes = system["coord"].shape[0] - coord = to_numpy_array(system["coord"]).reshape(nframes, -1) - box = ( - to_numpy_array(system["box"]).reshape(nframes, -1) - if system["box"] is not None - else None - ) - if data_mixed_type: - atype = to_numpy_array(system["atype"]).reshape(nframes, -1) - else: - atype = to_numpy_array(system["atype"]).reshape(nframes, -1)[0] - fparam = ( - to_numpy_array(system["fparam"]) - if "fparam" in system is not None - else None - ) - aparam = ( - to_numpy_array(system["aparam"]) - if "aparam" in system is not None - else None + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], ) - ret = model.eval( - coord, - box, - atype, - mixed_type=data_mixed_type, - fparam=fparam, - aparam=aparam, + fparam = system["fparam"] if "fparam" in system else None + aparam = system["aparam"] if "aparam" in system else None + + def model_forward_auto_batch_size(*args, **kwargs): + return auto_batch_size.execute_all( + model_forward, + nframes, + system["atype"].shape[-1], + *args, + **kwargs, + ) + + energy = ( + model_forward_auto_batch_size( + coord, atype, box, fparam=fparam, aparam=aparam + ) + .reshape(nframes, -1) + .sum(-1) ) - energy_predict.append(ret[0].reshape([nframes, 1])) + energy_predict.append(to_numpy_array(energy).reshape([nframes, 1])) + energy_predict = np.concatenate(energy_predict) bias_diff = merged_energy - energy_predict bias_atom_e, _ = compute_stats_from_redu( @@ -194,7 +194,7 @@ def compute_output_stats( ) ) log.info( - f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom." + f"RMSE of energy per atom after linear regression is: {rmse_ae} eV/atom." 
) if stat_file_path is not None: stat_file_path.save_numpy(bias_atom_e) diff --git a/source/tests/pt/model/water/multitask.json b/source/tests/pt/model/water/multitask.json index 26771ee476..2f706e4cd9 100644 --- a/source/tests/pt/model/water/multitask.json +++ b/source/tests/pt/model/water/multitask.json @@ -38,9 +38,9 @@ ], "resnet_dt": true, "seed": 1, - "data_stat_nbatch": 1, "_comment": " that's all" - } + }, + "data_stat_nbatch": 1 }, "model_2": { "type_map": "my_type_map", @@ -53,9 +53,9 @@ ], "resnet_dt": true, "seed": 1, - "data_stat_nbatch": 1, "_comment": " that's all" - } + }, + "data_stat_nbatch": 1 } } }, diff --git a/source/tests/pt/test_finetune.py b/source/tests/pt/test_finetune.py index e77063c2ac..78b6286f94 100644 --- a/source/tests/pt/test_finetune.py +++ b/source/tests/pt/test_finetune.py @@ -56,7 +56,7 @@ def test_finetune_change_out_bias(self): full_type_map = ["O", "H", "B"] # change energy bias - model.change_out_bias( + model.atomic_model.change_out_bias( self.sampled, bias_shift="delta", origin_type_map=origin_type_map, From 508ee449fdb84347902e08e1a0432383a1a15571 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:19:27 +0800 Subject: [PATCH 09/14] Update ener_model.py --- deepmd/pt/model/model/ener_model.py | 3 --- deepmd/pt/utils/stat.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index 2577b685d3..5217293623 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -1,5 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import logging from typing import ( Dict, Optional, @@ -11,8 +10,6 @@ DPModel, ) -log = logging.getLogger(__name__) - class EnergyModel(DPModel): model_type = "ener" diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 8061677695..e19882902c 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -101,7 +101,7 @@ def compute_output_stats( The condition number for the regression of atomic energy. atom_ener : List[float], optional Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. - model_forward : Callable, optional + model_forward : Callable[..., torch.Tensor], optional The wrapped forward function of atomic model. If not None, the model will be utilized to generate the original energy prediction, which will be subtracted from the energy label of the data. 
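The batching helper introduced above is what lets compute_output_stats work from a bare model_forward callable instead of a frozen DeepEval model: AutoBatchSize.execute_all slices every argument with more than one dimension along the first (frame) axis, passes scalars and None through unchanged, falls back to smaller batches on CUDA out-of-memory errors, and concatenates the per-batch outputs. A minimal sketch of that behaviour with a toy forward function follows; the toy function, tensor shapes, and sizes are illustrative assumptions, not part of the patch:

import torch

from deepmd.pt.utils.auto_batch_size import AutoBatchSize


def toy_forward(coord, atype, box=None):
    # Stands in for the wrapped atomic-model forward; returns one value per frame.
    return coord.sum(dim=-1, keepdim=True)


nframes, natoms = 7, 5
coord = torch.rand(nframes, natoms * 3)                 # ndim > 1: sliced along the frame axis
atype = torch.zeros(nframes, natoms, dtype=torch.long)  # ndim > 1: sliced along the frame axis

batcher = AutoBatchSize()
energy = batcher.execute_all(toy_forward, nframes, natoms, coord, atype, box=None)
assert energy.shape[0] == nframes  # per-batch results are concatenated on the frame axis

This is the contract the model_forward_auto_batch_size closure in stat.py relies on: it only needs the frame count and the per-frame atom count of each sampled system to drive the slicing.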
From 961ee10aaddc965a942a82ae6fb86ec08c621d59 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 20 Mar 2024 09:04:55 +0800 Subject: [PATCH 10/14] add detach --- deepmd/pt/model/atomic_model/dp_atomic_model.py | 2 +- deepmd/pt/utils/stat.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index bcb4c13b01..712f1839c2 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -291,7 +291,7 @@ def model_forward(coord, atype, box, fparam=None, aparam=None): fparam=fparam, aparam=aparam, ) - return atomic_ret["energy"] + return atomic_ret["energy"].detach() delta_bias_e = compute_output_stats( merged, diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index e19882902c..1aff4cfb37 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -157,8 +157,8 @@ def compute_output_stats( system["box"], system["natoms"], ) - fparam = system["fparam"] if "fparam" in system else None - aparam = system["aparam"] if "aparam" in system else None + fparam = system.get("fparam", None) + aparam = system.get("aparam", None) def model_forward_auto_batch_size(*args, **kwargs): return auto_batch_size.execute_all( From bdf74c916a1490c9be459234339b0411bec43ab2 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 21 Mar 2024 20:08:37 +0800 Subject: [PATCH 11/14] Move `change_out_bias` to base atomic model --- .../model/atomic_model/base_atomic_model.py | 109 ++++++++++++++++++ .../pt/model/atomic_model/dp_atomic_model.py | 99 +++------------- .../model/atomic_model/linear_atomic_model.py | 21 ++++ .../atomic_model/pairtab_atomic_model.py | 23 +++- 4 files changed, 163 insertions(+), 89 deletions(-) diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py index c921538203..d453b67205 100644 --- a/deepmd/pt/model/atomic_model/base_atomic_model.py +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -1,13 +1,16 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import logging from typing import ( + Callable, Dict, List, Optional, Tuple, ) +import numpy as np import torch from deepmd.dpmodel.atomic_model import ( @@ -21,10 +24,21 @@ AtomExcludeMask, PairExcludeMask, ) +from deepmd.pt.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pt.utils.stat import ( + compute_output_stats, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) from deepmd.utils.path import ( DPPath, ) +log = logging.getLogger(__name__) + BaseAtomicModel_ = make_base_atomic_model(torch.Tensor) @@ -176,6 +190,46 @@ def serialize(self) -> dict: "pair_exclude_types": self.pair_exclude_types, } + def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + raise NotImplementedError + + def get_out_bias(self) -> torch.Tensor: + raise NotImplementedError + + def get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]: + """Get a forward wrapper of the atomic model for output bias calculation.""" + model_output_type = list(self.atomic_output_def().keys()) + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + out_name = model_output_type[0] + + def model_forward(coord, atype, box, fparam=None, aparam=None): + with torch.no_grad(): # it's essential for pure torch forward function to use auto_batchsize + ( + extended_coord, + extended_atype, + 
mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + self.get_rcut(), + self.get_sel(), + mixed_types=self.mixed_types(), + box=box, + ) + atomic_ret = self.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + return atomic_ret[out_name].detach() + + return model_forward + def compute_or_load_stat( self, sampled_func, @@ -197,3 +251,58 @@ def compute_or_load_stat( The path to the statistics files. """ raise NotImplementedError + + def change_out_bias( + self, merged, origin_type_map, full_type_map, bias_shift="delta" + ) -> None: + """Change the energy bias according to the input data and the pretrained model. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + origin_type_map : List[str] + The original type_map in dataset, they are targets to change the energy bias. + full_type_map : List[str] + The full type_map in pre-trained model + bias_shift : str + The mode for changing energy bias : ['delta', 'statistic'] + 'delta' : perform predictions on energies of target dataset, + and do least sqaure on the errors to obtain the target shift as bias. + 'statistic' : directly use the statistic energy bias in the target dataset. + """ + sorter = np.argsort(full_type_map) + missing_types = [t for t in origin_type_map if t not in full_type_map] + assert ( + not missing_types + ), f"Some types are not in the pre-trained model: {list(missing_types)} !" + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] + original_bias = self.get_out_bias() + if bias_shift == "delta": + delta_bias = compute_output_stats( + merged, + self.get_ntypes(), + model_forward=self.get_forward_wrapper_func(), + ) + self.set_out_bias(delta_bias, add=True) + elif bias_shift == "statistic": + bias_atom = compute_output_stats( + merged, + self.get_ntypes(), + ) + self.set_out_bias(bias_atom) + else: + raise RuntimeError("Unknown bias_shift mode: " + bias_shift) + bias_atom = self.get_out_bias() + log.info( + f"Change output bias of {origin_type_map!s} " + f"from {to_numpy_array(original_bias[idx_type_map]).reshape(-1)!s} " + f"to {to_numpy_array(bias_atom[idx_type_map]).reshape(-1)!s}." 
+ ) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 712f1839c2..13b8f09a79 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -8,7 +8,6 @@ Optional, ) -import numpy as np import torch from deepmd.dpmodel import ( @@ -20,15 +19,6 @@ from deepmd.pt.model.task.base_fitting import ( BaseFitting, ) -from deepmd.pt.utils.nlist import ( - extend_input_and_build_neighbor_list, -) -from deepmd.pt.utils.stat import ( - compute_output_stats, -) -from deepmd.pt.utils.utils import ( - to_numpy_array, -) from deepmd.utils.path import ( DPPath, ) @@ -233,85 +223,26 @@ def wrapped_sampler(): if self.fitting_net is not None: self.fitting_net.compute_output_stats(wrapped_sampler, stat_file_path) - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - """Change the energy bias according to the input data and the pretrained model. + def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + """ + Modify the output bias for the atomic model. Parameters ---------- - merged : Union[Callable[[], List[dict]], List[dict]] - - List[dict]: A list of data samples from various data systems. - Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` - originating from the `i`-th data system. - - Callable[[], List[dict]]: A lazy function that returns data samples in the above format - only when needed. Since the sampling process can be slow and memory-intensive, - the lazy function helps by only sampling once. - origin_type_map : List[str] - The original type_map in dataset, they are targets to change the energy bias. - full_type_map : List[str] - The full type_map in pre-trained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, - and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. """ - sorter = np.argsort(full_type_map) - missing_types = [t for t in origin_type_map if t not in full_type_map] - assert ( - not missing_types - ), f"Some types are not in the pre-trained model: {list(missing_types)} !" 
- idx_type_map = sorter[ - np.searchsorted(full_type_map, origin_type_map, sorter=sorter) - ] - original_bias = self.fitting_net["bias_atom_e"] - if bias_shift == "delta": - - def model_forward(coord, atype, box, fparam=None, aparam=None): - with torch.no_grad(): # it's essential for pure torch forward function to use auto_batchsize - ( - extended_coord, - extended_atype, - mapping, - nlist, - ) = extend_input_and_build_neighbor_list( - coord, - atype, - self.get_rcut(), - self.get_sel(), - mixed_types=self.mixed_types(), - box=box, - ) - atomic_ret = self.forward_common_atomic( - extended_coord, - extended_atype, - nlist, - mapping=mapping, - fparam=fparam, - aparam=aparam, - ) - return atomic_ret["energy"].detach() - - delta_bias_e = compute_output_stats( - merged, - self.get_ntypes(), - model_forward=model_forward, - ) - bias_atom_e = delta_bias_e + original_bias - elif bias_shift == "statistic": - bias_atom_e = compute_output_stats( - merged, - self.get_ntypes(), - ) - else: - raise RuntimeError("Unknown bias_shift mode: " + bias_shift) - log.info( - f"Change energy bias of {origin_type_map!s} " - f"from {to_numpy_array(original_bias[idx_type_map]).reshape(-1)!s} " - f"to {to_numpy_array(bias_atom_e[idx_type_map]).reshape(-1)!s}." + self.fitting_net["bias_atom_e"] = ( + out_bias + self.fitting_net["bias_atom_e"] if add else out_bias ) - self.fitting_net["bias_atom_e"] = bias_atom_e + + def get_out_bias(self) -> torch.Tensor: + """Return the output bias of the atomic model.""" + return self.fitting_net["bias_atom_e"] def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index f878ca7fc6..a75099bec1 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -289,6 +289,27 @@ def _compute_weight( for _ in range(nmodels) ] + def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + """ + Modify the output bias for all the models in the linear atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + for model in self.models: + model.set_out_bias(out_bias, add=add) + + def get_out_bias(self) -> torch.Tensor: + """Return the weighted output bias of the linear atomic model.""" + # TODO add get_out_bias for linear atomic model + raise NotImplementedError + def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" # tricky... diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index a71a3f6816..c20abf6a12 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -234,11 +234,24 @@ def compute_or_load_stat( torch.tensor(bias_atom_e, device=env.DEVICE).view([self.ntypes, 1]) ) - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - # need to implement - pass + def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. 
+ add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + self.bias_atom_e = out_bias + self.bias_atom_e if add else out_bias + + def get_out_bias(self) -> torch.Tensor: + """Return the output bias of the atomic model.""" + return self.bias_atom_e def forward_atomic( self, From aa3365f7eb90d18276a13ac395efa51010c6953f Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 21 Mar 2024 20:15:12 +0800 Subject: [PATCH 12/14] Update base_atomic_model.py --- deepmd/pt/model/atomic_model/base_atomic_model.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py index d453b67205..cdbc5fc639 100644 --- a/deepmd/pt/model/atomic_model/base_atomic_model.py +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -191,9 +191,22 @@ def serialize(self) -> dict: } def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ raise NotImplementedError def get_out_bias(self) -> torch.Tensor: + """Return the output bias of the atomic model.""" raise NotImplementedError def get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]: From b4d4816e210f9018a3682124322cc71034deb781 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 22 Mar 2024 00:44:42 +0800 Subject: [PATCH 13/14] resolve conversations --- .../dpmodel/atomic_model/dp_atomic_model.py | 21 +++++++++ .../atomic_model/linear_atomic_model.py | 21 +++++++++ .../atomic_model/make_base_atomic_model.py | 19 ++++++++ .../atomic_model/pairtab_atomic_model.py | 19 ++++++++ .../model/atomic_model/base_atomic_model.py | 45 +++++++------------ .../model/atomic_model/linear_atomic_model.py | 6 --- deepmd/pt/model/model/make_model.py | 35 +++++++++++++++ deepmd/pt/utils/finetune.py | 6 ++- deepmd/tf/fit/ener.py | 6 +-- deepmd/tf/model/ener.py | 12 ++--- deepmd/tf/model/model.py | 10 ++--- deepmd/tf/train/trainer.py | 20 +++++---- deepmd/utils/finetune.py | 18 ++++---- source/tests/pt/test_finetune.py | 2 +- 14 files changed, 170 insertions(+), 70 deletions(-) diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py index 4907483d1d..8a40f8d238 100644 --- a/deepmd/dpmodel/atomic_model/dp_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py @@ -83,6 +83,27 @@ def mixed_types(self) -> bool: """ return self.descriptor.mixed_types() + def set_out_bias(self, out_bias: np.ndarray, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : np.ndarray + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. 
+ """ + self.fitting["bias_atom_e"] = ( + out_bias + self.fitting["bias_atom_e"] if add else out_bias + ) + + def get_out_bias(self) -> np.ndarray: + """Return the output bias of the atomic model.""" + return self.fitting["bias_atom_e"] + def forward_atomic( self, extended_coord: np.ndarray, diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py index 088cf34900..93a885f3ab 100644 --- a/deepmd/dpmodel/atomic_model/linear_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/linear_atomic_model.py @@ -275,6 +275,27 @@ def get_sel_type(self) -> List[int]: # join all the selected types return list(set().union(*[model.get_sel_type() for model in self.models])) + def set_out_bias(self, out_bias: np.ndarray, add=False) -> None: + """ + Modify the output bias for all the models in the linear atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + for model in self.models: + model.set_out_bias(out_bias, add=add) + + def get_out_bias(self) -> np.ndarray: + """Return the weighted output bias of the linear atomic model.""" + # TODO add get_out_bias for linear atomic model + raise NotImplementedError + def is_aparam_nall(self) -> bool: """Check whether the shape of atomic parameters is (nframes, nall, ndim). diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py index 936c2b0943..3e02a5d076 100644 --- a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py @@ -95,6 +95,25 @@ def get_sel_type(self) -> List[int]: If returning an empty list, all atom types are selected. """ + @abstractmethod + def set_out_bias(self, out_bias: t_tensor, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : t_tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + + @abstractmethod + def get_out_bias(self) -> t_tensor: + """Return the output bias of the atomic model.""" + @abstractmethod def is_aparam_nall(self) -> bool: """Check whether the shape of atomic parameters is (nframes, nall, ndim). diff --git a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py index 99b8ec1eff..30ab58928b 100644 --- a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py @@ -126,6 +126,25 @@ def mixed_types(self) -> bool: # to match DPA1 and DPA2. return True + def set_out_bias(self, out_bias: np.ndarray, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. 
+ """ + self.bias_atom_e = out_bias + self.bias_atom_e if add else out_bias + + def get_out_bias(self) -> np.ndarray: + """Return the output bias of the atomic model.""" + return self.bias_atom_e + def serialize(self) -> dict: dd = BaseAtomicModel.serialize(self) dd.update( diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py index cdbc5fc639..fa30655f8a 100644 --- a/deepmd/pt/model/atomic_model/base_atomic_model.py +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -190,25 +190,6 @@ def serialize(self) -> dict: "pair_exclude_types": self.pair_exclude_types, } - def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: - """ - Modify the output bias for the atomic model. - - Parameters - ---------- - out_bias : torch.Tensor - The new bias to be applied. - add : bool, optional - Whether to add the new bias to the existing one. - If False, the output bias will be directly replaced by the new bias. - If True, the new bias will be added to the existing one. - """ - raise NotImplementedError - - def get_out_bias(self) -> torch.Tensor: - """Return the output bias of the atomic model.""" - raise NotImplementedError - def get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]: """Get a forward wrapper of the atomic model for output bias calculation.""" model_output_type = list(self.atomic_output_def().keys()) @@ -266,9 +247,13 @@ def compute_or_load_stat( raise NotImplementedError def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" + self, + merged, + origin_type_map, + full_type_map, + bias_adjust_mode="change-by-statistic", ) -> None: - """Change the energy bias according to the input data and the pretrained model. + """Change the output bias according to the input data and the pretrained model. Parameters ---------- @@ -280,14 +265,14 @@ def change_out_bias( only when needed. Since the sampling process can be slow and memory-intensive, the lazy function helps by only sampling once. origin_type_map : List[str] - The original type_map in dataset, they are targets to change the energy bias. + The original type_map in dataset, they are targets to change the output bias. full_type_map : List[str] The full type_map in pre-trained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, - and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + bias_adjust_mode : str + The mode for changing output bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on labels of target dataset, + and do least square on the errors to obtain the target shift as bias. + 'set-by-statistic' : directly use the statistic output bias in the target dataset. 
""" sorter = np.argsort(full_type_map) missing_types = [t for t in origin_type_map if t not in full_type_map] @@ -298,21 +283,21 @@ def change_out_bias( np.searchsorted(full_type_map, origin_type_map, sorter=sorter) ] original_bias = self.get_out_bias() - if bias_shift == "delta": + if bias_adjust_mode == "change-by-statistic": delta_bias = compute_output_stats( merged, self.get_ntypes(), model_forward=self.get_forward_wrapper_func(), ) self.set_out_bias(delta_bias, add=True) - elif bias_shift == "statistic": + elif bias_adjust_mode == "set-by-statistic": bias_atom = compute_output_stats( merged, self.get_ntypes(), ) self.set_out_bias(bias_atom) else: - raise RuntimeError("Unknown bias_shift mode: " + bias_shift) + raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode) bias_atom = self.get_out_bias() log.info( f"Change output bias of {origin_type_map!s} " diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py index a75099bec1..f599399e66 100644 --- a/deepmd/pt/model/atomic_model/linear_atomic_model.py +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -411,12 +411,6 @@ def compute_or_load_stat( self.models[0].compute_or_load_stat(sampled_func, stat_file_path) self.models[1].compute_or_load_stat(sampled_func, stat_file_path) - def change_out_bias( - self, merged, origin_type_map, full_type_map, bias_shift="delta" - ) -> None: - # need to implement - pass - def serialize(self) -> dict: dd = BaseAtomicModel.serialize(self) dd.update( diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index 167ad81923..0e89c05b79 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -172,6 +172,41 @@ def forward_common( model_predict = self.output_type_cast(model_predict, input_prec) return model_predict + def change_out_bias( + self, + merged, + origin_type_map, + full_type_map, + bias_adjust_mode="change-by-statistic", + ) -> None: + """Change the output bias of atomic model according to the input data and the pretrained model. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + origin_type_map : List[str] + The original type_map in dataset, they are targets to change the output bias. + full_type_map : List[str] + The full type_map in pre-trained model + bias_adjust_mode : str + The mode for changing output bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on labels of target dataset, + and do least square on the errors to obtain the target shift as bias. + 'set-by-statistic' : directly use the statistic output bias in the target dataset. 
+ """ + self.atomic_model.change_out_bias( + merged, + origin_type_map, + full_type_map, + bias_adjust_mode=bias_adjust_mode, + ) + def forward_common_lower( self, extended_coord, diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py index 55bab5f0ad..3f76454442 100644 --- a/deepmd/pt/utils/finetune.py +++ b/deepmd/pt/utils/finetune.py @@ -47,10 +47,12 @@ def change_finetune_model_params_single( if model_branch_from == "": model_branch_chosen = next(iter(model_dict_params.keys())) new_fitting = True - single_config["bias_shift"] = "statistic" # fitting net re-init + single_config["bias_adjust_mode"] = ( + "set-by-statistic" # fitting net re-init + ) log.warning( "The fitting net will be re-init instead of using that in the pretrained model! " - "The bias_shift will be statistic!" + "The bias_adjust_mode will be set-by-statistic!" ) else: model_branch_chosen = model_branch_from diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py index b391b00052..2229c51630 100644 --- a/deepmd/tf/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -793,11 +793,11 @@ def change_energy_bias( frozen_model, origin_type_map, full_type_map, - bias_shift="delta", + bias_adjust_mode="change-by-statistic", ntest=10, ) -> None: dp = None - if bias_shift == "delta": + if bias_adjust_mode == "change-by-statistic": # init model dp = DeepPotential(frozen_model) self.bias_atom_e = change_energy_bias_lower( @@ -806,7 +806,7 @@ def change_energy_bias( origin_type_map, full_type_map, self.bias_atom_e, - bias_shift=bias_shift, + bias_adjust_mode=bias_adjust_mode, ntest=ntest, ) diff --git a/deepmd/tf/model/ener.py b/deepmd/tf/model/ener.py index 70e0f4d2ba..a493fe0517 100644 --- a/deepmd/tf/model/ener.py +++ b/deepmd/tf/model/ener.py @@ -486,7 +486,7 @@ def change_energy_bias( frozen_model: str, origin_type_map: list, full_type_map: str, - bias_shift: str = "delta", + bias_adjust_mode: str = "change-by-statistic", ) -> None: """Change the energy bias according to the input data and the pretrained model. @@ -500,17 +500,17 @@ def change_energy_bias( The original type_map in dataset, they are targets to change the energy bias. full_type_map : str The full type_map in pretrained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. """ self.fitting.change_energy_bias( data, frozen_model, origin_type_map, full_type_map, - bias_shift, + bias_adjust_mode, self.data_bias_nsample, ) diff --git a/deepmd/tf/model/model.py b/deepmd/tf/model/model.py index a0e234a547..951ae09396 100644 --- a/deepmd/tf/model/model.py +++ b/deepmd/tf/model/model.py @@ -408,7 +408,7 @@ def change_energy_bias( frozen_model: str, origin_type_map: list, full_type_map: str, - bias_shift: str = "delta", + bias_adjust_mode: str = "change-by-statistic", ) -> None: """Change the energy bias according to the input data and the pretrained model. @@ -422,11 +422,11 @@ def change_energy_bias( The original type_map in dataset, they are targets to change the energy bias. 
full_type_map : str The full type_map in pretrained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. """ raise RuntimeError("Not supported") diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py index 125b795d2e..931cf87246 100644 --- a/deepmd/tf/train/trainer.py +++ b/deepmd/tf/train/trainer.py @@ -1114,7 +1114,7 @@ def _init_from_ckpt(self, ckpt_meta: str): self.ckpt_meta = ckpt_meta def _init_from_pretrained_model( - self, data, origin_type_map=None, bias_shift="delta" + self, data, origin_type_map=None, bias_adjust_mode="change-by-statistic" ): """Init the embedding net variables with the given frozen model. @@ -1124,11 +1124,11 @@ def _init_from_pretrained_model( The training data. origin_type_map : list The original type_map in dataset, they are targets to change the energy bias. - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. """ try: graph, graph_def = load_graph_def(self.run_opt.finetune) @@ -1159,11 +1159,15 @@ def _init_from_pretrained_model( "(this step may take long time)" ) self._change_energy_bias( - data, self.run_opt.finetune, origin_type_map, bias_shift + data, self.run_opt.finetune, origin_type_map, bias_adjust_mode ) def _change_energy_bias( - self, data, frozen_model, origin_type_map, bias_shift="delta" + self, + data, + frozen_model, + origin_type_map, + bias_adjust_mode="change-by-statistic", ): full_type_map = data.get_type_map() self.model.change_energy_bias( @@ -1171,7 +1175,7 @@ def _change_energy_bias( frozen_model, origin_type_map, full_type_map, - bias_shift=bias_shift, + bias_adjust_mode=bias_adjust_mode, ) diff --git a/deepmd/utils/finetune.py b/deepmd/utils/finetune.py index a454ad72ea..1150fe2701 100644 --- a/deepmd/utils/finetune.py +++ b/deepmd/utils/finetune.py @@ -26,7 +26,7 @@ def change_energy_bias_lower( origin_type_map: List[str], full_type_map: List[str], bias_atom_e: np.ndarray, - bias_shift="delta", + bias_adjust_mode="change-by-statistic", ntest=10, ): """Change the energy bias according to the input data and the pretrained model. @@ -43,11 +43,11 @@ def change_energy_bias_lower( The full type_map in pretrained model bias_atom_e : np.ndarray The old energy bias in the pretrained model. 
- bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. ntest : int The number of test samples in a system to change the energy bias. """ @@ -88,7 +88,7 @@ def change_energy_bias_lower( (numb_test, 1), ) ) - if bias_shift == "delta": + if bias_adjust_mode == "change-by-statistic": coord = test_data["coord"][:numb_test].reshape([numb_test, -1]) if sys.pbc: box = test_data["box"][:numb_test] @@ -114,7 +114,7 @@ def change_energy_bias_lower( type_numbs = np.concatenate(type_numbs) energy_ground_truth = np.concatenate(energy_ground_truth) old_bias = bias_atom_e[idx_type_map] - if bias_shift == "delta": + if bias_adjust_mode == "change-by-statistic": energy_predict = np.concatenate(energy_predict) bias_diff = energy_ground_truth - energy_predict delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0] @@ -129,11 +129,11 @@ def change_energy_bias_lower( log.info( f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom." ) - elif bias_shift == "statistic": + elif bias_adjust_mode == "set-by-statistic": statistic_bias = np.linalg.lstsq(type_numbs, energy_ground_truth, rcond=None)[0] bias_atom_e[idx_type_map] = statistic_bias.reshape(-1) else: - raise RuntimeError("Unknown bias_shift mode: " + bias_shift) + raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode) log.info( f"Change energy bias of {origin_type_map!s} from {old_bias!s} to {bias_atom_e[idx_type_map]!s}." ) diff --git a/source/tests/pt/test_finetune.py b/source/tests/pt/test_finetune.py index 78b6286f94..79f8c57cb8 100644 --- a/source/tests/pt/test_finetune.py +++ b/source/tests/pt/test_finetune.py @@ -58,7 +58,7 @@ def test_finetune_change_out_bias(self): # change energy bias model.atomic_model.change_out_bias( self.sampled, - bias_shift="delta", + bias_adjust_mode="change-by-statistic", origin_type_map=origin_type_map, full_type_map=full_type_map, ) From 19b9e79942c040609e5a95aadc6788ea59fe809d Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 22 Mar 2024 00:44:53 +0800 Subject: [PATCH 14/14] Update training.py --- deepmd/pt/train/training.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 7d5fbfac01..aa1ec1c206 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -566,9 +566,11 @@ def single_model_finetune( _model_params["new_type_map"], ) if isinstance(_model, EnergyModel): - _model.atomic_model.change_out_bias( + _model.change_out_bias( _sample_func, - bias_shift=_model_params.get("bias_shift", "delta"), + bias_adjust_mode=_model_params.get( + "bias_adjust_mode", "change-by-statistic" + ), origin_type_map=new_type_map, full_type_map=old_type_map, )
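With the series complete, fine-tuning an EnergyModel no longer reaches into the fitting net from the trainer: training.py calls change_out_bias on the model, make_model forwards it to the atomic model, and the bias is either shifted by the least-squares residual of the current predictions ("change-by-statistic") or replaced by the dataset statistic ("set-by-statistic"). A sketch of the resulting call, mirroring the updated training.py; model, sample_func, new_type_map, and old_type_map are placeholders for the objects the trainer holds at that point:

# Placeholders only: `model` is the fine-tuned EnergyModel, `sample_func` the lazy
# data sampler, and the two type maps come from the dataset and the pretrained model.
model.change_out_bias(
    sample_func,                             # List[dict] or Callable[[], List[dict]]
    origin_type_map=new_type_map,            # types present in the fine-tuning data
    full_type_map=old_type_map,              # types known to the pretrained model
    bias_adjust_mode="change-by-statistic",  # or "set-by-statistic" to re-initialize the bias
)

Keeping this logic in BaseAtomicModel, backed by per-model set_out_bias/get_out_bias implementations, is also what allows the earlier change_out_bias stubs in the dipole, DOS, polarizability, and spin models to be dropped.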