[Fix] Fix activation checkpointing bug (InternLM#159)
* fix lora checkpoint bug

* rename

* fix pre-commit
LZHgrla authored Oct 9, 2023
1 parent 800eef1 commit 772a6d6
Showing 1 changed file with 15 additions and 14 deletions.
xtuner/model/sft.py (15 additions, 14 deletions)
--- a/xtuner/model/sft.py
+++ b/xtuner/model/sft.py
@@ -18,23 +18,14 @@ def __init__(self,
                  llm,
                  lora=None,
                  peft_model=None,
-                 use_gradient_checkpointing=True):
+                 use_activation_checkpointing=True):
         super().__init__()
         with LoadWoInit():
             self.llm = self._build_from_cfg_or_module(llm)
         self.llm.config.use_cache = False
         dispatch_modules(self.llm)
 
-        if isinstance(lora, dict) or isinstance(lora, Config) or isinstance(
-                lora, ConfigDict):
-            self.lora = BUILDER.build(lora)
-        else:
-            self.lora = lora
-        self.peft_model = peft_model
-        self.use_lora = lora is not None
-        if self.use_lora:
-            self._prepare_for_lora(peft_model, use_gradient_checkpointing)
-        elif use_gradient_checkpointing:
+        if use_activation_checkpointing:
             # For backward compatibility
             if hasattr(self.llm, 'enable_input_require_grads'):
                 self.llm.enable_input_require_grads()
@@ -49,13 +40,23 @@ def make_inputs_require_grad(module, input, output):
             # enable gradient checkpointing for memory efficiency
             self.llm.gradient_checkpointing_enable()
 
+        if isinstance(lora, dict) or isinstance(lora, Config) or isinstance(
+                lora, ConfigDict):
+            self.lora = BUILDER.build(lora)
+        else:
+            self.lora = lora
+        self.peft_model = peft_model
+        self.use_lora = lora is not None
+        if self.use_lora:
+            self._prepare_for_lora(peft_model, use_activation_checkpointing)
+
         self._is_init = True
 
     def _prepare_for_lora(self,
                           peft_model=None,
-                          use_gradient_checkpointing=True):
-        self.llm = prepare_model_for_kbit_training(self.llm,
-                                                   use_gradient_checkpointing)
+                          use_activation_checkpointing=True):
+        self.llm = prepare_model_for_kbit_training(
+            self.llm, use_activation_checkpointing)
         if self.lora.target_modules is None:
             modules = find_all_linear_names(self.llm)
             self.lora.target_modules = modules
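Taken together, the diff renames use_gradient_checkpointing to use_activation_checkpointing and moves the LoRA construction block below the activation-checkpointing setup, so checkpointing is configured on the base LLM before the adapter is attached. The sketch below illustrates that ordering with plain transformers/peft calls. It is a minimal, hypothetical recreation rather than the xtuner code: the build_lora_llm helper, the model path argument, and the LoRA hyperparameters are invented for illustration.

# Minimal sketch of the setup order the fix establishes (assumptions: the
# helper name, model path and LoRA hyperparameters are illustrative only,
# not taken from xtuner/model/sft.py).
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM


def build_lora_llm(model_path, use_activation_checkpointing=True):
    llm = AutoModelForCausalLM.from_pretrained(model_path)
    llm.config.use_cache = False  # the KV cache is not used during training

    if use_activation_checkpointing:
        if hasattr(llm, 'enable_input_require_grads'):
            llm.enable_input_require_grads()
        else:
            # Backward-compatible fallback (the hook named in the second hunk
            # header): make the embedding output require grad so checkpointed
            # segments of the frozen base model still propagate gradients.
            def make_inputs_require_grad(module, input, output):
                output.requires_grad_(True)

            llm.get_input_embeddings().register_forward_hook(
                make_inputs_require_grad)
        # enable activation (gradient) checkpointing for memory efficiency
        llm.gradient_checkpointing_enable()

    # LoRA wrapping happens only after checkpointing is configured, mirroring
    # the block the commit moves below the checkpointing code.
    llm = prepare_model_for_kbit_training(
        llm, use_gradient_checkpointing=use_activation_checkpointing)
    lora_cfg = LoraConfig(task_type='CAUSAL_LM', r=16, lora_alpha=32)
    return get_peft_model(llm, lora_cfg)

In use, something like build_lora_llm('path/to/base-llm') would return a PEFT-wrapped model with activation checkpointing already enabled on the frozen base weights, which is the situation the commit restores for LoRA runs.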
