add back fasterquant() for compat (#892)

* add back fasterquant() for compat
* Update quantizer.py
ModelCloud · Dec 17, 2024 · 836a344
1 parent b13d23b
commit 836a344
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 8 deletions.
diff --git a/gptqmodel/integration/src/optimum/gptq/quantizer.py b/gptqmodel/integration/src/optimum/gptq/quantizer.py
@@ -625,7 +625,7 @@ def tmp(_, input, output):
                     h.remove()
                 for name in subset_name_list:
                     logger.info(f"Quantizing {name} in block {i + 1}/{len(blocks)}...")
-                    quant_outputs = gptq[name].hf_quantize(
+                    quant_outputs = gptq[name].fasterquant(
                         percdamp=self.damp_percent, group_size=self.group_size, actorder=self.desc_act
                     )
                     scale, zero, g_idx = quant_outputs[0], quant_outputs[1], quant_outputs[2]

diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
@@ -70,14 +70,28 @@ def add_batch(self, inp, out):
         # self.H += 2 / self.nsamples * inp.matmul(inp.t())
         self.H += inp.matmul(inp.t())
 
+    # wrapper for backward compat with optimum
+    # TODO: mark for deprecation 
+    def fasterquant(
+        self,
+        blocksize=128,
+        percdamp=0.01,
+        damp_auto_increment=0.0015,
+        group_size=-1,
+        actorder=False,
+        static_groups=False,
+    ):
+        return self.hf_quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)
+
+    # public api exposed to hf
     def hf_quantize(
-            self,
-            blocksize=128,
-            percdamp=0.01,
-            damp_auto_increment=0.0015,
-            group_size=-1,
-            actorder=False,
-            static_groups=False,
+        self,
+        blocksize=128,
+        percdamp=0.01,
+        damp_auto_increment=0.0015,
+        group_size=-1,
+        actorder=False,
+        static_groups=False,
     ):
         return self.quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)