diff --git a/gptqmodel/integration/src/optimum/gptq/quantizer.py b/gptqmodel/integration/src/optimum/gptq/quantizer.py
index f87d99d7d..4706b38f3 100644
--- a/gptqmodel/integration/src/optimum/gptq/quantizer.py
+++ b/gptqmodel/integration/src/optimum/gptq/quantizer.py
@@ -625,7 +625,7 @@ def tmp(_, input, output):
                     h.remove()
                 for name in subset_name_list:
                     logger.info(f"Quantizing {name} in block {i + 1}/{len(blocks)}...")
-                    quant_outputs = gptq[name].hf_quantize(
+                    quant_outputs = gptq[name].fasterquant(
                         percdamp=self.damp_percent, group_size=self.group_size, actorder=self.desc_act
                     )
                     scale, zero, g_idx = quant_outputs[0], quant_outputs[1], quant_outputs[2]
diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
index c04b445a2..40455bb88 100644
--- a/gptqmodel/quantization/gptq.py
+++ b/gptqmodel/quantization/gptq.py
@@ -70,14 +70,28 @@ def add_batch(self, inp, out):
         # self.H += 2 / self.nsamples * inp.matmul(inp.t())
         self.H += inp.matmul(inp.t())
 
+    # wrapper for backward compat with optimum
+    # TODO: mark for deprecation
+    def fasterquant(
+        self,
+        blocksize=128,
+        percdamp=0.01,
+        damp_auto_increment=0.0015,
+        group_size=-1,
+        actorder=False,
+        static_groups=False,
+    ):
+        return self.hf_quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)
+
+    # public api exposed to hf
     def hf_quantize(
-            self,
-            blocksize=128,
-            percdamp=0.01,
-            damp_auto_increment=0.0015,
-            group_size=-1,
-            actorder=False,
-            static_groups=False,
+        self,
+        blocksize=128,
+        percdamp=0.01,
+        damp_auto_increment=0.0015,
+        group_size=-1,
+        actorder=False,
+        static_groups=False,
     ):
         return self.quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)