Skip to content

Commit

Permalink
add back fasterquant() for compat (#892)
Browse files Browse the repository at this point in the history
* add back fasterquant() for compat

* Update quantizer.py
  • Loading branch information
Qubitium authored Dec 17, 2024
1 parent b13d23b commit 836a344
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 8 deletions.
2 changes: 1 addition & 1 deletion gptqmodel/integration/src/optimum/gptq/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ def tmp(_, input, output):
h.remove()
for name in subset_name_list:
logger.info(f"Quantizing {name} in block {i + 1}/{len(blocks)}...")
quant_outputs = gptq[name].hf_quantize(
quant_outputs = gptq[name].fasterquant(
percdamp=self.damp_percent, group_size=self.group_size, actorder=self.desc_act
)
scale, zero, g_idx = quant_outputs[0], quant_outputs[1], quant_outputs[2]
Expand Down
28 changes: 21 additions & 7 deletions gptqmodel/quantization/gptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,28 @@ def add_batch(self, inp, out):
# self.H += 2 / self.nsamples * inp.matmul(inp.t())
self.H += inp.matmul(inp.t())

# Wrapper kept for backward compatibility with optimum, which still calls fasterquant().
# TODO: mark for deprecation
def fasterquant(
    self,
    blocksize=128,
    percdamp=0.01,
    damp_auto_increment=0.0015,
    group_size=-1,
    actorder=False,
    static_groups=False,
):
    """Legacy-named entry point that delegates to ``hf_quantize``.

    All six arguments are forwarded unchanged, positionally and in
    declaration order, and whatever ``hf_quantize`` returns is returned
    as-is.
    """
    forwarded = (
        blocksize,
        percdamp,
        damp_auto_increment,
        group_size,
        actorder,
        static_groups,
    )
    return self.hf_quantize(*forwarded)

# public api exposed to hf
def hf_quantize(
    self,
    blocksize=128,
    percdamp=0.01,
    damp_auto_increment=0.0015,
    group_size=-1,
    actorder=False,
    static_groups=False,
):
    """Public quantization entry point exposed to HF integrations.

    The parameter list is declared exactly once; repeating it (as the
    merged old/new diff lines did) is a ``SyntaxError`` because of the
    duplicate argument names.

    All six arguments are forwarded unchanged, positionally and in
    declaration order, to ``self.quantize``; its return value is returned
    as-is.
    """
    return self.quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)

Expand Down

0 comments on commit 836a344

Please sign in to comment.