diff --git a/vllm/model_executor/layers/quantization/kernels/machete.py b/vllm/model_executor/layers/quantization/kernels/machete.py index 3aac938aad0e5..15df0200f30b5 100644 --- a/vllm/model_executor/layers/quantization/kernels/machete.py +++ b/vllm/model_executor/layers/quantization/kernels/machete.py @@ -108,11 +108,11 @@ def apply_weights(self, x_2d = self.act_perm(x_2d) output = ops.machete_mm(a=x_2d, - b_q=w_q, - b_type=c.weight_type, - b_group_zeros=None, - b_group_scales=w_s, - b_group_size=c.group_size) + b_q=w_q, + b_type=c.weight_type, + b_group_zeros=None, + b_group_scales=w_s, + b_group_size=c.group_size) if bias is not None: output.add_(bias) # In-place add