
Commit

merge upstream
LeiWang1999 committed Dec 19, 2024
1 parent f0a1ec3 commit 7d5dd06
Showing 2 changed files with 11 additions and 7 deletions.
14 changes: 7 additions & 7 deletions vllm/model_executor/layers/linear.py
@@ -24,12 +24,12 @@
logger = init_logger(__name__)

WEIGHT_LOADER_V2_SUPPORTED = [
"CompressedTensorsLinearMethod", "AWQMarlinLinearMethod", "GPTQBitBLASLinearMethod",
"AWQLinearMethod", "GPTQMarlinLinearMethod", "Fp8LinearMethod",
"MarlinLinearMethod", "QQQLinearMethod", "GPTQMarlin24LinearMethod",
"TPUInt8LinearMethod", "GPTQLinearMethod", "FBGEMMFp8LinearMethod",
"ModelOptFp8LinearMethod", "IPEXAWQLinearMethod", "IPEXGPTQLinearMethod",
"HQQMarlinMethod"
"CompressedTensorsLinearMethod", "AWQMarlinLinearMethod",
"GPTQBitBLASLinearMethod", "AWQLinearMethod", "GPTQMarlinLinearMethod",
"Fp8LinearMethod", "MarlinLinearMethod", "QQQLinearMethod",
"GPTQMarlin24LinearMethod", "TPUInt8LinearMethod", "GPTQLinearMethod",
"FBGEMMFp8LinearMethod", "ModelOptFp8LinearMethod", "IPEXAWQLinearMethod",
"IPEXGPTQLinearMethod", "HQQMarlinMethod"
]
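
Note that GPTQBitBLASLinearMethod remains in WEIGHT_LOADER_V2_SUPPORTED after the reflow; the list contents are unchanged, only the line wrapping differs. vLLM consults this list when wiring up parameter loading: if the active quant method's class name appears here, the layer passes the newer weight_loader_v2 callback to create_weights instead of the legacy weight_loader. A minimal sketch of that selection, paraphrased rather than quoted from linear.py:

# Sketch (paraphrase, not verbatim vLLM code): choose the v2 loader only for
# quantization methods that declare support for it.
def select_weight_loader(layer):
    method_name = layer.quant_method.__class__.__name__
    if method_name in WEIGHT_LOADER_V2_SUPPORTED:
        return layer.weight_loader_v2
    return layer.weight_loader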


@@ -514,7 +514,7 @@ def weight_loader(self,
# Special case for Marlin.
shard_size, shard_offset = adjust_marlin_shard(
param, shard_size, shard_offset)

shard_size, shard_offset = adjust_bitblas_shard(
param, shard_size, shard_offset)

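The second hunk sits in the shard-adjustment path of weight_loader, where the fork keeps its BitBLAS special case next to the existing Marlin one: for packed weight layouts, the logical shard size and offset must be rescaled before slicing the loaded tensor. The helper's body is not part of this diff; by analogy with vLLM's adjust_marlin_shard it could look roughly like the sketch below, where the bitblas_tile_size attribute name is an assumption rather than the fork's actual field:

def adjust_bitblas_shard(param, shard_size, shard_offset):
    # Hypothetical sketch modeled on adjust_marlin_shard; the real helper and
    # its attribute name live in the fork's BitBLAS quantization code.
    bitblas_tile_size = getattr(param, "bitblas_tile_size", None)
    if bitblas_tile_size is None:
        # Parameter is not BitBLAS-packed: leave the shard geometry unchanged.
        return shard_size, shard_offset
    # Packed tensors store several logical elements per physical element, so
    # both the shard size and the shard offset scale with the tile size.
    return shard_size * bitblas_tile_size, shard_offset * bitblas_tile_size
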
4 changes: 4 additions & 0 deletions vllm/model_executor/layers/quantization/__init__.py
@@ -39,6 +39,7 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
from .aqlm import AQLMConfig
from .awq import AWQConfig
from .awq_marlin import AWQMarlinConfig
+from .bitblas import BitBLASConfig
from .bitsandbytes import BitsAndBytesConfig
from .compressed_tensors.compressed_tensors import ( # noqa: E501
CompressedTensorsConfig)
@@ -48,6 +49,7 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
from .fp8 import Fp8Config
from .gguf import GGUFConfig
from .gptq import GPTQConfig
+from .gptq_bitblas import GPTQBitBLASConfig
from .gptq_marlin import GPTQMarlinConfig
from .gptq_marlin_24 import GPTQMarlin24Config
from .hqq_marlin import HQQMarlinConfig
@@ -69,9 +71,11 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
# The order of gptq methods is important for config.py iteration over
# override_quantization_method(..)
"marlin": MarlinConfig,
"bitblas": BitBLASConfig,
"gguf": GGUFConfig,
"gptq_marlin_24": GPTQMarlin24Config,
"gptq_marlin": GPTQMarlinConfig,
"gptq_bitblas": GPTQBitBLASConfig,
"awq_marlin": AWQMarlinConfig,
"gptq": GPTQConfig,
"compressed-tensors": CompressedTensorsConfig,
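With BitBLASConfig and GPTQBitBLASConfig imported and registered, get_quantization_config can resolve the two new method names, and they become valid values for the engine's quantization argument. The gptq_bitblas entry is placed with the other GPTQ override methods ahead of the plain gptq entry, consistent with the ordering comment above. A short usage sketch; the model path is a placeholder, not a real checkpoint:

from vllm import LLM
from vllm.model_executor.layers.quantization import get_quantization_config

# Resolves to GPTQBitBLASConfig now that the registry entry exists.
print(get_quantization_config("gptq_bitblas").__name__)

# Placeholder path: any GPTQ checkpoint that the BitBLAS backend can handle
# would be used here.
llm = LLM(model="path/to/gptq-model", quantization="gptq_bitblas")
print(llm.generate("Hello from BitBLAS!"))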
