Skip to content

Commit

Permalink
[Bugfix] Only require XGrammar on x86 (#10865)
Browse files Browse the repository at this point in the history
Signed-off-by: mgoin <[email protected]>
  • Loading branch information
mgoin authored Dec 3, 2024
1 parent 2f2cdc7 commit 7090c27
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 3 deletions.
2 changes: 1 addition & 1 deletion requirements-common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0 # Required for DBRX tokenizer
lm-format-enforcer >= 0.10.9, < 0.11
outlines >= 0.0.43, < 0.1
xgrammar
xgrammar >= 0.1.5; platform_machine == "x86_64"
typing_extensions >= 4.10
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
partial-json-parser # used for parsing partial JSON outputs
Expand Down
7 changes: 7 additions & 0 deletions vllm/model_executor/guided_decoding/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING

from vllm.logger import init_logger
from vllm.platforms import CpuArchEnum, current_platform

if TYPE_CHECKING:
from transformers import PreTrainedTokenizer
Expand All @@ -25,6 +26,12 @@ def maybe_backend_fallback(
guided_params.backend = "xgrammar"

if guided_params.backend == "xgrammar":
# xgrammar only has x86 wheels for linux, fallback to outlines
if current_platform.get_cpu_architecture() is not CpuArchEnum.X86:
logger.warning("xgrammar is only supported on x86 CPUs. "
"Falling back to use outlines instead.")
guided_params.backend = "outlines"

# xgrammar doesn't support regex or choice, fallback to outlines
if guided_params.regex is not None or guided_params.choice is not None:
logger.warning(
Expand Down
4 changes: 2 additions & 2 deletions vllm/platforms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .interface import _Backend # noqa: F401
from .interface import Platform, PlatformEnum, UnspecifiedPlatform
from .interface import CpuArchEnum, Platform, PlatformEnum, UnspecifiedPlatform

current_platform: Platform

Expand Down Expand Up @@ -120,4 +120,4 @@ def cuda_is_jetson() -> bool:
else:
current_platform = UnspecifiedPlatform()

__all__ = ['Platform', 'PlatformEnum', 'current_platform']
__all__ = ['Platform', 'PlatformEnum', 'current_platform', 'CpuArchEnum']
26 changes: 26 additions & 0 deletions vllm/platforms/interface.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import enum
import platform
import random
from typing import TYPE_CHECKING, NamedTuple, Optional, Tuple, Union

Expand Down Expand Up @@ -37,6 +38,14 @@ class PlatformEnum(enum.Enum):
UNSPECIFIED = enum.auto()


@enum.unique
class CpuArchEnum(enum.Enum):
    """Coarse CPU architecture families that platform code can branch on.

    Members are auto-numbered; callers should compare by identity
    (``is`` / ``is not``) rather than relying on the integer values.
    """

    # 32- and 64-bit x86 machines.
    X86 = enum.auto()
    # ARM / AArch64 machines.
    ARM = enum.auto()
    # PowerPC machines.
    POWERPC = enum.auto()
    # A machine string was reported but matches none of the families above.
    OTHER = enum.auto()
    # No machine string could be determined at all.
    UNKNOWN = enum.auto()


class DeviceCapability(NamedTuple):
major: int
minor: int
Expand Down Expand Up @@ -184,6 +193,23 @@ def verify_quantization(cls, quant: str) -> None:
f"{quant} quantization is currently not supported in "
f"{cls.device_name}.")

@classmethod
def get_cpu_architecture(cls) -> CpuArchEnum:
    """
    Classify the host CPU into a ``CpuArchEnum`` family.

    The decision is based on the lower-cased value of
    ``platform.machine()``:

    * empty string                         -> ``CpuArchEnum.UNKNOWN``
    * ``x86_64``, ``amd64``, ``i386``, ``i686`` -> ``CpuArchEnum.X86``
    * prefix ``arm`` or ``aarch``          -> ``CpuArchEnum.ARM``
    * prefix ``ppc``                       -> ``CpuArchEnum.POWERPC``
    * anything else                        -> ``CpuArchEnum.OTHER``
    """
    arch = platform.machine().lower()

    # platform.machine() yields "" when the value cannot be determined.
    if not arch:
        return CpuArchEnum.UNKNOWN

    if arch in {"x86_64", "amd64", "i386", "i686"}:
        return CpuArchEnum.X86

    if arch.startswith(("arm", "aarch")):
        return CpuArchEnum.ARM

    if arch.startswith("ppc"):
        return CpuArchEnum.POWERPC

    return CpuArchEnum.OTHER


class UnspecifiedPlatform(Platform):
_enum = PlatformEnum.UNSPECIFIED
Expand Down

0 comments on commit 7090c27

Please sign in to comment.