diff --git a/requirements-common.txt b/requirements-common.txt
index 818f72e14be96..72fb020a82c4e 100644
--- a/requirements-common.txt
+++ b/requirements-common.txt
@@ -19,7 +19,7 @@ prometheus-fastapi-instrumentator >= 7.0.0
 tiktoken >= 0.6.0  # Required for DBRX tokenizer
 lm-format-enforcer >= 0.10.9, < 0.11
 outlines >= 0.0.43, < 0.1
-xgrammar
+xgrammar >= 0.1.5; platform_machine == "x86_64" # only x86 wheels exist, other CPUs fall back to outlines
 typing_extensions >= 4.10
 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
 partial-json-parser # used for parsing partial JSON outputs
diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py
index 23c31fcfd7f05..3340bad38ab73 100644
--- a/vllm/model_executor/guided_decoding/__init__.py
+++ b/vllm/model_executor/guided_decoding/__init__.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING
 
 from vllm.logger import init_logger
+from vllm.platforms import CpuArchEnum, current_platform
 
 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer
@@ -25,6 +26,12 @@ def maybe_backend_fallback(
         guided_params.backend = "xgrammar"
 
     if guided_params.backend == "xgrammar":
+        # xgrammar only has x86 wheels for linux, fallback to outlines
+        if current_platform.get_cpu_architecture() is not CpuArchEnum.X86:
+            logger.warning("xgrammar is only supported on x86 CPUs. "
+                           "Falling back to use outlines instead.")
+            guided_params.backend = "outlines"
+
         # xgrammar doesn't support regex or choice, fallback to outlines
         if guided_params.regex is not None or guided_params.choice is not None:
             logger.warning(
diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index 7cb8ac4b0a1e0..419237c252ffd 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -1,5 +1,5 @@
 from .interface import _Backend  # noqa: F401
-from .interface import Platform, PlatformEnum, UnspecifiedPlatform
+from .interface import CpuArchEnum, Platform, PlatformEnum, UnspecifiedPlatform
 
 current_platform: Platform
 
@@ -120,4 +120,4 @@ def cuda_is_jetson() -> bool:
 else:
     current_platform = UnspecifiedPlatform()
 
-__all__ = ['Platform', 'PlatformEnum', 'current_platform']
+__all__ = ['Platform', 'PlatformEnum', 'current_platform', 'CpuArchEnum']
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index eac2b413f9271..0be7df7941b8b 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -1,4 +1,5 @@
 import enum
+import platform
 import random
 from typing import TYPE_CHECKING, NamedTuple, Optional, Tuple, Union
 
@@ -37,6 +38,14 @@ class PlatformEnum(enum.Enum):
     UNSPECIFIED = enum.auto()
 
 
+class CpuArchEnum(enum.Enum):  # result of get_cpu_architecture()
+    X86 = enum.auto()  # x86_64, amd64, i386, i686
+    ARM = enum.auto()  # machine name starts with "arm" or "aarch"
+    POWERPC = enum.auto()  # machine name starts with "ppc"
+    OTHER = enum.auto()  # non-empty machine name, none of the above
+    UNKNOWN = enum.auto()  # platform.machine() returned ""
+
+
 class DeviceCapability(NamedTuple):
     major: int
     minor: int
@@ -184,6 +193,23 @@ def verify_quantization(cls, quant: str) -> None:
                 f"{quant} quantization is currently not supported in "
                 f"{cls.device_name}.")
 
+    @classmethod
+    def get_cpu_architecture(cls) -> CpuArchEnum:
+        """Classify the host CPU named by ``platform.machine()``.
+
+        An empty name yields UNKNOWN; an unrecognized one yields OTHER.
+        """
+        arch = platform.machine().lower()
+        if not arch:
+            return CpuArchEnum.UNKNOWN
+        if arch in ("x86_64", "amd64", "i386", "i686"):
+            return CpuArchEnum.X86
+        if arch.startswith(("arm", "aarch")):
+            return CpuArchEnum.ARM
+        if arch.startswith("ppc"):
+            return CpuArchEnum.POWERPC
+        return CpuArchEnum.OTHER
+
 
 class UnspecifiedPlatform(Platform):
     _enum = PlatformEnum.UNSPECIFIED