
Commit

Fix --load-8bit for Intel ARC GPUs (lm-sys#1697)
Steve-Tech authored Jun 15, 2023
1 parent 520fca7 commit 0c65af1
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions fastchat/model/model_adapter.py
@@ -146,6 +146,13 @@ def load_model(
         replace_llama_attn_with_non_inplace_operations()
     elif device == "xpu":
         kwargs = {"torch_dtype": torch.bfloat16}
+        # Try to load ipex, while it looks unused, it links into torch for xpu support
+        try:
+            import intel_extension_for_pytorch as ipex
+        except ImportError:
+            warnings.warn(
+                "Intel Extension for PyTorch is not installed, but is required for xpu inference."
+            )
     else:
         raise ValueError(f"Invalid device: {device}")

@@ -185,12 +192,6 @@ def load_model(
         model.to(device)
 
     elif device == "xpu":
-        try:
-            import intel_extension_for_pytorch as ipex
-        except ImportError:
-            warnings.warn(
-                "Intel Extension for PyTorch is not installed, but is required for xpu inference."
-            )
         model.eval()
         model = model.to("xpu")
         model = torch.xpu.optimize(model, dtype=torch.bfloat16, inplace=True)
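In short, the change moves the intel_extension_for_pytorch import from the post-load xpu branch up to where the xpu kwargs are prepared. Importing ipex is what wires xpu support into torch, so it has to happen before any weights are placed on the device; presumably the --load-8bit compression path was placing tensors on the xpu device before the old import location was ever reached. Below is a minimal sketch of the resulting xpu load path, not taken from this commit: the checkpoint name and the plain transformers AutoModelForCausalLM/AutoTokenizer calls are illustrative assumptions, and it only runs on an Intel GPU with ipex installed.

import warnings

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Importing ipex links xpu support into torch as a side effect, so do it
# before the model is instantiated, compressed, or moved to the device.
try:
    import intel_extension_for_pytorch as ipex  # noqa: F401
except ImportError:
    warnings.warn(
        "Intel Extension for PyTorch is not installed, but is required for xpu inference."
    )

model_path = "facebook/opt-1.3b"  # hypothetical example checkpoint
kwargs = {"torch_dtype": torch.bfloat16}

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, low_cpu_mem_usage=True, **kwargs
)

# Same post-load steps as the second hunk above.
model.eval()
model = model.to("xpu")
model = torch.xpu.optimize(model, dtype=torch.bfloat16, inplace=True)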
