
linear_int4_kernel for XPU #1130

Open · wants to merge 33 commits into base: main
Changes shown from 1 commit of 33.

Commits:
1e32bbc
Sync main into release/2.6 branch (#1117)
xytintel Nov 22, 2024
f312190
[Release-2.6] Fix bugs of `empty_xpu` and `soft_shrink` (#1139)
xytintel Dec 3, 2024
7ecb0b1
[Release-2.6] Capture rrelu_with_noise noise mutation in compile (#1145)
xytintel Dec 5, 2024
5410f51
contiguous layout for sycl int4 kernel
airMeng Nov 22, 2024
e9311a3
push without compile
sunjiweiswift Nov 26, 2024
e3eaffa
update linearkernel
sunjiweiswift Nov 28, 2024
2a664af
fix some compile errors (not all)
sunjiweiswift Nov 28, 2024
0156ba5
add sycl_ker_config_convention
sunjiweiswift Nov 28, 2024
a58afec
reg kernel for pytorch
sunjiweiswift Nov 29, 2024
f487b20
add yaml for int4mm
sunjiweiswift Nov 29, 2024
ce1c894
update yaml file
sunjiweiswift Dec 3, 2024
d61b198
Modify code per some review comments
sunjiweiswift Dec 3, 2024
d76a0ce
modify function name
sunjiweiswift Dec 9, 2024
870a3b5
autogen: _weight_int4pack_mm_with_scales_and_zeros.out
sunjiweiswift Dec 10, 2024
a9627f6
param int -> int64_t (Python int is int64)
sunjiweiswift Dec 10, 2024
952ead9
use AT_DISPATCH_FLOATING_TYPES_AND
sunjiweiswift Dec 10, 2024
93804f9
Keep the same name as pytorch's _weight_int4pack_mm
sunjiweiswift Dec 11, 2024
9e50b68
modify UT for int4
sunjiweiswift Dec 11, 2024
81a72f1
sync UT with pytorch UT (linalg)
sunjiweiswift Dec 12, 2024
a70df0a
col-major
sunjiweiswift Dec 12, 2024
c08382c
UT passes for B of all ones
sunjiweiswift Dec 13, 2024
14bb4e0
update gemv
sunjiweiswift Dec 16, 2024
70a3e13
fix scale and zp address
sunjiweiswift Dec 17, 2024
a590ad6
fix UT for K larger than 1024
sunjiweiswift Dec 18, 2024
d6a2f3a
bug fix for FP16 (BF16 may still be incorrect)
sunjiweiswift Dec 18, 2024
27f18c2
save
sunjiweiswift Dec 20, 2024
7f94b9b
Merge branch 'main' into fp_zp
sunjiweiswift Dec 20, 2024
42c18e9
bugfix for Big Endian
sunjiweiswift Dec 20, 2024
d832050
Unify BF16 and FP16 function
sunjiweiswift Dec 20, 2024
8385f7e
fix compile warning
sunjiweiswift Dec 20, 2024
f44ed70
modify per review comments
sunjiweiswift Dec 23, 2024
09696b1
Merge branch 'main' into fp_zp
sunjiweiswift Dec 24, 2024
ebe8c7c
Merge branch 'main' into fp_zp
sunjiweiswift Dec 25, 2024
Viewing changes from commit a70df0a7257151d40499e5f0c5840c4699f4e3e6 ("col-major", committed by sunjiweiswift on Dec 12, 2024)
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/LinearInt4.cpp
@@ -14,7 +14,7 @@ Tensor _weight_int4pack_mm_xpu(
     int64_t qGroupSize,
     const Tensor& qScaleAndZeros) {
   auto M = A.size(0);
-  auto N = B.size(0);
+  auto N = B.size(1);
   auto K = A.size(1);
   TORCH_CHECK(
       A.dtype() == kBFloat16 || A.dtype() == kHalf || A.dtype() == kFloat,
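
The one-line fix matches the col-major packing this commit adopts: N is now read from B.size(1) rather than B.size(0). As rough context for what the op computes, below is a minimal Python reference for a group-wise int4 matmul. The function name, the [K, N] unpacked code layout, the [K // group_size, N, 2] scale/zero layout, and the (q - 8) * scale + zero dequant convention are illustrative assumptions for this sketch, not the kernel's actual packed format.

import torch

def int4_mm_reference(A, B_q, scales_and_zeros, group_size):
    # A: [M, K] activations; B_q: [K, N] unpacked int4 codes in [0, 15].
    # scales_and_zeros: [K // group_size, N, 2] holding (scale, zero) per
    # quantization group, mirroring the qScaleAndZeros argument (layout assumed).
    K, N = B_q.shape
    B_deq = torch.empty(K, N, dtype=A.dtype)
    for g in range(K // group_size):
        rows = slice(g * group_size, (g + 1) * group_size)
        scale = scales_and_zeros[g, :, 0]  # [N]
        zero = scales_and_zeros[g, :, 1]   # [N]
        # Assumed asymmetric convention: w = (q - 8) * scale + zero.
        B_deq[rows] = (B_q[rows].to(A.dtype) - 8) * scale + zero
    return A @ B_deq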
3 changes: 2 additions & 1 deletion test/xpu/test_linalg_xpu.py
@@ -272,8 +272,9 @@ def weight_int4pack_mm(a, b_int4pack, b_scales_and_zeros):
         b_scales_and_zeros = b_scales_and_zeros_bf16.to(dtype=dtype)
         ref = torch.mm(a, b)
         res = weight_int4pack_mm(a, b_int4pack, b_scales_and_zeros)
-
         mean_err = ((res - ref).abs() / ref).mean()
+        print(ref)
+        print(res)
         self.assertTrue(mean_err < 0.05)

@dtypes(torch.float, torch.complex64) # Integer matmul just supported on CPU
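
The test tolerates a 5% mean relative error against a full-precision torch.mm reference, since group-wise int4 quantization is lossy. For context, here is a sketch of the kind of group-quantization helper that would produce the unpacked codes and b_scales_and_zeros; PyTorch's test utilities provide their own version, and this illustrative one assumes the same (q - 8) * scale + zero convention as the reference sketch above.

import torch

def group_quantize_reference(w, group_size=32, n_bit=4):
    # w: [K, N] float weight; K must be divisible by group_size.
    K, N = w.shape
    wg = w.reshape(K // group_size, group_size, N)
    max_val = wg.amax(dim=1)  # [K // group_size, N]
    min_val = wg.amin(dim=1)
    scales = (max_val - min_val).clamp(min=1e-6) / (2**n_bit - 1)
    zeros = min_val + scales * 2 ** (n_bit - 1)  # floating-point zero points
    q = ((wg - zeros.unsqueeze(1)) / scales.unsqueeze(1)).round() + 2 ** (n_bit - 1)
    q = q.clamp(0, 2**n_bit - 1).to(torch.int32).reshape(K, N)
    return q, torch.stack([scales, zeros], dim=-1)  # codes, [K//group_size, N, 2]

Quantizing a weight with this helper and multiplying through int4_mm_reference reproduces the test's pattern: mean_err = ((res - ref).abs() / ref).mean() should stay well under the 0.05 threshold for well-conditioned inputs.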