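Fix minor issues (set GIT_SHALLOW FALSE for the CUTLASS fetch, evaluate the CUTLASS_CHECK argument only once, tidy formatting and comments)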

vllm-project · Dec 17, 2024 · c459bbc
1 parent 0d38f0a
commit c459bbc
Show file tree

Hide file tree

Showing 5 changed files with 11 additions and 9 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -229,7 +229,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
         # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
         # Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags.
         # So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE
-        # GIT_SHALLOW FALSE
+        GIT_SHALLOW FALSE
     )
   endif()
   FetchContent_MakeAvailable(cutlass)

diff --git a/csrc/cutlass_extensions/common.hpp b/csrc/cutlass_extensions/common.hpp
@@ -8,10 +8,11 @@
 /**
  * Helper function for checking CUTLASS errors
  */
-#define CUTLASS_CHECK(status)                        \
-  {                                                  \
-    TORCH_CHECK(status == cutlass::Status::kSuccess, \
-                cutlassGetStatusString(status));     \
+#define CUTLASS_CHECK(status)                       \
+  {                                                 \
+    cutlass::Status error = status;                 \
+    TORCH_CHECK(error == cutlass::Status::kSuccess, \
+                cutlassGetStatusString(error));     \
   }
 
 /**

diff --git a/csrc/sparse/cutlass/sparse_compressor_c3x.cu b/csrc/sparse/cutlass/sparse_compressor_c3x.cu
@@ -147,7 +147,7 @@ bool cutlass_sparse_compress(torch::Tensor& a_nzs, torch::Tensor& a_meta,
 }
 
 bool cutlass_sparse_compress_sm90(torch::Tensor& a_nzs, torch::Tensor& a_meta,
-                                   torch::Tensor const& a) {
+                                  torch::Tensor const& a) {
   if (a.dtype() == torch::kBFloat16) {
     return cutlass_sparse_compress<cutlass::bfloat16_t, float>(a_nzs, a_meta,
                                                                a);

diff --git a/csrc/sparse/cutlass/sparse_compressor_entry.cu b/csrc/sparse/cutlass/sparse_compressor_entry.cu
@@ -17,12 +17,12 @@ bool cutlass_sparse_compress_entry(torch::Tensor& a_nzs, torch::Tensor& a_meta,
   TORCH_CHECK(a.size(0) == a_nzs.size(0) && a.size(0) == a_meta.size(0) &&
               a_nzs.size(1) * 2 == a.size(1) &&
               a_meta.size(1) * 2 * 4 == a.size(1));
-              // Considering elemsPerMetaElem = 8b / 2b_per_nz = 4
+  // Considering elemsPerMetaElem = 8b / 2b_per_nz = 4
 
   // Check for strides and alignment
   TORCH_CHECK(a.stride(1) == 1 && a_nzs.stride(1) == 1 &&
-              a_meta.stride(1) == 1);       // Row-major
-  TORCH_CHECK(a.stride(0) % 8 == 0);       // 8 Byte Alignment for Compression
+              a_meta.stride(1) == 1);  // Row-major
+  TORCH_CHECK(a.stride(0) % 8 == 0);   // 8 Byte Alignment for Compression
 
   at::cuda::OptionalCUDAGuard const device_guard(device_of(a));
   int32_t version_num = get_sm_version_num();

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
@@ -31,6 +31,7 @@ def __init__(self,
 
     @classmethod
     def get_min_capability(cls) -> int:
+        # Only cutlass 3.x kernels are implemented so far
         return 90
 
     def create_weights(self, layer: torch.nn.Module, input_size: int,