Skip to content

Commit

Permalink
Fix minor issues
Browse files Browse the repository at this point in the history
  • Loading branch information
Faraz9877 committed Dec 17, 2024
1 parent 0d38f0a commit c459bbc
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 9 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
# Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
# Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags.
# So, if the GIT_TAG above is changed to a commit hash, GIT_SHALLOW must be set to FALSE.
# GIT_SHALLOW FALSE
GIT_SHALLOW FALSE
)
endif()
FetchContent_MakeAvailable(cutlass)
Expand Down
9 changes: 5 additions & 4 deletions csrc/cutlass_extensions/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
/**
* Helper macro for checking CUTLASS errors
*/
#define CUTLASS_CHECK(status) \
{ \
TORCH_CHECK(status == cutlass::Status::kSuccess, \
cutlassGetStatusString(status)); \
#define CUTLASS_CHECK(status) \
{ \
cutlass::Status error = status; \
TORCH_CHECK(error == cutlass::Status::kSuccess, \
cutlassGetStatusString(error)); \
}

/**
Expand Down
2 changes: 1 addition & 1 deletion csrc/sparse/cutlass/sparse_compressor_c3x.cu
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ bool cutlass_sparse_compress(torch::Tensor& a_nzs, torch::Tensor& a_meta,
}

bool cutlass_sparse_compress_sm90(torch::Tensor& a_nzs, torch::Tensor& a_meta,
torch::Tensor const& a) {
torch::Tensor const& a) {
if (a.dtype() == torch::kBFloat16) {
return cutlass_sparse_compress<cutlass::bfloat16_t, float>(a_nzs, a_meta,
a);
Expand Down
6 changes: 3 additions & 3 deletions csrc/sparse/cutlass/sparse_compressor_entry.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ bool cutlass_sparse_compress_entry(torch::Tensor& a_nzs, torch::Tensor& a_meta,
TORCH_CHECK(a.size(0) == a_nzs.size(0) && a.size(0) == a_meta.size(0) &&
a_nzs.size(1) * 2 == a.size(1) &&
a_meta.size(1) * 2 * 4 == a.size(1));
// Considering elemsPerMetaElem = 8b / 2b_per_nz = 4
// Considering elemsPerMetaElem = 8b / 2b_per_nz = 4

// Check for strides and alignment
TORCH_CHECK(a.stride(1) == 1 && a_nzs.stride(1) == 1 &&
a_meta.stride(1) == 1); // Row-major
TORCH_CHECK(a.stride(0) % 8 == 0); // 8 Byte Alignment for Compression
a_meta.stride(1) == 1); // Row-major
TORCH_CHECK(a.stride(0) % 8 == 0); // 8 Byte Alignment for Compression

at::cuda::OptionalCUDAGuard const device_guard(device_of(a));
int32_t version_num = get_sm_version_num();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(self,

@classmethod
def get_min_capability(cls) -> int:
    """Return the minimum device capability required by this scheme."""
    # Only CUTLASS 3.x kernels are implemented so far, so the floor is 90
    # (presumably compute capability 9.0 / SM90 -- TODO confirm).
    return 90

def create_weights(self, layer: torch.nn.Module, input_size: int,
Expand Down

0 comments on commit c459bbc

Please sign in to comment.