
Commit

mgoin committed Jun 13, 2024
1 parent b015904 commit 211c4fc
Showing 1 changed file with 5 additions and 5 deletions.
auto_fp8/quantize.py (5 additions, 5 deletions)
@@ -71,11 +71,11 @@ def fp8_gemm(A, A_scale, B, B_scale, bias, out_dtype):
         # Deal with empty tensors (triggered by empty MoE experts)
         return torch.empty(size=(0, B.shape[0]), dtype=out_dtype, device=A.device)

-    native_fp8_support = (
-        torch.cuda.is_available()
-        and torch.cuda.get_device_capability() >= (8, 9)
-        and False
-    )
+    # TODO: Disable native fp8 gemm for now, always just dequantize
+    # native_fp8_support = (
+    #     torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 9)
+    # )
+    native_fp8_support = False
     if native_fp8_support:
         need_reshape = A.dim() == 3
         if need_reshape:
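For context, compute capability (8, 9) corresponds to NVIDIA Ada Lovelace GPUs (with Hopper at (9, 0) also passing the check), the generations that added hardware FP8 matmul support; the deleted expression would have enabled the native kernel only on such devices. With native_fp8_support pinned to False, fp8_gemm now always takes its dequantize path. The body of that path lies outside the lines shown in this diff, so the following is only a minimal sketch, under the assumption that the fallback upcasts both fp8 operands, folds in their per-tensor scales, and runs an ordinary matmul; the function name dequantized_gemm is hypothetical.

import torch

# Hypothetical sketch of the dequantize fallback in fp8_gemm; the actual
# branch body is not shown in this diff.
def dequantized_gemm(A, A_scale, B, B_scale, bias, out_dtype):
    # Upcast the fp8 operands to the output dtype and apply their
    # per-tensor scales, then use a regular (non-fp8) matmul.
    return torch.nn.functional.linear(
        A.to(out_dtype) * A_scale,  # activations: (batch, in_features)
        B.to(out_dtype) * B_scale,  # weights: (out_features, in_features)
        bias=bias,
    )

Skipping the native kernel trades speed for robustness: a dequantized matmul like this runs on any CUDA device, at the cost of materializing higher-precision copies of both operands.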
