diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py index cdf1c52b92af8..b4f4e5bb1500a 100644 --- a/vllm/attention/backends/rocm_flash_attn.py +++ b/vllm/attention/backends/rocm_flash_attn.py @@ -462,7 +462,7 @@ def __init__( if logits_soft_cap is None: # In flash-attn, setting logits_soft_cap as 0 means no soft cap. - self.logits_soft_cap = 0 + self.logits_soft_cap = 0.0 else: self.logits_soft_cap = logits_soft_cap