Skip to content

Commit

Permalink
disable triton FA by default
Browse files Browse the repository at this point in the history
  • Loading branch information
hliuca committed Dec 12, 2024
1 parent 22f9066 commit 1f947b5
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
VLLM_ROCM_PREFER_TORCH: bool = False
VLLM_ROCM_PREFER_TRITON: bool = True
VLLM_USE_SDPA_ATTENTION: bool = False
- VLLM_USE_TRITON_FLASH_ATTN: bool = True
+ VLLM_USE_TRITON_FLASH_ATTN: bool = False
VLLM_USE_ROCM_SKINNY_GEMM: bool = True
VLLM_USE_ROCM_CUSTOM_PAGED_ATTN: bool = True
VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT: bool = True
Expand Down Expand Up @@ -227,7 +227,7 @@ def get_default_config_root():

# flag to control if vllm should use triton flash attention
"VLLM_USE_TRITON_FLASH_ATTN":
- lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
+ lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "False").lower() in
("true", "1")),

# Internal flag to enable Dynamo fullgraph capture
Expand Down

0 comments on commit 1f947b5

Please sign in to comment.