From 6331a34de7a9eba01252fd223e4bff0d21a2d0ed Mon Sep 17 00:00:00 2001
From: wangxiyuan
Date: Fri, 20 Dec 2024 09:59:39 +0800
Subject: [PATCH] Debug

Signed-off-by: wangxiyuan
---
 vllm/attention/layer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 05d997279893b..335accafd4d43 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -9,12 +9,15 @@
 from vllm.attention.selector import backend_name_to_enum, get_attn_backend
 from vllm.config import CacheConfig, get_current_vllm_config
 from vllm.forward_context import ForwardContext, get_forward_context
+from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig)
 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
 from vllm.platforms import _Backend, current_platform
 from vllm.utils import direct_register_custom_op
 
+logger = init_logger(__name__)
+
 
 class Attention(nn.Module):
     """Attention layer.
@@ -307,6 +310,9 @@ def unified_attention_with_output_fake(
     return
 
 
+logger.info("====================current platform===========: %s",
+            current_platform.dispatch_key)
+
 direct_register_custom_op(
     op_name="unified_attention_with_output",
     op_func=unified_attention_with_output,