Commit 7908e9b: format
gshtras committed Dec 19, 2024
1 parent a283f40 commit 7908e9b
Showing 3 changed files with 7 additions and 6 deletions.
vllm/envs.py (6 changes: 4 additions & 2 deletions)
@@ -534,11 +534,13 @@ def get_default_config_root():
"Q_SCALE_CONSTANT":
lambda: int(os.getenv("Q_SCALE_CONSTANT", "20")),

# Divisor for dynamic key scale factor calculation for FP8 KV Cache and attention
# Divisor for dynamic key scale factor calculation
# for FP8 KV Cache and attention
"K_SCALE_CONSTANT":
lambda: int(os.getenv("K_SCALE_CONSTANT", "20")),

# Divisor for dynamic value scale factor calculation for FP8 KV Cache and attention
# Divisor for dynamic value scale factor calculation
# for FP8 KV Cache and attention
"V_SCALE_CONSTANT":
lambda: int(os.getenv("V_SCALE_CONSTANT", "10")),

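For context, these constants divide into vLLM's dynamically computed FP8 scale factors for attention inputs. A minimal sketch of how such a divisor can enter the calculation (the function name and exact formula here are illustrative assumptions, not vLLM's implementation):

```python
import os

import torch

K_SCALE_CONSTANT = int(os.getenv("K_SCALE_CONSTANT", "20"))


def dynamic_k_scale(key: torch.Tensor) -> torch.Tensor:
    # Hypothetical sketch: derive a single per-tensor scale from the key
    # tensor's maximum magnitude, divided by the configured constant.
    return torch.abs(key).max().float() / K_SCALE_CONSTANT
```

Keeping Q, K, and V divisors as separate environment variables lets each tensor's dynamic scale be tuned independently, which is presumably why the value constant defaults to 10 while the query and key constants default to 20.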
vllm/model_executor/layers/quantization/kv_cache.py (5 changes: 2 additions & 3 deletions)
@@ -68,10 +68,9 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
            v_scale *= 2
        layer.calculate_kv_scales = False

-        if not isinstance(k_scale, float) or not isinstance(
-                v_scale, float):
+        if not isinstance(k_scale, float) or not isinstance(v_scale, float):
            raise ValueError("Only support per-tensor scaling factor "
-                "for fp8 KV cache")
+                             "for fp8 KV cache")

        # These are used in the final Attention.forward()
        layer._k_scale.copy_(k_scale)
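The reformatted guard enforces that FP8 KV-cache scales arrive as plain Python floats, i.e. one scalar per tensor. A standalone sketch of the same check (the wrapper function is added here for illustration):

```python
def validate_kv_scales(k_scale, v_scale) -> None:
    # Per-tensor scaling means a single scalar for each cache tensor; a
    # per-channel tensor or list of scales fails the isinstance checks.
    if not isinstance(k_scale, float) or not isinstance(v_scale, float):
        raise ValueError("Only support per-tensor scaling factor "
                         "for fp8 KV cache")


validate_kv_scales(0.02, 0.01)      # passes: scalar per-tensor scales
# validate_kv_scales([0.02], 0.01)  # would raise ValueError
```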
vllm/model_executor/models/llama.py (2 changes: 1 addition & 1 deletion)
@@ -201,7 +201,7 @@ def __init__(
        self.attn_fp8 = envs.VLLM_USE_ROCM_FP8_ATTN \
                        and current_platform.is_rocm() \
                        and not is_navi() \
-            and isinstance(quant_config, Fp8Config)
+                        and isinstance(quant_config, Fp8Config)

        self.attn = Attention(
            self.num_heads,
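The changed line is the tail of a backslash-continued boolean: ROCm FP8 attention is enabled only when the environment flag is set, the platform is ROCm but not Navi, and the model is quantized with Fp8Config. A parenthesized form of the same predicate avoids backslashes entirely, so a formatter never has to realign trailing lines; here is a runnable sketch with stubbed stand-ins for the vLLM internals (the stubs are assumptions, not vLLM's API):

```python
class Fp8Config:           # stand-in for vllm's Fp8Config
    pass


class _Platform:           # stand-in for vllm's current_platform
    @staticmethod
    def is_rocm() -> bool:
        return True


def is_navi() -> bool:     # stand-in for the ROCm Navi-GPU check
    return False


current_platform = _Platform()
VLLM_USE_ROCM_FP8_ATTN = True
quant_config = Fp8Config()

# Parenthesized form of the predicate from the diff: no backslash
# continuations, so indentation of the trailing lines cannot drift.
attn_fp8 = (VLLM_USE_ROCM_FP8_ATTN
            and current_platform.is_rocm()
            and not is_navi()
            and isinstance(quant_config, Fp8Config))
print(attn_fp8)  # True under these stubbed conditions
```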
