# Provenance (from the original patch header):
#   commit  76aab90ab68476c353ad58019fd51fd18622056a
#   author  Kunshang Ji (Signed-off-by: Kunshang Ji)
#   date    Sun, 17 Nov 2024 16:44:44 +0800
#   subject [PATCH] [Hardware] [HPU]add `mark_step` for hpu (#10239)
#   file    vllm/worker/hpu_model_runner.py (1 file changed, 14 insertions)
#
# Placement in the target file: this function is inserted after
# `precompute_indices_and_offsets` and before `class HpuModelAdapter`.
# The same patch adds the call `modify_decoder_layer(self.model)` in
# `load_model`, directly after `htcore.mark_step()` and before
# `torch.hpu.synchronize()`.


def modify_decoder_layer(module: torch.nn.Module, suffix="DecoderLayer"):
    """Register a ``mark_step`` forward hook on every matching submodule.

    Walks ``module`` and all of its descendants. Each module whose class
    name ends with ``suffix`` gets a forward hook that calls
    ``htorch.core.mark_step()`` after that module's forward pass and then
    returns the output unchanged.

    Args:
        module: Root of the module tree to instrument (modified in place).
        suffix: Class-name suffix that selects the modules to hook.

    Returns:
        None. Hooks are attached as a side effect.

    Note:
        Calling this twice on the same tree registers the hook twice; the
        function does not track or deduplicate earlier registrations.
    """
    if module.__class__.__name__.endswith(suffix):

        def forward_hook(module, args, output):
            # NOTE(review): presumably this cuts the HPU lazy-mode graph at
            # each decoder layer boundary -- confirm against the Gaudi docs.
            htorch.core.mark_step()
            return output

        module.register_forward_hook(forward_hook)

    # Propagate `suffix` explicitly: without it, a caller-supplied suffix
    # would only be honoured for the root module and silently replaced by
    # the default for every descendant.
    for child_module in module.children():
        modify_decoder_layer(child_module, suffix)