Skip to content

Commit

Permalink
[Bugfix][Hardware][CPU] Fix broken encoder-decoder CPU runner (#10218)
Browse files: browse the repository at this point in the history
Signed-off-by: Isotr0py <[email protected]>
  • Loading branch information
Isotr0py authored Nov 11, 2024
1 parent 5fb1f93 commit 2cebda4
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .buildkite/run-cpu-test-ppc64le.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ source /etc/environment
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN="$HF_TOKEN" --name cpu-test cpu-test

function cpu_tests() {
set -e

# Run basic model test
docker exec cpu-test bash -c "
set -e
Expand Down
2 changes: 2 additions & 0 deletions .buildkite/run-cpu-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/hugg
--cpuset-mems=1 --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2 cpu-test-avx2

function cpu_tests() {
set -e

# offline inference
docker exec cpu-test-avx2 bash -c "
set -e
Expand Down
1 change: 1 addition & 0 deletions vllm/worker/cpu_embedding_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def prepare_model_input(
model_input.seq_lens)

return dataclasses.replace(model_input,
virtual_engine=virtual_engine,
pooling_metadata=pooling_metadata)

def _prepare_pooling(
Expand Down
11 changes: 11 additions & 0 deletions vllm/worker/cpu_enc_dec_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import torch

from vllm.attention import AttentionMetadata
from vllm.model_executor import SamplingMetadata
from vllm.model_executor.layers.sampler import SamplerOutput
from vllm.multimodal import MultiModalKwargs
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
Expand Down Expand Up @@ -96,11 +97,21 @@ def prepare_model_input(
encoder_input_positions_tensor,
) = self._prepare_encoder_model_input_tensors(seq_group_metadata_list,
model_input)
# Sampling metadata is only required for the final pipeline-parallel (pp) group
generators = self.get_generators(finished_requests_ids)
sampling_metadata = SamplingMetadata.prepare(seq_group_metadata_list,
model_input.seq_lens,
model_input.query_lens,
self.device,
pin_memory=False,
generators=generators)
return dataclasses.replace(
model_input,
sampling_metadata=sampling_metadata,
attn_metadata=attn_metadata,
encoder_input_tokens=encoder_input_tokens_tensor,
encoder_input_positions=encoder_input_positions_tensor,
virtual_engine=virtual_engine,
)

def _prepare_encoder_model_input_tensors(
Expand Down

0 comments on commit 2cebda4

Please sign in to comment.