From 52247f431ebeda18447e92cfc8cf1cf9ef95ff85 Mon Sep 17 00:00:00 2001 From: Peter Salas Date: Tue, 5 Nov 2024 16:43:27 +0000 Subject: [PATCH] [OpenVINO] Fix regression from #8346 Signed-off-by: Peter Salas --- .buildkite/run-openvino-test.sh | 2 +- vllm/attention/backends/openvino.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.buildkite/run-openvino-test.sh b/.buildkite/run-openvino-test.sh index 70e56596c4a86..35ad5c0ddde77 100755 --- a/.buildkite/run-openvino-test.sh +++ b/.buildkite/run-openvino-test.sh @@ -11,4 +11,4 @@ trap remove_docker_container EXIT remove_docker_container # Run the image and launch offline inference -docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/vllm/examples/offline_inference.py +docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference.py diff --git a/vllm/attention/backends/openvino.py b/vllm/attention/backends/openvino.py index 6fddfc2002120..be06d16009988 100644 --- a/vllm/attention/backends/openvino.py +++ b/vllm/attention/backends/openvino.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import List, Tuple, Type +from typing import Dict, List, Optional, Tuple, Type import openvino as ov import torch @@ -7,6 +7,7 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionMetadata) from vllm.attention.backends.utils import CommonAttentionState +from vllm.multimodal import MultiModalPlaceholderMap def copy_cache_block(src_tensor: ov.Tensor, dst_tensor: ov.Tensor, @@ -128,3 +129,12 @@ class OpenVINOAttentionMetadata: # Shape: scalar # Type: i32 max_context_len: torch.Tensor + + # The index maps that relate multi-modal embeddings to the corresponding + # placeholders. + # + # N.B. These aren't really related to attention and don't belong on this + # type -- this is just a temporary solution to make them available to + # `model_executable`. + multi_modal_placeholder_index_maps: Optional[Dict[ + str, MultiModalPlaceholderMap.IndexMap]]