Skip to content

Commit

Permalink
[OpenVINO] Fix regression from #8346
Browse files Browse the repository at this point in the history
Signed-off-by: Peter Salas <[email protected]>
  • Loading branch information
petersalas committed Nov 5, 2024
1 parent 2094062 commit 52247f4
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .buildkite/run-openvino-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ trap remove_docker_container EXIT
remove_docker_container

# Run the image and launch offline inference
-docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/vllm/examples/offline_inference.py
+docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference.py
12 changes: 11 additions & 1 deletion vllm/attention/backends/openvino.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from dataclasses import dataclass
-from typing import List, Tuple, Type
+from typing import Dict, List, Optional, Tuple, Type

import openvino as ov
import torch

from vllm.attention.backends.abstract import (AttentionBackend,
AttentionMetadata)
from vllm.attention.backends.utils import CommonAttentionState
from vllm.multimodal import MultiModalPlaceholderMap


def copy_cache_block(src_tensor: ov.Tensor, dst_tensor: ov.Tensor,
Expand Down Expand Up @@ -128,3 +129,12 @@ class OpenVINOAttentionMetadata:
# Shape: scalar
# Type: i32
max_context_len: torch.Tensor

# The index maps that relate multi-modal embeddings to the corresponding
# placeholders.
#
# N.B. These aren't really related to attention and don't belong on this
# type -- this is just a temporary solution to make them available to
# `model_executable`.
multi_modal_placeholder_index_maps: Optional[Dict[
str, MultiModalPlaceholderMap.IndexMap]]

0 comments on commit 52247f4

Please sign in to comment.