diff --git a/examples/offline_inference.py b/examples/offline_inference.py index 97447f703e667..c67967baed67d 100644 --- a/examples/offline_inference.py +++ b/examples/offline_inference.py @@ -1,5 +1,6 @@ from vllm import LLM, SamplingParams + def print_outputs(llm, outputs): for output in outputs: prompt = output.prompt diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index 481d0fcaf22c7..c1a0895b6f9bc 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -16,8 +16,8 @@ from vllm.sequence import (Sequence, SequenceData, SequenceGroup, SequenceGroupMetadata, SequenceGroupMetadataDelta, SequenceStatus) +from vllm.store.kv_store import BlockMappingFromCPU, KVBlockStoreManager from vllm.utils import Device, PyObjectCache -from vllm.store.kv_store import KVBlockStoreManager,BlockMappingFromCPU logger = init_logger(__name__) diff --git a/vllm/sequence.py b/vllm/sequence.py index 2d06b5ddbc51a..d0c8d83601ec4 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -6,9 +6,10 @@ from collections import defaultdict from dataclasses import dataclass, field from functools import reduce -from typing import Any, Callable, DefaultDict, Dict, List, Mapping, Optional +from typing import (TYPE_CHECKING, Any, Callable, DefaultDict, Dict, List, + Mapping, Optional) from typing import Sequence as GenericSequence -from typing import TYPE_CHECKING, Set, Tuple, Union +from typing import Set, Tuple, Union import msgspec import torch diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index d8d9a5ec2bf2b..bd85cb81edbc6 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -22,7 +22,7 @@ from vllm.prompt_adapter.request import PromptAdapterRequest from vllm.sequence import (ExecuteModelRequest, IntermediateTensors, SequenceGroupMetadata, SequenceGroupMetadataDelta) -from vllm.store.kv_store import KVBlockStore, KVStoreMeta, BlockMappingFromCPU +from vllm.store.kv_store import BlockMappingFromCPU, KVBlockStore, KVStoreMeta from vllm.worker.cache_engine import CacheEngine from vllm.worker.enc_dec_model_runner import EncoderDecoderModelRunner from vllm.worker.model_runner import GPUModelRunnerBase, ModelRunner