[Fix] add the call to get_physical_block_id

Signed-off-by: ApostaC <[email protected]>
vllm-project · Dec 8, 2024 · daab0d6 · daab0d6
1 parent 1c94985
commit daab0d6
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 4 deletions.
diff --git a/vllm/core/block/cpu_gpu_block_allocator.py b/vllm/core/block/cpu_gpu_block_allocator.py
@@ -334,7 +334,8 @@ def get_and_reset_swaps(self,
         Returns:
             A tuple of two lists: (blocks_to_swap_out, blocks_to_swap_in).
             Each list is a List[Tuple[int, int]], containing the mapping of 
-            source to destination block IDs.
+            source to destination block IDs. The block IDs are physical block
+            IDs and are expected to be used by the cache engine directly.
         """
         self._swap_mapping.clear()
         # return an empty list, to keep compatibility with previous behavior

diff --git a/vllm/core/block/cpu_offloading_block_allocator.py b/vllm/core/block/cpu_offloading_block_allocator.py
@@ -266,7 +266,8 @@ def get_and_reset_swaps(self,
         Returns:
             A tuple of two lists: (blocks_to_swap_out, blocks_to_swap_in).
             Each list is a List[Tuple[int, int]], containing the mapping of 
-            source to destination block IDs.
+            source to destination block IDs. The block IDs are physical block
+            IDs and are expected to be used by the cache engine directly.
         """
 
         allocator = self._allocators[Device.GPU]
@@ -328,9 +329,13 @@ def get_and_reset_swaps(self,
             # only two possible cases: CPU -> GPU, or GPU -> CPU
             if src in self._allocators[Device.GPU].all_block_ids:
                 # swap out
+                src = self._allocators[Device.GPU].get_physical_block_id(src)
+                dst = self._allocators[Device.CPU].get_physical_block_id(dst)
                 blocks_to_swap_out.append((src, dst))
             else:
                 # swap in
+                src = self._allocators[Device.CPU].get_physical_block_id(src)
+                dst = self._allocators[Device.GPU].get_physical_block_id(dst)
                 blocks_to_swap_in.append((src, dst))
         self._swap_mapping.clear()
         return blocks_to_swap_out, blocks_to_swap_in

diff --git a/vllm/core/block/interfaces.py b/vllm/core/block/interfaces.py
@@ -304,6 +304,7 @@ def get_and_reset_swaps(self,
         Returns:
             A tuple of two lists: (blocks_to_swap_out, blocks_to_swap_in).
             Each list is a List[Tuple[int, int]], containing the mapping of 
-            source to destination block IDs.
+            source to destination block IDs. The block IDs are physical block
+            IDs and are expected to be used by the cache engine directly.
         """
         pass
diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py
@@ -536,6 +536,7 @@ def get_and_reset_swaps(self,
         Returns:
             A tuple of two lists: (blocks_to_swap_out, blocks_to_swap_in).
             Each list is a List[Tuple[int, int]], containing the mapping of 
-            source to destination block IDs.
+            source to destination block IDs. The block IDs are physical block
+            IDs and are expected to be used by the cache engine directly.
         """
         return self.block_allocator.get_and_reset_swaps(now)