Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
Signed-off-by: Cody Yu <[email protected]>
  • Loading branch information
comaniac committed Nov 4, 2024
1 parent 87f5195 commit b8e5fe1
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 22 deletions.
42 changes: 20 additions & 22 deletions tests/v1/core/test_prefix_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,21 @@ def test_prefill():
assert manager.block_pool[block_id].ref_cnt == 2

# At this point, we should have 3 free blocks left.
assert manager.num_free_blocks == 3
assert manager.free_block_queue.num_free_blocks == 3

manager.free(req0)
manager.free(req1)

# All blocks should be available.
assert manager.num_free_blocks == 10
assert manager.free_block_queue.num_free_blocks == 10
# The order should be
# [unallocated (7, 8)]
# [unique_req0 (4, 3)]
# [unique_req1 (6, 5)]
# [common (2, 1, 0)]
assert [b.block_id for b in manager.free_block_queue
] == [7, 8, 9, 4, 3, 6, 5, 2, 1, 0]
assert [
b.block_id for b in manager.free_block_queue.get_all_free_blocks()
] == [7, 8, 9, 4, 3, 6, 5, 2, 1, 0]

# Cache hit in the common prefix when the original block is already free.
# Incomplete 1 block (6 tokens)
Expand All @@ -100,9 +101,12 @@ def test_prefill():

# Although we only have 5 free blocks, we have 8 blocks in
# the free block queue due to lazy removal.
assert manager.num_free_blocks == 5
assert len(list(manager.free_block_queue)) == 8
assert len([b for b in manager.free_block_queue if b.ref_cnt > 0]) == 3
assert manager.free_block_queue.num_free_blocks == 5
assert all([
b.ref_cnt == 0 for b in manager.free_block_queue.get_all_free_blocks()
])
assert len([b
for b in manager.free_block_queue.get_all_free_blocks()]) == 5

manager.free(req2)

Expand All @@ -113,8 +117,9 @@ def test_prefill():
block_ids = manager.allocate_slots(req2, 16 * 9, computed_block_ids)
# This block ID order also checks the eviction order.
assert block_ids == [9, 4, 3, 6, 5, 8, 7, 2, 1, 0]
assert manager.num_free_blocks == 0
assert not manager.free_block_queue
assert manager.free_block_queue.num_free_blocks == 0
assert manager.free_block_queue.free_list_head is None
assert manager.free_block_queue.free_list_tail is None


def test_decode():
Expand Down Expand Up @@ -194,26 +199,19 @@ def test_evict():
assert len(block_ids) == 3 # 3 full blocks
last_token_id += 3 * 16

assert manager.num_free_blocks == 0
assert manager.free_block_queue.num_free_blocks == 0

manager.free(req0)
manager.free(req1)
assert manager.num_free_blocks == 10
assert [b.block_id for b in manager.free_block_queue
] == [6, 5, 4, 3, 2, 1, 0, 9, 8, 7]
assert manager.free_block_queue.num_free_blocks == 10
assert [
b.block_id for b in manager.free_block_queue.get_all_free_blocks()
] == [6, 5, 4, 3, 2, 1, 0, 9, 8, 7]

# Touch the first 2 blocks.
req2 = make_request("2", list(range(2 * 16 + 3)))
computed_block_ids = manager.get_computed_blocks(req2)
assert computed_block_ids == [0, 1]
block_ids = manager.allocate_slots(req2, 3, computed_block_ids)
assert block_ids == [6, 5]
# Number of free blocks should be updated immediately
assert manager.num_free_blocks == 6
# The touched blocks haven't been removed from the free block queue.
assert len(manager.free_block_queue) == 8

# The touched blocks should be removed from the free block queue.
manager.async_remove_touched_blocks()
manager.wait_for_removing_touched_blocks()
assert len(manager.free_block_queue) == 6
assert manager.free_block_queue.num_free_blocks == 6
13 changes: 13 additions & 0 deletions vllm/v1/core/kv_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ def append(self, block: KVCacheBlock) -> None:
block.next_free_block = None
self.num_free_blocks += 1

def get_all_free_blocks(self) -> List[KVCacheBlock]:
"""Get all free blocks in the free list. Mainly used for testing.
Returns:
A list of free blocks.
"""
ret = []
curr_block = self.free_list_head
while curr_block is not None:
ret.append(curr_block)
curr_block = curr_block.next_free_block
return ret


class KVCacheManager:

Expand Down

0 comments on commit b8e5fe1

Please sign in to comment.