Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
Signed-off-by: Cody Yu <[email protected]>
  • Loading branch information
comaniac committed Feb 5, 2025
1 parent 91da711 commit e2aa6de
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 8 deletions.
33 changes: 32 additions & 1 deletion tests/v1/core/test_kv_cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from vllm.multimodal.inputs import MultiModalKwargs
from vllm.sampling_params import SamplingParams
from vllm.v1.core.kv_cache_utils import (BlockHashType, FreeKVCacheBlockQueue,
KVCacheBlock,
KVCacheBlock, PrefixCachingMetrics,
generate_block_hash_extra_keys,
hash_block_tokens,
hash_request_tokens)
Expand Down Expand Up @@ -277,3 +277,34 @@ def test_hash_request_tokens_no_mm_inputs():
assert block_hashes[0].extra_keys is None
assert block_hashes[1].token_ids == (3, 4, 5)
assert block_hashes[1].extra_keys is None


def test_metrics():
    """
    Test the prefix caching metrics.

    Uses a window of 5 requests and checks that:
      * an empty tracker reports a hit rate of 0.0,
      * the hit rate is total hits / total queries over the window,
      * once more than 5 requests have been recorded, the oldest request
        is dropped from the aggregated totals,
      * reset() clears both aggregated counters and the request queue.
    """
    # Local import keeps this test self-contained; computed ratios are
    # compared with a tolerance instead of exact float equality so the test
    # does not depend on how the rate is computed internally.
    import math

    metrics = PrefixCachingMetrics(interval=5)
    # Nothing recorded yet: the tracker is expected to report exactly zero.
    assert metrics.hit_rate == 0.0

    metrics.add_request_query(20, 9)
    # 9 / 20 = 0.45
    assert math.isclose(metrics.hit_rate, 0.45)

    for _ in range(4):
        metrics.add_request_query(20, 4)

    # Window is now full (5 requests): 25 / 100 = 0.25
    assert math.isclose(metrics.hit_rate, 0.25)

    # Sixth request exceeds the interval, so the oldest entry is evicted.
    metrics.add_request_query(10, 2)

    # Remove (20, 9) and add (10, 2): 18 / 90 = 0.2
    assert metrics.aggregated_query_total == 90
    assert metrics.aggregated_query_hit == 18
    assert math.isclose(metrics.hit_rate, 0.2)

    metrics.reset()
    # After a reset the tracker must look freshly constructed.
    assert metrics.hit_rate == 0.0
    assert metrics.aggregated_query_total == 0
    assert metrics.aggregated_query_hit == 0
    assert not metrics.query_queue
4 changes: 2 additions & 2 deletions vllm/v1/core/kv_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def __init__(
self.req_to_blocks: DefaultDict[str,
List[KVCacheBlock]] = defaultdict(list)

# Prefix cache metrics.
self.prefix_caching_metrics = PrefixCachingMetrics()
# Prefix cache metrics. TODO: Make the interval configurable.
self.prefix_caching_metrics = PrefixCachingMetrics(interval=1000)

@property
def usage(self) -> float:
Expand Down
12 changes: 7 additions & 5 deletions vllm/v1/core/kv_cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ class PrefixCachingMetrics:

def __init__(self, interval: int = 1000):
self.interval = interval
# The current aggregated query total and hit.
self.aggregated_query_total = 0
self.aggregated_query_hit = 0
self.request_queries: deque[Tuple[int, int]] = deque()
# A deque of (num_queries, num_hits) for the most recent requests.
self.query_queue: deque[Tuple[int, int]] = deque()

def add_request_query(self, num_queries: int, num_hits: int):
"""Add a request to the metrics. This function is called when
Expand All @@ -54,9 +56,9 @@ def add_request_query(self, num_queries: int, num_hits: int):
num_hits: The number of hits in the request.
"""

self.request_queries.append((num_queries, num_hits))
if len(self.request_queries) > self.interval:
old_num_queries, old_num_hits = self.request_queries.popleft()
self.query_queue.append((num_queries, num_hits))
if len(self.query_queue) > self.interval:
old_num_queries, old_num_hits = self.query_queue.popleft()
self.aggregated_query_total -= old_num_queries
self.aggregated_query_hit -= old_num_hits

Expand All @@ -67,7 +69,7 @@ def reset(self):
"""Reset the metrics."""
self.aggregated_query_total = 0
self.aggregated_query_hit = 0
self.request_queries.clear()
self.query_queue.clear()

@property
def hit_rate(self) -> float:
Expand Down

0 comments on commit e2aa6de

Please sign in to comment.