Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion vllm/distributed/kv_transfer/kv_connector/v1/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
if TYPE_CHECKING:
from vllm.attention.backends.abstract import AttentionMetadata
from vllm.config import VllmConfig
from vllm.distributed.kv_events import KVCacheEvent
from vllm.distributed.kv_events import KVCacheEvent, KVEventBatch
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
KVConnectorPromMetrics,
KVConnectorStats,
Expand Down Expand Up @@ -343,6 +343,12 @@ def get_kv_connector_stats(self) -> Optional["KVConnectorStats"]:
"""
return None

def get_kv_connector_kv_cache_events(self) -> Optional["KVEventBatch"]:
    """
    Return the KV cache events this connector collected during the
    last interval.

    This implementation has no events to report and always returns
    ``None``.

    Returns:
        A ``KVEventBatch`` with the collected events, or ``None`` when
        there is nothing to report.
    """
    return None

def get_handshake_metadata(self) -> KVConnectorHandshakeMetadata | None:
"""
Get the KVConnector handshake metadata for this connector.
Expand Down
4 changes: 4 additions & 0 deletions vllm/v1/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
import torch

if TYPE_CHECKING:
from vllm.distributed.kv_events import KVEventBatch
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
else:
KVConnectorStats = object
KVEventBatch = object


class LogprobsLists(NamedTuple):
Expand Down Expand Up @@ -109,6 +111,7 @@ class KVConnectorOutput:
finished_sending: set[str] | None = None
finished_recving: set[str] | None = None
kv_connector_stats: KVConnectorStats | None = None
kv_cache_events: KVEventBatch | None = None
# IDs of externally computed KV blocks that failed to load.
# Requests referencing these blocks should be rescheduled to recompute them
invalid_block_ids: set[int] = field(default_factory=set)
Expand All @@ -124,6 +127,7 @@ def is_empty(self):
not self.finished_sending
and not self.finished_recving
and not self.kv_connector_stats
and not self.kv_cache_events
and not self.invalid_block_ids
)

Expand Down
10 changes: 10 additions & 0 deletions vllm/v1/worker/kv_connector_model_runner_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)

from vllm.config import VllmConfig
from vllm.distributed.kv_events import KVEventBatch
from vllm.distributed.kv_transfer import (
ensure_kv_transfer_shutdown,
get_kv_transfer_group,
Expand Down Expand Up @@ -135,10 +136,19 @@ def _get_kv_connector_output(
output.kv_connector_stats = (
KVConnectorModelRunnerMixin.get_kv_connector_stats()
)
output.kv_cache_events = (
KVConnectorModelRunnerMixin.get_kv_connector_kv_cache_events()
)
kv_connector.clear_connector_metadata()

@staticmethod
def get_kv_connector_stats() -> KVConnectorStats | None:
    """
    Fetch the KV connector stats from the KV transfer group.

    Returns:
        Whatever the transfer group's ``get_kv_connector_stats()``
        returns, or ``None`` when no KV transfer group exists.
    """
    # Without a transfer group there is no connector to query.
    if not has_kv_transfer_group():
        return None
    return get_kv_transfer_group().get_kv_connector_stats()

@staticmethod
def get_kv_connector_kv_cache_events() -> KVEventBatch | None:
    """
    Fetch the KV cache events from the KV transfer group.

    Returns:
        Whatever the transfer group's
        ``get_kv_connector_kv_cache_events()`` returns, or ``None``
        when no KV transfer group exists.
    """
    # Without a transfer group there is no connector to query.
    if not has_kv_transfer_group():
        return None
    return get_kv_transfer_group().get_kv_connector_kv_cache_events()