vllm-project · hickeyma · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025
@@ -49,7 +49,7 @@
 if TYPE_CHECKING:
     from vllm.attention.backends.abstract import AttentionMetadata
     from vllm.config import VllmConfig
-    from vllm.distributed.kv_events import KVCacheEvent
+    from vllm.distributed.kv_events import KVCacheEvent, KVEventBatch
     from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
         KVConnectorPromMetrics,
         KVConnectorStats,
@@ -343,6 +343,12 @@ def get_kv_connector_stats(self) -> Optional["KVConnectorStats"]:
         """
         return None
 
+    def get_kv_connector_kv_cache_events(self) -> Optional["KVEventBatch"]:
+        """
+        Get the KV cache events the connector collected during the last interval.
+        """
+        return None  # this implementation never reports any events
+
     def get_handshake_metadata(self) -> KVConnectorHandshakeMetadata | None:
         """
         Get the KVConnector handshake metadata for this connector.

diff --git a/vllm/v1/outputs.py b/vllm/v1/outputs.py
@@ -8,9 +8,11 @@
 import torch
 
 if TYPE_CHECKING:
+    from vllm.distributed.kv_events import KVEventBatch
     from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
 else:
     KVConnectorStats = object
+    KVEventBatch = object
 
 
 class LogprobsLists(NamedTuple):
@@ -109,6 +111,7 @@ class KVConnectorOutput:
     finished_sending: set[str] | None = None
     finished_recving: set[str] | None = None
     kv_connector_stats: KVConnectorStats | None = None
+    kv_cache_events: KVEventBatch | None = None
     # IDs of externally computed KV blocks that failed to load.
     # Requests referencing these blocks should be rescheduled to recompute them
     invalid_block_ids: set[int] = field(default_factory=set)
@@ -124,6 +127,7 @@ def is_empty(self):
             not self.finished_sending
             and not self.finished_recving
             and not self.kv_connector_stats
+            and not self.kv_cache_events
             and not self.invalid_block_ids
         )
 

diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -12,6 +12,7 @@
 )
 
 from vllm.config import VllmConfig
+from vllm.distributed.kv_events import KVEventBatch
 from vllm.distributed.kv_transfer import (
     ensure_kv_transfer_shutdown,
     get_kv_transfer_group,
@@ -135,10 +136,19 @@ def _get_kv_connector_output(
             output.kv_connector_stats = (
                 KVConnectorModelRunnerMixin.get_kv_connector_stats()
             )
+            output.kv_cache_events = (
+                KVConnectorModelRunnerMixin.get_kv_connector_kv_cache_events()
+            )
             kv_connector.clear_connector_metadata()
 
     @staticmethod
     def get_kv_connector_stats() -> KVConnectorStats | None:
         if has_kv_transfer_group():
             return get_kv_transfer_group().get_kv_connector_stats()
         return None
+
+    @staticmethod
+    def get_kv_connector_kv_cache_events() -> KVEventBatch | None:
+        if not has_kv_transfer_group():
+            return None  # nothing to query when has_kv_transfer_group() is false
+        return get_kv_transfer_group().get_kv_connector_kv_cache_events()