Skip to content

Commit 83815bb

Browse files
WoosukKwon authored and Isotr0py committed
[V1][BugFix] Fix edge case in VLM scheduling (vllm-project#12065)
Signed-off-by: Woosuk Kwon <[email protected]>
Signed-off-by: Isotr0py <[email protected]>
1 parent 090a829 commit 83815bb

File tree

1 file changed

+15
-11
lines changed

1 file changed

+15
-11
lines changed

vllm/v1/core/scheduler.py

+15-11
Original file line number | Diff line number | Diff line change
@@ -373,18 +373,22 @@ def _try_schedule_encoder_inputs(
373373
if self.encoder_cache_manager.has_cache(request, i):
374374
# The encoder input is already computed and cached.
375375
continue
376-
if not self.encoder_cache_manager.can_allocate(request, i):
377-
# The encoder cache is full. We can only schedule the decoder
378-
# tokens just before the encoder input.
379-
num_new_tokens = start_pos - num_computed_tokens
380-
break
381-
if num_encoder_tokens > encoder_budget:
382-
# The encoder budget is exhausted. We can only schedule the
383-
# decoder tokens up until the encoder input.
384-
# NOTE(woosuk): We assume that the encoder tokens should be
385-
# processed altogether, as the encoder usually uses
376+
if (not self.encoder_cache_manager.can_allocate(request, i)
377+
or num_encoder_tokens > encoder_budget):
378+
# The encoder cache is full or the encoder budget is exhausted.
379+
# NOTE(woosuk): We assume that the encoder input tokens should
380+
# be processed altogether, as the encoder usually uses
386381
# bidirectional attention.
387-
num_new_tokens = start_pos - num_computed_tokens
382+
if num_computed_tokens < start_pos:
383+
# We only schedule the decoder tokens just before the
384+
# encoder input.
385+
num_new_tokens = start_pos - num_computed_tokens
386+
else:
387+
# Because of prefix caching, num_computed_tokens is greater
388+
# than start_pos even though its encoder input is not
389+
# available. In this case, we can't schedule any token for
390+
# the request in this step.
391+
num_new_tokens = 0
388392
break
389393

390394
encoder_budget -= num_encoder_tokens

0 commit comments

Comments
 (0)