Skip to content

Commit 8f68177

Browse files
authored
fix sliding window multi chat (InternLM#3302)
1 parent e37a76d commit 8f68177

File tree

1 file changed

+2
-10
lines changed

1 file changed

+2
-10
lines changed

lmdeploy/pytorch/paging/block_manager/window_block_manager.py

+2 -10
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,6 @@
88
BlockTable = np.ndarray
99

1010

11-
def _div_up(x, n):
12-
"""perform div up."""
13-
return (x + n - 1) // n
14-
15-
1611
def _num_blocks_to_drop(seq: SchedulerSequence, window_size: int):
1712
"""num blocks to free."""
1813
if seq.history_len <= window_size:
@@ -44,13 +39,10 @@ def num_required_blocks(self, obj: SchedulerSequence, prealloc_size: int = 0):
4439
"""get num required blocks."""
4540

4641
# blocks is not enough
47-
if obj.num_history_ids < self.window_size:
42+
if obj.num_history_ids <= self.window_size:
4843
return super().num_required_blocks(obj, prealloc_size)
4944

50-
# we only keep history less than window_size
51-
num_tokens = self.window_size + obj.num_token_ids + prealloc_size
52-
num_all_blocks = _div_up(num_tokens, obj.block_size)
53-
return max(0, num_all_blocks - len(obj.logical_blocks))
45+
return super().num_required_blocks(obj, prealloc_size) - obj.num_ignored_history // obj.block_size
5446

5547
def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0):
5648
"""Return if physical block can be allocated for given message."""

0 commit comments

Comments
 (0)