Commit 455d0cb

NickLucche committed
update to use new attention_type interface
Signed-off-by: NickLucche <[email protected]>
1 parent 3eae4f6 commit 455d0cb

File tree

1 file changed: +3, -7

  • vllm/model_executor/models/t5.py

vllm/model_executor/models/t5.py (+3, -7)
@@ -202,7 +202,8 @@ def __init__(self,
                               1.0,
                               cache_config=cache_config,
                               quant_config=quant_config,
-                              prefix=f"{prefix}.attn")
+                              prefix=f"{prefix}.attn",
+                              attn_type=self.attn_type)

         # Only the first SelfAttention block in encoder decoder has this
         # embedding layer, the others reuse its output.
@@ -418,12 +419,7 @@ def forward(
         # Encoder/Decoder Self-Attention Layer, attn bias already cached.
         assert attn_bias is not None

-        attn_output = self.attn(q,
-                                k,
-                                v,
-                                kv_cache,
-                                attn_metadata,
-                                attn_type=self.attn_type)
+        attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
         output, _ = self.out_proj(attn_output)
         return output


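For context, the change moves T5's attention from supplying attn_type on every forward call to binding it once when the Attention layer is constructed. A minimal before/after sketch, assuming vLLM's Attention layer and AttentionType enum; the "..." stands for the remaining constructor arguments, and AttentionType.ENCODER is used purely as an illustrative value (in t5.py the actual value comes from self.attn_type):

    from vllm.attention import Attention, AttentionType

    # Old interface: the attention type was passed on each forward call.
    #   attn = Attention(num_heads, head_size, 1.0, ...)
    #   out = attn(q, k, v, kv_cache, attn_metadata,
    #              attn_type=AttentionType.ENCODER)
    #
    # New interface (this commit): the attention type is fixed at
    # construction time, so forward() drops the keyword.
    #   attn = Attention(num_heads, head_size, 1.0, ...,
    #                    attn_type=AttentionType.ENCODER)
    #   out = attn(q, k, v, kv_cache, attn_metadata)

Binding the type at construction presumably lets the backend select its encoder/decoder KV-cache handling once at init rather than re-checking a keyword on every forward pass.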