Skip to content

Commit bdb40c3

Browse files
committed
mtp
Signed-off-by: ganyi <[email protected]>
1 parent 97e7182 commit bdb40c3

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

vllm/v1/attention/backends/mla/rocm_aiter_mla.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
77 77
# TODO(luka, lucas): audit this as part of:
78 78
# https://github.com/vllm-project/vllm/issues/22945
79 79
cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
80-
query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.VARLEN
80+
query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
81 81

82 82
def __init__(
83 83
self,
@@ -282,11 +282,11 @@ def __init__(
282 282
kv_sharing_target_layer_name,
283 283
**mla_args,
284 284
)
285-
assert num_heads == 16 or num_heads == 128, (
286-
f"Aiter MLA only supports 16 or 128 number of heads.\n"
287-
f"Provided {num_heads} number of heads.\n"
288-
"Try adjusting tensor_parallel_size value."
289-
)
285+
# assert num_heads == 16 or num_heads == 128, (
286+
# f"Aiter MLA only supports 16 or 128 number of heads.\n"
287+
# f"Provided {num_heads} number of heads.\n"
288+
# "Try adjusting tensor_parallel_size value."
289+
# )
290 290
unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
291 291
if any(unsupported_features):
292 292
raise NotImplementedError(

0 commit comments

Comments
 (0)