File tree: 1 file changed, +6 -6 lines changed
vllm/v1/attention/backends/mla: 1 file changed, +6 -6 lines changed
Columns: original file line number | diff line number | diff line change
@@ -77,7 +77,7 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
7777 # TODO(luka, lucas): audit this as part of:
7878 # https://github.com/vllm-project/vllm/issues/22945
7979 cudagraph_support : ClassVar [AttentionCGSupport ] = AttentionCGSupport .UNIFORM_BATCH
80- query_len_support : ClassVar [QueryLenSupport ] = QueryLenSupport .VARLEN
80+ query_len_support : ClassVar [QueryLenSupport ] = QueryLenSupport .UNIFORM
8181
8282 def __init__ (
8383 self ,
@@ -282,11 +282,11 @@ def __init__(
282282 kv_sharing_target_layer_name ,
283283 ** mla_args ,
284284 )
285- assert num_heads == 16 or num_heads == 128 , (
286- f"Aiter MLA only supports 16 or 128 number of heads.\n "
287- f"Provided { num_heads } number of heads.\n "
288- "Try adjusting tensor_parallel_size value."
289- )
285+ # assert num_heads == 16 or num_heads == 128, (
286+ # f"Aiter MLA only supports 16 or 128 number of heads.\n"
287+ # f"Provided {num_heads} number of heads.\n"
288+ # "Try adjusting tensor_parallel_size value."
289+ # )
290290 unsupported_features = [alibi_slopes , sliding_window , logits_soft_cap ]
291291 if any (unsupported_features ):
292292 raise NotImplementedError (
You can’t perform that action at this time.
0 commit comments