File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
QEfficient/transformers/models/llama4 Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change 32
32
repeat_kv ,
33
33
)
34
34
35
- from QEfficient .transformers .cache_utils import QEffHybridChunkedCache
35
+ from QEfficient .transformers .cache_utils import QEffDynamicCache
36
36
from QEfficient .transformers .modeling_attn_mask_utils import _create_causal_mask
37
37
from QEfficient .utils import constants
38
38
from QEfficient .utils ._utils import IOInfo
@@ -638,7 +638,7 @@ def forward(
638
638
return_legacy_cache = False
639
639
if use_cache and not isinstance (past_key_values , Cache ):
640
640
return_legacy_cache = True
641
- past_key_values = QEffHybridChunkedCache .from_legacy_cache (self . config , past_key_values )
641
+ past_key_values = QEffDynamicCache .from_legacy_cache (past_key_values )
642
642
643
643
if cache_position is None :
644
644
past_seen_tokens = past_key_values .get_seq_length () if past_key_values is not None else 0
You can’t perform that action at this time.
0 commit comments