
Commit 3847686

Simplify KV cache assignments
1 parent d2e9e45 commit 3847686

File tree: 6 files changed, +105 -140 lines changed

Diff for: litgpt/adapter.py (+5 -16)

@@ -9,7 +9,7 @@
 """

 from dataclasses import dataclass
-from typing import Any, Dict, Optional, Tuple, List
+from typing import Any, Dict, Optional, Tuple

 import torch
 import torch.nn as nn
@@ -30,39 +30,28 @@ class Config(BaseConfig):

 class GPT(BaseModel):
     # Copy & paste from :class:`model.GPT`. Note that :class:`Block` is new here.
-    def __init__(
-        self,
-        config: Config,
-        kv_cache: Optional[List[KVCache]] = None
+    def __init__(self, config: Config,
     ) -> None:
         nn.Module.__init__(self)
         assert config.padded_vocab_size is not None
         self.config = config

-        if kv_cache is not None:
-            if len(kv_cache) != config.n_layer:
-                raise ValueError(f"kv_cache length {len(kv_cache)} != {config.n_layer} = config.n_layer")
-            for kvc in kv_cache:
-                self._check_kv_cache(config, kvc)
-            self._default_kv_cache = False
-        else:
-            kv_cache = [None] * config.n_layer
-            self._default_kv_cache = True
         self.lm_head = nn.Linear(
             config.n_embd, config.padded_vocab_size, bias=config.lm_head_bias
         )
         self.transformer = nn.ModuleDict(
             dict(
                 wte=nn.Embedding(config.padded_vocab_size, config.n_embd),
                 h=nn.ModuleList(
-                    Block(config, block_idx, kv_cache=kvc)
-                    for block_idx, kvc in enumerate(kv_cache)
+                    Block(config, block_idx)
+                    for block_idx in range(config.n_layer)
                 ),
                 ln_f=config.norm_class(config.n_embd, eps=config.norm_eps),
             )
         )
         self.mask_cache: Optional[torch.Tensor] = None
         self.max_seq_length = self.config.block_size
+        self._default_kv_cache = False

     @classmethod
     def from_name(cls, name: str, **kwargs: Any) -> Self:
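
Note: the constructor no longer takes a per-layer `kv_cache` list, so there is nothing to validate or zip against the blocks, and `_default_kv_cache` simply starts out `False`. A minimal sketch of the resulting construction pattern, using toy stand-ins rather than the real litgpt `Config`/`Block` classes:

```python
# Toy sketch of the simplified pattern above; Config and Block here are
# illustrative stand-ins, not the litgpt classes.
from dataclasses import dataclass

import torch.nn as nn


@dataclass
class Config:
    n_layer: int = 4
    n_embd: int = 64
    padded_vocab_size: int = 128


class Block(nn.Module):
    def __init__(self, config: Config, block_idx: int) -> None:
        super().__init__()
        self.block_idx = block_idx
        self.attn = nn.Linear(config.n_embd, config.n_embd)
        self.kv_cache = None  # attached later, not threaded through __init__


class GPT(nn.Module):
    def __init__(self, config: Config) -> None:
        super().__init__()
        self.config = config
        self.transformer = nn.ModuleDict(
            dict(
                wte=nn.Embedding(config.padded_vocab_size, config.n_embd),
                # no kv_cache list to check or enumerate any more
                h=nn.ModuleList(Block(config, i) for i in range(config.n_layer)),
            )
        )
        self._default_kv_cache = False
```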

Diff for: litgpt/adapter_v2.py (+4 -16)

@@ -65,39 +65,27 @@ def reset_parameters(self) -> None:

 class GPT(BaseModel):
     # Copy & paste from :class:`model.GPT`. Note that :class:`Block` is new here.
-    def __init__(
-        self,
-        config: Config,
-        kv_cache: Optional[List[KVCache]] = None
-    ) -> None:
+    def __init__(self, config: Config) -> None:
         nn.Module.__init__(self)
         assert config.padded_vocab_size is not None
         self.config = config

-        if kv_cache is not None:
-            if len(kv_cache) != config.n_layer:
-                raise ValueError(f"kv_cache length {len(kv_cache)} != {config.n_layer} = config.n_layer")
-            for kvc in kv_cache:
-                self._check_kv_cache(config, kvc)
-            self._default_kv_cache = False
-        else:
-            kv_cache = [None] * config.n_layer
-            self._default_kv_cache = True
         self.lm_head = AdapterV2Linear(
             config.n_embd, config.padded_vocab_size, bias=config.lm_head_bias
         )
         self.transformer = nn.ModuleDict(
             dict(
                 wte=nn.Embedding(config.padded_vocab_size, config.n_embd),
                 h=nn.ModuleList(
-                    Block(config, block_idx, kv_cache=kvc)
-                    for block_idx, kvc in enumerate(kv_cache)
+                    Block(config, block_idx)
+                    for block_idx in range(config.n_layer)
                 ),
                 ln_f=config.norm_class(config.n_embd, eps=config.norm_eps),
             )
         )
         self.mask_cache: Optional[torch.Tensor] = None
         self.max_seq_length = self.config.block_size
+        self._default_kv_cache = False

     @classmethod
     def from_name(cls, name: str, **kwargs: Any) -> Self:
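
The same simplification applies here; call sites that previously passed a cache list now just construct the model. A sketch of the call-site change (the model name is illustrative, and `caches` refers to the per-layer list the old signature accepted):

```python
from litgpt.adapter_v2 import GPT, Config

config = Config.from_name("pythia-70m")  # illustrative model name

# Before this commit the constructor also accepted a per-layer cache list:
#     model = GPT(config, kv_cache=caches)
# After it, caches are no longer a constructor concern:
model = GPT(config)
```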

Diff for: litgpt/generate/base.py (+1 -1)

@@ -594,7 +594,7 @@ def main(
             temperature=temperature,
             top_k=top_k,
             top_p=top_p,
-            eos_id=int(tokenizer.eos_id),
+            eos_id=tokenizer.eos_id,
         )[0]
         t = time.perf_counter() - t0
         fabric.print(tokenizer.decode(y))
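
The `int(...)` cast around `tokenizer.eos_id` is dropped. One plausible motivation (an assumption, not stated in the commit): `eos_id` is optional downstream, and forcing a missing ID through `int()` would raise rather than simply disabling EOS-based stopping:

```python
# Assumption for illustration: an unset EOS id is represented as None.
eos_id = None

try:
    eos_id = int(eos_id)  # what the old call site effectively did
except TypeError as err:
    print(f"int() on a missing eos_id fails: {err}")

# Passing tokenizer.eos_id through unchanged leaves None handling to generate().
```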

Diff for: litgpt/lora.py (+3 -16)

@@ -482,24 +482,11 @@ def mlp_class(self) -> Type:

 class GPT(BaseModel):
     # Copy & paste from :class:`model.GPT`. Note that :class:`Block` is new here.
-    def __init__(
-        self,
-        config: Config,
-        kv_cache: Optional[List[KVCache]] = None
-    ) -> None:
+    def __init__(self, config: Config) -> None:
         nn.Module.__init__(self)
         assert config.padded_vocab_size is not None
         self.config = config

-        if kv_cache is not None:
-            if len(kv_cache) != config.n_layer:
-                raise ValueError(f"kv_cache length {len(kv_cache)} != {config.n_layer} = config.n_layer")
-            for kvc in kv_cache:
-                self._check_kv_cache(config, kvc)
-            self._default_kv_cache = False
-        else:
-            kv_cache = [None] * config.n_layer
-            self._default_kv_cache = True
         self.lm_head = create_lora_linear(
             config,
             config.n_embd,
@@ -511,8 +498,8 @@ def __init__(
             dict(
                 wte=nn.Embedding(config.padded_vocab_size, config.n_embd),
                 h=nn.ModuleList(
-                    Block(config, block_idx, kv_cache=kvc)
-                    for block_idx, kvc in enumerate(kv_cache)
+                    Block(config, block_idx)
+                    for block_idx in range(config.n_layer)
                 ),
                 ln_f=config.norm_class(config.n_embd, eps=config.norm_eps),
             )
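
With the constructor argument gone in lora.py as well, a caller that wants custom per-layer caches would presumably attach them after construction. A hypothetical helper sketching that idea; the attribute names (`model.transformer.h`, `block.attn.kv_cache`) mirror the layout visible in these diffs but are assumptions about the refactor's API, not part of this commit:

```python
# Hypothetical helper, not part of this commit: attach one cache per block
# after the model has been built. Attribute names are assumptions.
def assign_kv_caches(model, caches) -> None:
    blocks = model.transformer.h
    if len(caches) != len(blocks):
        raise ValueError(f"expected {len(blocks)} caches, got {len(caches)}")
    for block, cache in zip(blocks, caches):
        block.attn.kv_cache = cache
```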
