Skip to content

Commit 966f933

Browse files
jeejeelee authored and simon-mo committed
[Bugfix] Fix LoRA extra vocab size (#15047)
Signed-off-by: Jee Jee Li <[email protected]>
1 parent 1a504af commit 966f933

File tree

5 files changed

+1
-5
lines changed

5 files changed

+1
-5
lines changed

examples/offline_inference/audio_language.py

-1
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,6 @@ def run_phi4mm(question: str, audio_count: int) -> ModelRequestData:
9393
max_num_seqs=2,
9494
enable_lora=True,
9595
max_lora_rank=320,
96-
lora_extra_vocab_size=0,
9796
limit_mm_per_prompt={"audio": audio_count},
9897
)
9998

examples/offline_inference/vision_language.py

-1
Original file line number | Diff line number | Diff line change
@@ -682,7 +682,6 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData:
682682
max_num_seqs=2,
683683
enable_lora=True,
684684
max_lora_rank=320,
685-
lora_extra_vocab_size=0,
686685
)
687686

688687
return ModelRequestData(

examples/offline_inference/vision_language_multi_image.py

-1
Original file line number | Diff line number | Diff line change
@@ -342,7 +342,6 @@ def load_phi4mm(question: str, image_urls: list[str]) -> ModelRequestData:
342342
limit_mm_per_prompt={"image": len(image_urls)},
343343
enable_lora=True,
344344
max_lora_rank=320,
345-
lora_extra_vocab_size=0,
346345
)
347346

348347
placeholders = "".join(f"<|image_{i}|>"

tests/models/decoder_only/vision_language/test_phi4mm.py

-1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,6 @@ def run_test(
100100
distributed_executor_backend=distributed_executor_backend,
101101
enable_lora=True,
102102
max_lora_rank=320,
103-
lora_extra_vocab_size=0,
104103
gpu_memory_utilization=0.8, # set to 0.8 to avoid OOM in CI
105104
enforce_eager=True,
106105
) as vllm_model:

vllm/config.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -2324,7 +2324,7 @@ def __post_init__(self):
23242324
# Setting the maximum rank to 512 should be able to satisfy the vast
23252325
# majority of applications.
23262326
possible_max_ranks = (8, 16, 32, 64, 128, 256, 320, 512)
2327-
possible_lora_extra_vocab_size = (0, 256, 512)
2327+
possible_lora_extra_vocab_size = (256, 512)
23282328
if self.max_lora_rank not in possible_max_ranks:
23292329
raise ValueError(
23302330
f"max_lora_rank ({self.max_lora_rank}) must be one of "

0 commit comments

Comments (0)