From 793ef4f1fa65470af5bdb5d268ab7060d9f1a953 Mon Sep 17 00:00:00 2001
From: Lu Fang
Date: Tue, 4 Feb 2025 18:12:29 -0800
Subject: [PATCH] cleanup and add comments

Signed-off-by: Lu Fang
---
 vllm/model_executor/models/deepseek_mtp.py      | 1 -
 vllm/spec_decode/draft_model_runner.py          | 2 ++
 vllm/spec_decode/spec_decode_worker.py          | 5 ++---
 vllm/transformers_utils/configs/deepseek_mtp.py | 1 -
 vllm/worker/worker.py                           | 2 +-
 5 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py
index 6d2bf9af58286..318032a774239 100644
--- a/vllm/model_executor/models/deepseek_mtp.py
+++ b/vllm/model_executor/models/deepseek_mtp.py
@@ -97,7 +97,6 @@ class DeepSeekMultiTokenPredictor(nn.Module):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         config = vllm_config.model_config.hf_config
-        print(f"{config=}")
         self.mtp_start_layer_idx = config.num_hidden_layers
         self.num_mtp_layers = config.num_nextn_predict_layers
         # to map the exact layer index from weights
diff --git a/vllm/spec_decode/draft_model_runner.py b/vllm/spec_decode/draft_model_runner.py
index 319a2bb437ee0..b57ca0cde01ac 100644
--- a/vllm/spec_decode/draft_model_runner.py
+++ b/vllm/spec_decode/draft_model_runner.py
@@ -277,6 +277,8 @@ def execute_model(
             compute_logits_kwargs = {}
             # Run model
             if hasattr(self.model.config, "num_nextn_predict_layers"):
+                # For DeepSeek MTP only: use the MTP layer that
+                # corresponds to the current speculative step.
                 kwargs["step_idx"] = step
                 compute_logits_kwargs["step_idx"] = step
             with set_forward_context(model_input.attn_metadata,
diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py
index e73591846ffc3..bf3aa8e40b0db 100644
--- a/vllm/spec_decode/spec_decode_worker.py
+++ b/vllm/spec_decode/spec_decode_worker.py
@@ -182,9 +182,8 @@ def create_worker(
                         draft_worker_kwargs[
                             "model_runner_cls"] = TP1DraftModelRunner
                 else:
-                    if draft_model_config.hf_config.model_type in [
-                            "eagle", "deepseek_mtp"
-                    ]:
+                    if draft_model_config.hf_config.model_type in (
+                            "eagle", "deepseek_mtp"):
                         raise NotImplementedError(
                             f"{draft_model_config.hf_config.model_type} "
                             "does not support TP > 1 yet")
diff --git a/vllm/transformers_utils/configs/deepseek_mtp.py b/vllm/transformers_utils/configs/deepseek_mtp.py
index 324499a6abdda..ac2baa9c75cf9 100644
--- a/vllm/transformers_utils/configs/deepseek_mtp.py
+++ b/vllm/transformers_utils/configs/deepseek_mtp.py
@@ -13,7 +13,6 @@ class DeepSeekMTPConfig(PretrainedConfig):
     def __init__(self,
                  model: Union[PretrainedConfig, dict, None] = None,
                  **kwargs):
-        print("model: %s", model)
         if model is not None:
             self.model = DeepseekV3Config.from_dict(model, **kwargs)
         else:
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index 090d8f44d4ef4..bd07608f788f0 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -71,7 +71,7 @@ def __init__(
             or (speculative_config.draft_model_config.model ==
                 model_config.model) \
             or (speculative_config.draft_model_config.hf_config.model_type
-                not in ["medusa", "mlp_speculator", "eagle", "deepseek_mtp"]) \
+                not in ("medusa", "mlp_speculator", "eagle", "deepseek_mtp")) \
                     else {"return_hidden_states": True}

         ModelRunnerClass: Type[GPUModelRunnerBase] = ModelRunner
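
Note (not part of the patch): the comment added in draft_model_runner.py describes the per-step routing that DeepSeek MTP relies on, where each speculative step runs through its own next-n prediction layer selected by step_idx. The sketch below is a minimal, hedged illustration of that idea only; the class and attribute names (ToyMultiTokenPredictor, mtp_layers) are hypothetical stand-ins, and the real wiring lives in vllm/model_executor/models/deepseek_mtp.py.

import torch
import torch.nn as nn


class ToyMultiTokenPredictor(nn.Module):
    """Toy stand-in: one small predictor module per speculative step."""

    def __init__(self, hidden_size: int, num_nextn_predict_layers: int):
        super().__init__()
        # One layer per speculative step, mirroring num_nextn_predict_layers.
        self.mtp_layers = nn.ModuleList([
            nn.Linear(hidden_size, hidden_size)
            for _ in range(num_nextn_predict_layers)
        ])

    def forward(self, hidden_states: torch.Tensor,
                step_idx: int = 0) -> torch.Tensor:
        # Each speculative step routes through the matching MTP layer.
        return self.mtp_layers[step_idx](hidden_states)


predictor = ToyMultiTokenPredictor(hidden_size=16, num_nextn_predict_layers=2)
hidden = torch.randn(1, 16)
for step in range(2):
    # Analogous to the patch passing kwargs["step_idx"] = step per step.
    hidden = predictor(hidden, step_idx=step)
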