feat: Add health generate, health_generate test and fix mtp.py

dsingal0 · dsingal0 · commit e9300fb75041 · 2025-05-09T00:13:22.000-07:00
Signed-off-by: Dhruv Singal <dhruvsingalabc@gmail.com>
diff --git a/tensorrt_llm/_torch/speculative/mtp.py b/tensorrt_llm/_torch/speculative/mtp.py
@@ -689,7 +689,7 @@ def sample_and_accept_draft_tokens(
         num_accepted_tokens = torch.ones(batch_size,
                                          dtype=torch.int,
                                          device=logits.device)
-        
+
         if self.spec_config.use_relaxed_acceptance_for_thinking:
             mtp_relaxed_delta_pool = spec_metadata.mtp_hidden_states_manager.mtp_relaxed_delta_pool
 
diff --git a/tests/unittest/llmapi/apps/_test_llm_server.py b/tests/unittest/llmapi/apps/_test_llm_server.py
@@ -34,6 +34,9 @@ def test_health(client):
     response = client.get("/health")
     assert response.status_code == 200
 
+def test_health_generate(client):
+    response = client.get("/health_generate")
+    assert response.status_code == 200
 
 def test_generate(client):
     response = client.post("/generate", json={"prompt": "A B C"})