Merge pull request #767 from macrocosm-os/staging

dbobrenko · web-flow · commit e1bc8a7d79ba · 2025-06-23T11:16:55.000+02:00
v2.19.7

Changes:
  - Fix logprobs scoring when the connection to the LLM API fails.
  - Fix weight synchronization.
  - Raise the max model length from the fixed 8192 to the maximum the model supports.
diff --git a/prompting/api/weight_syncing/api.py b/prompting/api/weight_syncing/api.py
@@ -1,3 +1,4 @@
+import json
 import time
 
 from fastapi import APIRouter, Depends, HTTPException, Request
@@ -14,25 +15,34 @@ def get_weight_dict(request: Request):
     return request.app.state.weight_dict
 
 
-def get_uid_from_hotkey(hotkey: str):
+def get_uid_from_hotkey(hotkey: str) -> int:
     return shared_settings.METAGRAPH.hotkeys.index(hotkey)
 
 
 async def verify_weight_signature(request: Request):
     signed_by = request.headers.get("Epistula-Signed-By")
     signed_for = request.headers.get("Epistula-Signed-For")
+    if not signed_by or not signed_for:
+        raise HTTPException(400, "Missing Epistula-Signed-* headers")
+
     if signed_for != shared_settings.WALLET.hotkey.ss58_address:
         logger.error("Bad Request, message is not intended for self")
         raise HTTPException(status_code=400, detail="Bad Request, message is not intended for self")
     validator_hotkeys = [shared_settings.METAGRAPH.hotkeys[uid] for uid in WHITELISTED_VALIDATORS_UIDS]
     if signed_by not in validator_hotkeys:
         logger.error(f"Signer not the expected ss58 address: {signed_by}")
         raise HTTPException(status_code=401, detail="Signer not the expected ss58 address")
+
     now = time.time()
-    body = await request.body()
-    if body["uid"] != get_uid_from_hotkey(signed_by):
-        logger.error("Invalid uid")
-        raise HTTPException(status_code=400, detail="Invalid uid in body")
+    body: bytes = await request.body()
+    try:
+        payload = json.loads(body)
+    except json.JSONDecodeError:
+        raise HTTPException(400, "Invalid JSON body")
+
+    if payload.get("uid") != get_uid_from_hotkey(signed_by):
+        raise HTTPException(400, "Invalid uid in body")
+
     err = verify_signature(
         request.headers.get("Epistula-Request-Signature"),
         body,
diff --git a/prompting/llms/vllm_llm.py b/prompting/llms/vllm_llm.py
@@ -20,18 +20,12 @@ def __init__(
         self.model_id = model_id
         self.sampling_params = {} if sampling_params else sampling_params
 
-        # VLLM specific initialization
-        # gpu_memory_utilization = 0.9  # Default high utilization since VLLM is memory efficient
         self.model = LLM(
             model=model_id,
-            # tensor_parallel_size=1,  # Single GPU by default
-            # dtype="float16",
             trust_remote_code=True,
             gpu_memory_utilization=0.9,
-            max_model_len=8192,
         )
 
-        # Store tokenizer from VLLM for consistency
         self.tokenizer = self.model.get_tokenizer()
 
     @classmethod
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "prompting"
-version = "2.19.6"
+version = "2.19.7"
 description = "Subnetwork 1 runs on Bittensor and is maintained by Macrocosmos. It's an effort to create decentralised AI"
 authors = ["Kalei Brady, Dmytro Bobrenko, Felix Quinque, Steffen Cruz, Richard Wardle"]
 readme = "README.md"
diff --git a/shared/docker_utils.py b/shared/docker_utils.py
@@ -32,7 +32,6 @@ async def get_generation(
         return ""
 
 
-# @async_lru_cache(maxsize=1000)
 async def get_logits(
     messages: list[str],
     model: None = None,
@@ -41,21 +40,21 @@ async def get_logits(
     continue_last_message: bool = False,
     top_logprobs: int = 10,
 ) -> dict[str, Any] | None:
-    url = f"{constants.DOCKER_BASE_URL}/v1/chat/generate_logits"
-    headers = {"Content-Type": "application/json"}
-    payload = {
-        "messages": messages,
-        "seed": seed,
-        "sampling_params": sampling_params,
-        "top_logprobs": top_logprobs,
-        "continue_last_message": continue_last_message,
-    }
-    response = requests.post(url, headers=headers, json=payload)
     try:
+        url = f"{constants.DOCKER_BASE_URL}/v1/chat/generate_logits"
+        headers = {"Content-Type": "application/json"}
+        payload = {
+            "messages": messages,
+            "seed": seed,
+            "sampling_params": sampling_params,
+            "top_logprobs": top_logprobs,
+            "continue_last_message": continue_last_message,
+        }
+        response = requests.post(url, headers=headers, json=payload)
         json_response = response.json()
         return json_response
-    except requests.exceptions.JSONDecodeError:
-        logger.error(f"Error generating logits. Status: {response.status_code}, Body: {response.text}")
+    except BaseException as exc:
+        logger.error(f"Error generating logits: {exc}")
         return None