Skip to content

Commit e1bc8a7

Browse files
authored
Merge pull request #767 from macrocosm-os/staging
v2.19.7 Changes: - Fix logprobs scoring when the connection to the LLM API fails. - Fix weight synchronization. - Raise the max model length from 8192 to the maximum length the model supports.
2 parents 816c516 + c26d802 commit e1bc8a7

File tree

4 files changed

+28
-25
lines changed

4 files changed

+28
-25
lines changed

prompting/api/weight_syncing/api.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import time
23

34
from fastapi import APIRouter, Depends, HTTPException, Request
@@ -14,25 +15,34 @@ def get_weight_dict(request: Request):
1415
return request.app.state.weight_dict
1516

1617

17-
def get_uid_from_hotkey(hotkey: str):
18+
def get_uid_from_hotkey(hotkey: str) -> int:
1819
return shared_settings.METAGRAPH.hotkeys.index(hotkey)
1920

2021

2122
async def verify_weight_signature(request: Request):
2223
signed_by = request.headers.get("Epistula-Signed-By")
2324
signed_for = request.headers.get("Epistula-Signed-For")
25+
if not signed_by or not signed_for:
26+
raise HTTPException(400, "Missing Epistula-Signed-* headers")
27+
2428
if signed_for != shared_settings.WALLET.hotkey.ss58_address:
2529
logger.error("Bad Request, message is not intended for self")
2630
raise HTTPException(status_code=400, detail="Bad Request, message is not intended for self")
2731
validator_hotkeys = [shared_settings.METAGRAPH.hotkeys[uid] for uid in WHITELISTED_VALIDATORS_UIDS]
2832
if signed_by not in validator_hotkeys:
2933
logger.error(f"Signer not the expected ss58 address: {signed_by}")
3034
raise HTTPException(status_code=401, detail="Signer not the expected ss58 address")
35+
3136
now = time.time()
32-
body = await request.body()
33-
if body["uid"] != get_uid_from_hotkey(signed_by):
34-
logger.error("Invalid uid")
35-
raise HTTPException(status_code=400, detail="Invalid uid in body")
37+
body: bytes = await request.body()
38+
try:
39+
payload = json.loads(body)
40+
except json.JSONDecodeError:
41+
raise HTTPException(400, "Invalid JSON body")
42+
43+
if payload.get("uid") != get_uid_from_hotkey(signed_by):
44+
raise HTTPException(400, "Invalid uid in body")
45+
3646
err = verify_signature(
3747
request.headers.get("Epistula-Request-Signature"),
3848
body,

prompting/llms/vllm_llm.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,12 @@ def __init__(
2020
self.model_id = model_id
2121
self.sampling_params = {} if sampling_params else sampling_params
2222

23-
# VLLM specific initialization
24-
# gpu_memory_utilization = 0.9 # Default high utilization since VLLM is memory efficient
2523
self.model = LLM(
2624
model=model_id,
27-
# tensor_parallel_size=1, # Single GPU by default
28-
# dtype="float16",
2925
trust_remote_code=True,
3026
gpu_memory_utilization=0.9,
31-
max_model_len=8192,
3227
)
3328

34-
# Store tokenizer from VLLM for consistency
3529
self.tokenizer = self.model.get_tokenizer()
3630

3731
@classmethod

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "prompting"
3-
version = "2.19.6"
3+
version = "2.19.7"
44
description = "Subnetwork 1 runs on Bittensor and is maintained by Macrocosmos. It's an effort to create decentralised AI"
55
authors = ["Kalei Brady, Dmytro Bobrenko, Felix Quinque, Steffen Cruz, Richard Wardle"]
66
readme = "README.md"

shared/docker_utils.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ async def get_generation(
3232
return ""
3333

3434

35-
# @async_lru_cache(maxsize=1000)
3635
async def get_logits(
3736
messages: list[str],
3837
model: None = None,
@@ -41,21 +40,21 @@ async def get_logits(
4140
continue_last_message: bool = False,
4241
top_logprobs: int = 10,
4342
) -> dict[str, Any] | None:
44-
url = f"{constants.DOCKER_BASE_URL}/v1/chat/generate_logits"
45-
headers = {"Content-Type": "application/json"}
46-
payload = {
47-
"messages": messages,
48-
"seed": seed,
49-
"sampling_params": sampling_params,
50-
"top_logprobs": top_logprobs,
51-
"continue_last_message": continue_last_message,
52-
}
53-
response = requests.post(url, headers=headers, json=payload)
5443
try:
44+
url = f"{constants.DOCKER_BASE_URL}/v1/chat/generate_logits"
45+
headers = {"Content-Type": "application/json"}
46+
payload = {
47+
"messages": messages,
48+
"seed": seed,
49+
"sampling_params": sampling_params,
50+
"top_logprobs": top_logprobs,
51+
"continue_last_message": continue_last_message,
52+
}
53+
response = requests.post(url, headers=headers, json=payload)
5554
json_response = response.json()
5655
return json_response
57-
except requests.exceptions.JSONDecodeError:
58-
logger.error(f"Error generating logits. Status: {response.status_code}, Body: {response.text}")
56+
except BaseException as exc:
57+
logger.error(f"Error generating logits: {exc}")
5958
return None
6059

6160

0 commit comments

Comments
 (0)