Commit 3cf33a4 (v2.17.1)
Merge pull request #576 from macrocosm-os/staging
2 parents: 8c73917 + 9c9c87b


47 files changed: +756, -304 lines

README.md
Lines changed: 2 additions & 2 deletions

```diff
@@ -35,7 +35,7 @@ Validators and miners are based on large language models (LLM). The validation p
 
 <div align="center">
 
-**[For Validators](./assets/validator.md)** · **[For Miners](./assets/miner.md)** · **[API Documentation](./validator_api/API_docs.md)**
+**[For Validators](./docs/validator.md)** · **[For Miners](./docs/epistula_miner.md)** · **[API Documentation]((./docs/API_docs.md))**
 
 
 </div>
@@ -66,7 +66,7 @@ The miner is given a complex problem that requires multiple steps to solve. Each
 
 # API Documentation
 
-For detailed information on the available API endpoints, request/response formats, and usage examples, please refer to the [API Documentation](./validator_api/API_docs.md).
+For detailed information on the available API endpoints, request/response formats, and usage examples, please refer to the [API Documentation](./docs/API_docs.md).
 
 # Contribute
 <div align="center">
```
File renamed without changes.
File renamed without changes.
File renamed without changes.

neurons/miners/epistula_miner/web_retrieval.py
Lines changed: 3 additions & 3 deletions

```diff
@@ -7,7 +7,7 @@
 from openai import OpenAI
 
 from prompting.base.duckduckgo_patch import PatchedDDGS
-from shared.settings import shared_settings
+from shared import settings
 
 # Import the patched DDGS and use that
 
@@ -56,7 +56,7 @@ async def get_websites_with_similarity(
         List of dictionaries containing website URLs and their best matching chunks
     """
     logger.debug("Getting results")
-    ddgs = PatchedDDGS(proxy=shared_settings.PROXY_URL, verify=False)
+    ddgs = PatchedDDGS(proxy=settings.shared_settings.PROXY_URL, verify=False)
     results = list(ddgs.text(query))
     logger.debug(f"Got {len(results)} results")
     urls = [r["href"] for r in results][:n_results]
@@ -66,7 +66,7 @@ async def get_websites_with_similarity(
     extracted = await asyncio.gather(*[extract_content(c) for c in content])
 
     # Create embeddings
-    client = OpenAI(api_key=shared_settings.OPENAI_API_KEY)
+    client = OpenAI(api_key=settings.shared_settings.OPENAI_API_KEY)
     query_embedding = client.embeddings.create(model="text-embedding-ada-002", input=query).data[0].embedding
     # Process each website
     results_with_similarity = []
```
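Note on the recurring import change in this commit: `from shared.settings import shared_settings` copies the object reference once at import time, so a settings object created later by `SharedSettings.load()` is never seen, while `from shared import settings` defers the `settings.shared_settings` lookup to each call site. A minimal, self-contained sketch of the pitfall (module and attribute names are illustrative, not the repo's):

```python
# Sketch of the stale-binding pitfall (names are illustrative, not the repo's).
import types

settings_mod = types.ModuleType("settings_mod")
settings_mod.shared_settings = None  # nothing loaded yet at import time

# "from settings_mod import shared_settings" copies the binding once:
shared_settings = settings_mod.shared_settings

# later, a loader replaces the settings object:
settings_mod.shared_settings = {"PROXY_URL": "http://localhost:8080"}

print(shared_settings)               # None - the import-time snapshot
print(settings_mod.shared_settings)  # the live object, seen by late lookup
```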

neurons/validator.py
Lines changed: 14 additions & 31 deletions

```diff
@@ -9,8 +9,8 @@
 
 # ruff: noqa: E402
 from shared import settings
+from shared.logging import init_wandb
 
-shared_settings = settings.shared_settings
 settings.shared_settings = settings.SharedSettings.load(mode="validator")
 
 
@@ -26,13 +26,12 @@
 
 
 def create_loop_process(task_queue, scoring_queue, reward_events):
+    settings.shared_settings = settings.SharedSettings.load(mode="validator")
+    if settings.shared_settings.WANDB_ON:
+        init_wandb(neuron="validator")
+
     async def spawn_loops(task_queue, scoring_queue, reward_events):
         # ruff: noqa: E402
-        wandb.setup()
-        from shared import settings
-
-        settings.shared_settings = settings.SharedSettings.load(mode="validator")
-
         from prompting.llms.model_manager import model_scheduler
         from prompting.miner_availability.miner_availability import availability_checking_loop
         from prompting.rewards.scoring import task_scorer
@@ -88,10 +87,11 @@ async def start():
         # TODO: We should not use 2 availability loops for each process, in reality
         # we should only be sharing the miner availability data between processes.
         from prompting.miner_availability.miner_availability import availability_checking_loop
+        from prompting.rewards.scoring import task_scorer
 
         asyncio.create_task(availability_checking_loop.start())
 
-        await start_scoring_api(scoring_queue, reward_events)
+        await start_scoring_api(task_scorer, scoring_queue, reward_events)
 
         while True:
             await asyncio.sleep(10)
@@ -100,23 +100,6 @@ async def start():
     asyncio.run(start())
 
 
-# def create_task_loop(task_queue, scoring_queue):
-#     async def start(task_queue, scoring_queue):
-#         logger.info("Starting AvailabilityCheckingLoop...")
-#         asyncio.create_task(availability_checking_loop.start())
-
-#         logger.info("Starting TaskSender...")
-#         asyncio.create_task(task_sender.start(task_queue, scoring_queue))
-
-#         logger.info("Starting TaskLoop...")
-#         asyncio.create_task(task_loop.start(task_queue, scoring_queue))
-#         while True:
-#             await asyncio.sleep(10)
-#             logger.debug("Running task loop...")
-
-#     asyncio.run(start(task_queue, scoring_queue))
-
-
 async def main():
     # will start checking the availability of miners at regular intervals, needed for API and Validator
     with torch.multiprocessing.Manager() as manager:
@@ -130,7 +113,7 @@ async def main():
         try:
             # # Start checking the availability of miners at regular intervals
 
-            if shared_settings.DEPLOY_SCORING_API:
+            if settings.shared_settings.DEPLOY_SCORING_API:
                 # Use multiprocessing to bypass API blocking issue
                 api_process = mp.Process(target=start_api, args=(scoring_queue, reward_events), name="API_Process")
                 api_process.start()
@@ -152,17 +135,17 @@ async def main():
            while True:
                await asyncio.sleep(30)
                if (
-                    shared_settings.SUBTENSOR.get_current_block()
-                    - shared_settings.METAGRAPH.last_update[shared_settings.UID]
+                    settings.shared_settings.SUBTENSOR.get_current_block()
+                    - settings.shared_settings.METAGRAPH.last_update[settings.shared_settings.UID]
                    > 500
                    and step > 120
                ):
+                    current_block = settings.shared_settings.SUBTENSOR.get_current_block()
+                    last_update_block = settings.shared_settings.METAGRAPH.last_update[settings.shared_settings.UID]
                    logger.warning(
-                        f"UPDATES HAVE STALED FOR {shared_settings.SUBTENSOR.get_current_block() - shared_settings.METAGRAPH.last_update[shared_settings.UID]} BLOCKS AND {step} STEPS"
-                    )
-                    logger.warning(
-                        f"STALED: {shared_settings.SUBTENSOR.get_current_block()}, {shared_settings.METAGRAPH.block}"
+                        f"UPDATES HAVE STALED FOR {current_block - last_update_block} BLOCKS AND {step} STEPS"
                    )
+                    logger.warning(f"STALED: {current_block}, {settings.shared_settings.METAGRAPH.block}")
                    sys.exit(1)
                step += 1
```
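`create_loop_process` now reloads settings and initializes wandb inside the process target instead of relying on state set up in the parent. Under a spawn start method a child process re-imports modules, so module-level state from the parent does not carry over. A small runnable sketch of that behavior (names are illustrative; the real code reloads `SharedSettings` and calls `init_wandb`):

```python
# Sketch: with the "spawn" start method a child process re-imports the
# module, so state initialized in the parent is not inherited. Names are
# illustrative stand-ins for the settings/wandb setup in this diff.
import multiprocessing as mp

STATE = {"loaded": False}

def init_state():
    STATE["loaded"] = True

def worker():
    print("child sees loaded =", STATE["loaded"])  # False: fresh import
    init_state()                                   # so the child must re-init
    print("child after init =", STATE["loaded"])   # True

if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    init_state()  # parent initializes its own copy only
    p = mp.Process(target=worker)
    p.start()
    p.join()
```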

prompting/api/api.py
Lines changed: 13 additions & 7 deletions

```diff
@@ -4,8 +4,7 @@
 
 from prompting.api.miner_availabilities.api import router as miner_availabilities_router
 from prompting.api.scoring.api import router as scoring_router
-from prompting.rewards.scoring import task_scorer
-from shared.settings import shared_settings
+from shared import settings
 
 app = FastAPI()
 app.include_router(miner_availabilities_router, prefix="/miner_availabilities", tags=["miner_availabilities"])
@@ -18,10 +17,17 @@ def health():
     return {"status": "healthy"}
 
 
-async def start_scoring_api(scoring_queue, reward_events):
-    task_scorer.scoring_queue = scoring_queue
-    task_scorer.reward_events = reward_events
-    logger.info(f"Starting Scoring API on https://0.0.0.0:{shared_settings.SCORING_API_PORT}")
+async def start_scoring_api(task_scorer, scoring_queue, reward_events):
+    # We pass an object of task scorer then override it's attributes to ensure that they are managed by mp
+    app.state.task_scorer = task_scorer
+    app.state.task_scorer.scoring_queue = scoring_queue
+    app.state.task_scorer.reward_events = reward_events
+
+    logger.info(f"Starting Scoring API on https://0.0.0.0:{settings.shared_settings.SCORING_API_PORT}")
     uvicorn.run(
-        "prompting.api.api:app", host="0.0.0.0", port=shared_settings.SCORING_API_PORT, loop="asyncio", reload=False
+        "prompting.api.api:app",
+        host="0.0.0.0",
+        port=settings.shared_settings.SCORING_API_PORT,
+        loop="asyncio",
+        reload=False,
     )
```
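The refactor stops mutating a module-level `task_scorer` and instead stashes the injected instance on `app.state`, where request handlers can resolve it. A minimal sketch of the hand-off pattern, with an illustrative `Scorer` class and route rather than the repo's real ones:

```python
# Sketch of the app.state hand-off (Scorer and /queue_size are illustrative).
from fastapi import Depends, FastAPI, Request

app = FastAPI()

class Scorer:
    def __init__(self):
        self.scoring_queue = None  # replaced with an mp-managed list at startup

def get_scorer(request: Request) -> Scorer:
    # handlers resolve the instance from app.state at request time
    return request.app.state.scorer

@app.get("/queue_size")
def queue_size(scorer: Scorer = Depends(get_scorer)):
    return {"size": len(scorer.scoring_queue or [])}

def wire_up(scorer: Scorer, scoring_queue) -> None:
    scorer.scoring_queue = scoring_queue  # e.g. multiprocessing.Manager().list()
    app.state.scorer = scorer
```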

prompting/api/scoring/api.py
Lines changed: 14 additions & 9 deletions

```diff
@@ -6,28 +6,33 @@
 
 from prompting.datasets.random_website import DDGDatasetEntry
 from prompting.llms.model_zoo import ModelZoo
-from prompting.rewards.scoring import task_scorer
 from prompting.tasks.inference import InferenceTask
 from prompting.tasks.web_retrieval import WebRetrievalTask
+from shared import settings
 from shared.base import DatasetEntry
 from shared.dendrite import DendriteResponseEvent
 from shared.epistula import SynapseStreamResult
-from shared.settings import shared_settings
 
 router = APIRouter()
 
 
 def validate_scoring_key(api_key: str = Header(...)):
-    if api_key != shared_settings.SCORING_KEY:
+    if api_key != settings.shared_settings.SCORING_KEY:
         raise HTTPException(status_code=403, detail="Invalid API key")
 
 
+def get_task_scorer(request: Request):
+    return request.app.state.task_scorer
+
+
 @router.post("/scoring")
-async def score_response(request: Request, api_key_data: dict = Depends(validate_scoring_key)):
+async def score_response(
+    request: Request, api_key_data: dict = Depends(validate_scoring_key), task_scorer=Depends(get_task_scorer)
+):
     model = None
     payload: dict[str, Any] = await request.json()
     body = payload.get("body")
-    timeout = payload.get("timeout", shared_settings.NEURON_TIMEOUT)
+    timeout = payload.get("timeout", settings.shared_settings.NEURON_TIMEOUT)
     uids = payload.get("uid", [])
     chunks = payload.get("chunks", {})
     if not uids or not chunks:
@@ -54,7 +59,7 @@ async def score_response(request: Request, api_key_data: dict = Depends(validate
         llm_model=llm_model,
         llm_model_id=body.get("model"),
         seed=int(body.get("seed", 0)),
-        sampling_params=body.get("sampling_parameters", shared_settings.SAMPLING_PARAMS),
+        sampling_params=body.get("sampling_parameters", settings.shared_settings.SAMPLING_PARAMS),
         query=body.get("messages"),
     )
     logger.info(f"Task created: {organic_task}")
@@ -66,7 +71,7 @@ async def score_response(request: Request, api_key_data: dict = Depends(validate
            timeout=timeout,
        ),
        dataset_entry=DatasetEntry(),
-       block=shared_settings.METAGRAPH.block,
+       block=settings.shared_settings.METAGRAPH.block,
        step=-1,
        task_id=str(uuid.uuid4()),
    )
@@ -90,10 +95,10 @@ async def score_response(request: Request, api_key_data: dict = Depends(validate
        stream_results=[
            SynapseStreamResult(accumulated_chunks=[chunk for chunk in chunks if chunk is not None])
        ],
-       timeout=body.get("timeout", shared_settings.NEURON_TIMEOUT),
+       timeout=body.get("timeout", settings.shared_settings.NEURON_TIMEOUT),
        ),
        dataset_entry=DDGDatasetEntry(search_term=search_term),
-       block=shared_settings.METAGRAPH.block,
+       block=settings.shared_settings.METAGRAPH.block,
        step=-1,
        task_id=str(uuid.uuid4()),
    )
```
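A side benefit of resolving the scorer through `Depends(get_task_scorer)` is testability: FastAPI's `dependency_overrides` can substitute a fake without patching module globals. A hedged sketch (`FakeScorer` is hypothetical; `app` and `get_task_scorer` are the objects touched by this diff; the API-key dependency would still apply to real requests):

```python
# Sketch: swapping the scorer in tests via dependency_overrides.
# FakeScorer is hypothetical; app and get_task_scorer come from this commit.
from fastapi.testclient import TestClient

from prompting.api.api import app
from prompting.api.scoring.api import get_task_scorer

class FakeScorer:
    def __init__(self):
        self.scoring_queue = []
        self.reward_events = []

fake = FakeScorer()
app.dependency_overrides[get_task_scorer] = lambda: fake
client = TestClient(app)
# POSTs to /scoring now receive `fake` instead of the shared instance.
```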

prompting/datasets/random_website.py
Lines changed: 4 additions & 3 deletions

```diff
@@ -1,14 +1,14 @@
 import random
+from functools import lru_cache
 from typing import Optional
 
 import trafilatura
 from loguru import logger
 
-# from duckduckgo_search import DDGS
 from prompting.base.duckduckgo_patch import PatchedDDGS
 from prompting.datasets.utils import ENGLISH_WORDS
+from shared import settings
 from shared.base import BaseDataset, Context, DatasetEntry
-from shared.settings import shared_settings
 
 MAX_CHARS = 5000
 
@@ -25,7 +25,7 @@ class DDGDataset(BaseDataset):
     english_words: list[str] = None
 
     def search_random_term(self, retries: int = 3) -> tuple[Optional[str], Optional[list[dict[str, str]]]]:
-        ddg = PatchedDDGS(proxy=shared_settings.PROXY_URL, verify=False)
+        ddg = PatchedDDGS(proxy=settings.shared_settings.PROXY_URL, verify=False)
         for _ in range(retries):
             random_words = " ".join(random.sample(ENGLISH_WORDS, 3))
             try:
@@ -38,6 +38,7 @@ def search_random_term(self, retries: int = 3) -> tuple[Optional[str], Optional[
         return None, None
 
     @staticmethod
+    @lru_cache(maxsize=1000)
     def extract_website_content(url: str) -> Optional[str]:
         try:
             website = trafilatura.fetch_url(url)
```
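The new `@lru_cache(maxsize=1000)` sits under `@staticmethod`, so the cache wraps the plain function and repeated extractions of the same URL are served from memory; this works because the argument (a URL string) is hashable. A self-contained sketch of the decorator stacking (the `Extractor` class is illustrative):

```python
# Sketch of lru_cache under staticmethod (Extractor is illustrative).
from functools import lru_cache

class Extractor:
    @staticmethod
    @lru_cache(maxsize=1000)
    def fetch(url: str) -> str:
        print(f"fetching {url}")  # prints on cache misses only
        return f"<content of {url}>"

Extractor.fetch("https://example.com")  # miss: prints
Extractor.fetch("https://example.com")  # hit: silent
print(Extractor.fetch.cache_info())     # CacheInfo(hits=1, misses=1, ...)
```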

prompting/llms/apis/gpt_wrapper.py
Lines changed: 3 additions & 1 deletion

```diff
@@ -5,7 +5,9 @@
 from pydantic import BaseModel
 
 from prompting.llms.apis.llm_messages import LLMMessage, LLMMessages
-from shared.settings import shared_settings
+from shared import settings
+
+shared_settings = settings.shared_settings
 
 
 class GPT(BaseModel):
```