1- # ruff: noqa: E402
2- from shared import settings
3-
4- settings .shared_settings = settings .SharedSettings .load (mode = "validator" )
5- shared_settings = settings .shared_settings
6-
71import asyncio
82import multiprocessing as mp
3+ import sys
94import time
105
116import loguru
127import torch
8+ import wandb
9+
10+ # ruff: noqa: E402
11+ from shared import settings
12+
13+ shared_settings = settings .shared_settings
14+ settings .shared_settings = settings .SharedSettings .load (mode = "validator" )
15+
1316
14- from prompting .api .api import start_scoring_api
15- from prompting .llms .model_manager import model_scheduler
1617from prompting .llms .utils import GPUInfo
17- from prompting .miner_availability .miner_availability import availability_checking_loop
18- from prompting .rewards .scoring import task_scorer
19- from prompting .tasks .task_creation import task_loop
20- from prompting .tasks .task_sending import task_sender
21- from prompting .weight_setting .weight_setter import weight_setter
22- from shared .profiling import profiler
2318
2419# Add a handler to write logs to a file
2520loguru .logger .add ("logfile.log" , rotation = "1000 MB" , retention = "10 days" , level = "DEBUG" )
3227
3328def create_loop_process (task_queue , scoring_queue , reward_events ):
3429 async def spawn_loops (task_queue , scoring_queue , reward_events ):
30+ # ruff: noqa: E402
31+ wandb .setup ()
32+ from shared import settings
33+
34+ settings .shared_settings = settings .SharedSettings .load (mode = "validator" )
35+
36+ from prompting .llms .model_manager import model_scheduler
37+ from prompting .miner_availability .miner_availability import availability_checking_loop
38+ from prompting .rewards .scoring import task_scorer
39+ from prompting .tasks .task_creation import task_loop
40+ from prompting .tasks .task_sending import task_sender
41+ from prompting .weight_setting .weight_setter import weight_setter
42+ from shared .profiling import profiler
43+
3544 logger .info ("Starting Profiler..." )
3645 asyncio .create_task (profiler .print_stats (), name = "Profiler" ),
46+
47+ # -------- Duplicate of create_task_loop ----------
48+ logger .info ("Starting AvailabilityCheckingLoop..." )
49+ asyncio .create_task (availability_checking_loop .start ())
50+
51+ logger .info ("Starting TaskSender..." )
52+ asyncio .create_task (task_sender .start (task_queue , scoring_queue ))
53+
54+ logger .info ("Starting TaskLoop..." )
55+ asyncio .create_task (task_loop .start (task_queue , scoring_queue ))
56+ # -------------------------------------------------
57+
3758 logger .info ("Starting ModelScheduler..." )
3859 asyncio .create_task (model_scheduler .start (scoring_queue ), name = "ModelScheduler" ),
3960 logger .info ("Starting TaskScorer..." )
@@ -62,6 +83,8 @@ async def spawn_loops(task_queue, scoring_queue, reward_events):
6283
6384def start_api ():
6485 async def start ():
86+ from prompting .api .api import start_scoring_api # noqa: F401
87+
6588 await start_scoring_api ()
6689 while True :
6790 await asyncio .sleep (10 )
@@ -70,21 +93,21 @@ async def start():
7093 asyncio .run (start ())
7194
7295
73- def create_task_loop (task_queue , scoring_queue ):
74- async def start (task_queue , scoring_queue ):
75- logger .info ("Starting AvailabilityCheckingLoop..." )
76- asyncio .create_task (availability_checking_loop .start ())
96+ # def create_task_loop(task_queue, scoring_queue):
97+ # async def start(task_queue, scoring_queue):
98+ # logger.info("Starting AvailabilityCheckingLoop...")
99+ # asyncio.create_task(availability_checking_loop.start())
77100
78- logger .info ("Starting TaskSender..." )
79- asyncio .create_task (task_sender .start (task_queue , scoring_queue ))
101+ # logger.info("Starting TaskSender...")
102+ # asyncio.create_task(task_sender.start(task_queue, scoring_queue))
80103
81- logger .info ("Starting TaskLoop..." )
82- asyncio .create_task (task_loop .start (task_queue , scoring_queue ))
83- while True :
84- await asyncio .sleep (10 )
85- logger .debug ("Running task loop..." )
104+ # logger.info("Starting TaskLoop...")
105+ # asyncio.create_task(task_loop.start(task_queue, scoring_queue))
106+ # while True:
107+ # await asyncio.sleep(10)
108+ # logger.debug("Running task loop...")
86109
87- asyncio .run (start (task_queue , scoring_queue ))
110+ # asyncio.run(start(task_queue, scoring_queue))
88111
89112
90113async def main ():
@@ -109,23 +132,38 @@ async def main():
109132 loop_process = mp .Process (
110133 target = create_loop_process , args = (task_queue , scoring_queue , reward_events ), name = "LoopProcess"
111134 )
112- task_loop_process = mp .Process (
113- target = create_task_loop , args = (task_queue , scoring_queue ), name = "TaskLoopProcess"
114- )
135+ # task_loop_process = mp.Process(
136+ # target=create_task_loop, args=(task_queue, scoring_queue), name="TaskLoopProcess"
137+ # )
115138 loop_process .start ()
116- task_loop_process .start ()
139+ # task_loop_process.start()
117140 processes .append (loop_process )
118- processes .append (task_loop_process )
141+ # processes.append(task_loop_process)
119142 GPUInfo .log_gpu_info ()
120143
144+ step = 0
121145 while True :
122- await asyncio .sleep (10 )
123- logger .debug ("Running..." )
146+ await asyncio .sleep (30 )
147+ if (
148+ shared_settings .SUBTENSOR .get_current_block ()
149+ - shared_settings .METAGRAPH .last_update [shared_settings .UID ]
150+ > 500
151+ and step > 120
152+ ):
153+ logger .warning (
154+ f"UPDATES HAVE STALED FOR { shared_settings .SUBTENSOR .get_current_block () - shared_settings .METAGRAPH .last_update [shared_settings .UID ]} BLOCKS AND { step } STEPS"
155+ )
156+ logger .warning (
157+ f"STALED: { shared_settings .SUBTENSOR .get_current_block ()} , { shared_settings .METAGRAPH .block } "
158+ )
159+ sys .exit (1 )
160+ step += 1
124161
125162 except Exception as e :
126163 logger .error (f"Main loop error: { e } " )
127164 raise
128165 finally :
166+ wandb .teardown ()
129167 # Clean up processes
130168 for process in processes :
131169 if process .is_alive ():
0 commit comments