Skip to content

Commit 87da497

Browse files
committed
Strip out multiturn features
1 parent 72420a2 commit 87da497

File tree

6 files changed

+12 additions, -111 deletions (lines changed)

6 files changed

+12 additions, -111 deletions (lines changed)

src/guidellm/request/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,14 @@
55
RequestLoaderDescription,
66
)
77
from .request import GenerationRequest
8-
from .session import GenerativeRequestSession, RequestSession
98
from .types import RequestT, ResponseT
109

1110
__all__ = [
1211
"GenerationRequest",
1312
"GenerativeRequestLoader",
1413
"GenerativeRequestLoaderDescription",
15-
"GenerativeRequestSession",
1614
"RequestLoader",
1715
"RequestLoaderDescription",
18-
"RequestSession",
1916
"RequestT",
2017
"ResponseT",
2118
]

src/guidellm/request/loader.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
from guidellm.dataset import ColumnInputTypes, load_dataset
1616
from guidellm.objects import StandardBaseModel
1717
from guidellm.request.request import GenerationRequest
18-
from guidellm.request.session import GenerativeRequestSession
1918

2019
__all__ = [
2120
"GenerativeRequestLoader",
@@ -106,14 +105,14 @@ def __init__(
106105
self.preserve_iter_state = iter_type == "infinite" # ensure no caching requests
107106
self._preserved_iter = None
108107

109-
def __iter__(self) -> Iterator[GenerativeRequestSession]:
108+
def __iter__(self) -> Iterator[GenerationRequest]:
110109
scope_create_count = 0
111110

112111
while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
113112
scope_create_count += 1
114113

115114
for item in dataset_iter:
116-
yield GenerativeRequestSession(self._create_request(item))
115+
yield self._create_request(item)
117116

118117
self._preserved_iter = None
119118

src/guidellm/request/session.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

src/guidellm/scheduler/result.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
)
77

88
from guidellm.objects import StandardBaseModel
9-
from guidellm.request.session import RequestSession
109
from guidellm.request.types import RequestT, ResponseT
1110
from guidellm.scheduler.strategy import SchedulingStrategy
1211

@@ -143,7 +142,7 @@ class SchedulerRequestResult(
143142

144143
@dataclass
145144
class WorkerProcessRequest(Generic[RequestT, ResponseT]):
146-
session: RequestSession[RequestT, ResponseT]
145+
request: RequestT
147146
timeout_time: float
148147
queued_time: float
149148

src/guidellm/scheduler/scheduler.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,6 @@ async def _start_processes(
234234
queues,
235235
scheduling_strategy,
236236
stop_event,
237-
False, # TODO: Make configurable
238237
requests_limit,
239238
id_,
240239
num_processes,
@@ -300,17 +299,16 @@ def _add_requests(
300299
if run_info.created_requests >= run_info.end_number:
301300
raise StopIteration
302301

303-
session = next(requests_iter)
304-
work_req = WorkerProcessRequest(
305-
session=session,
302+
work_req = WorkerProcessRequest[RequestT, ResponseT](
303+
request=next(requests_iter),
306304
timeout_time=run_info.end_time,
307305
queued_time=time.time(),
308306
)
309307
requests_queue.put(work_req)
310308

311-
run_info.created_requests += len(session)
312-
run_info.queued_requests += len(session)
313-
added_count += len(session)
309+
run_info.created_requests += 1
310+
run_info.queued_requests += 1
311+
added_count += 1
314312
except StopIteration:
315313
# we've reached the limit number, limit time, or exhausted the requests
316314
# set to None to stop adding more and tell the loop no more requests

src/guidellm/scheduler/worker.py

Lines changed: 4 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ async def resolve_scheduler_request(
123123
results_queue: Queue[WorkerProcessResult[RequestT, ResponseT]],
124124
process_id: int,
125125
) -> WorkerProcessRequest[RequestT, ResponseT]:
126-
request = process_request.session.get_next_request()
126+
request = process_request.request
127127
timeout_time = process_request.timeout_time
128128
queued_time = process_request.queued_time
129129

@@ -170,22 +170,19 @@ async def resolve_scheduler_request(
170170
)
171171
asyncio.create_task(self.send_result(results_queue, result))
172172

173-
process_request.session.push_response(response)
174173
return process_request
175174

176175
def process_loop_asynchronous(
177176
self,
178177
queues: MPQueues[RequestT, ResponseT],
179178
strategy: SchedulingStrategy,
180179
stop_event: Event,
181-
prioritize_sessions: bool,
182180
max_concurrency: int,
183181
process_id: int,
184182
num_processes: int,
185183
):
186184
async def _process_runner():
187185
lock = asyncio.Semaphore(max_concurrency)
188-
pending_requests: list[WorkerProcessRequest[RequestT, ResponseT]] = []
189186
times_iter = islice(
190187
strategy.request_times(),
191188
process_id,
@@ -202,50 +199,18 @@ async def _process_runner():
202199
await asyncio.sleep(start_time - time.time() - 1)
203200
await lock.acquire()
204201

205-
process_request = None
206202
try:
207-
process_request = (
208-
pending_requests.pop()
209-
if pending_requests
210-
else queues.requests.get_nowait()
211-
)
203+
process_request = queues.requests.get_nowait()
212204
dequeued_time = time.time()
213205
except QueueEmpty:
214206
lock.release()
215207
continue
216208

217-
async def wait_then_requeue(
218-
process_request: WorkerProcessRequest[RequestT, ResponseT],
219-
):
220-
# Wait to requeue the request session if it specifies a delay
221-
if delay := process_request.session.get_next_delay():
222-
await asyncio.sleep(delay)
223-
224-
# Push session to the stack
225-
process_request.queued_time = time.time()
226-
pending_requests.append(process_request)
227-
if prioritize_sessions:
228-
# Release the lock with the session on top of the stack
229-
lock.release()
230-
231209
def _request_callback(
232-
future: asyncio.Future[WorkerProcessRequest[RequestT, ResponseT]],
210+
_: asyncio.Future[WorkerProcessRequest[RequestT, ResponseT]],
233211
):
234-
# If we are prioritizing sessions, hold
235-
# the lock until the session is done
236212
nonlocal lock
237-
if not prioritize_sessions:
238-
lock.release()
239-
240-
try:
241-
process_request = future.result()
242-
except asyncio.CancelledError:
243-
return
244-
if not process_request.session.complete:
245-
asyncio.create_task(wait_then_requeue(process_request))
246-
elif prioritize_sessions:
247-
# no more requests in this session, release the lock
248-
lock.release()
213+
lock.release()
249214

250215
task = asyncio.create_task(
251216
self.resolve_scheduler_request(
@@ -319,7 +284,6 @@ def process_loop_asynchronous(
319284
queues: MPQueues[GenerationRequest, ResponseSummary],
320285
strategy: SchedulingStrategy,
321286
stop_event: Event,
322-
prioritize_sessions: bool,
323287
max_concurrency: int,
324288
process_id: int,
325289
num_processes: int,
@@ -329,7 +293,6 @@ def process_loop_asynchronous(
329293
queues=queues,
330294
strategy=strategy,
331295
stop_event=stop_event,
332-
prioritize_sessions=prioritize_sessions,
333296
max_concurrency=max_concurrency,
334297
process_id=process_id,
335298
num_processes=num_processes,

0 commit comments

Comments
 (0)