Skip to content

Commit 533db09

Browse files
authored
[benchmark] add max-concurrency in result table (#21095)
Signed-off-by: Peter Pan <[email protected]>
1 parent fc91da5 commit 533db09

File tree

3 files changed: 14 additions and 0 deletions

3 files changed: 14 additions and 0 deletions

benchmarks/benchmark_serving.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,10 @@ async def limited_request_func(request_func_input, pbar):
413413

414414
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
415415
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
416+
if max_concurrency is not None:
417+
print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
418+
if request_rate != float("inf"):
419+
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
416420
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
417421
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
418422
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))

benchmarks/benchmark_serving_structured_output.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,10 @@ async def limited_request_func(request_func_input, pbar):
555555

556556
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
557557
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
558+
if max_concurrency is not None:
559+
print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
560+
if request_rate != float("inf"):
561+
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
558562
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
559563
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
560564
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))

vllm/benchmarks/serve.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,12 @@ async def limited_request_func(request_func_input, pbar):
486486

487487
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
488488
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
489+
if max_concurrency is not None:
490+
print("{:<40} {:<10}".format("Maximum request concurrency:",
491+
max_concurrency))
492+
if request_rate != float('inf'):
493+
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):",
494+
request_rate ))
489495
print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
490496
benchmark_duration))
491497
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))

0 commit comments

Comments (0)