
Commit e5cab71

[Frontend] Add --logprobs argument to benchmark_serving.py (vllm-project#8191)
1 parent baa5467 commit e5cab71

File tree: 3 files changed (+19, -1 lines)


benchmarks/backend_request_func.py

Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,7 @@ class RequestFuncInput:
     model: str
     best_of: int = 1
     use_beam_search: bool = False
+    logprobs: Optional[int] = None
 
 
 @dataclass
@@ -236,6 +237,7 @@ async def async_request_openai_completions(
         "temperature": 0.0,
         "best_of": request_func_input.best_of,
         "max_tokens": request_func_input.output_len,
+        "logprobs": request_func_input.logprobs,
         "stream": True,
     }
     headers = {
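Read together, the two hunks thread the new field from the request dataclass into the OpenAI-compatible completions payload. Below is a minimal sketch of that flow, trimmed to the fields relevant here; it is an illustration, not the benchmark's actual code:

from dataclasses import dataclass
from typing import Optional

@dataclass
class RequestFuncInput:
    # Only the fields relevant to this commit are shown.
    prompt: str
    output_len: int
    best_of: int = 1
    use_beam_search: bool = False
    logprobs: Optional[int] = None

def build_payload(req: RequestFuncInput) -> dict:
    # logprobs=None serializes to JSON null, so the server keeps its
    # default behavior; an int requests that many logprobs per output token.
    return {
        "prompt": req.prompt,
        "temperature": 0.0,
        "best_of": req.best_of,
        "max_tokens": req.output_len,
        "logprobs": req.logprobs,
        "stream": True,
    }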

benchmarks/benchmark_serving.py

Lines changed: 16 additions & 0 deletions
@@ -318,6 +318,7 @@ async def benchmark(
     model_id: str,
     tokenizer: PreTrainedTokenizerBase,
     input_requests: List[Tuple[str, int, int]],
+    logprobs: Optional[int],
     best_of: int,
     use_beam_search: bool,
     request_rate: float,
@@ -339,6 +340,7 @@ async def benchmark(
         api_url=api_url,
         prompt_len=test_prompt_len,
         output_len=test_output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -358,6 +360,7 @@ async def benchmark(
         api_url=base_url + "/start_profile",
         prompt_len=test_prompt_len,
         output_len=test_output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -379,6 +382,7 @@ async def benchmark(
         api_url=api_url,
         prompt_len=prompt_len,
         output_len=output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -396,6 +400,7 @@ async def benchmark(
         api_url=base_url + "/stop_profile",
         prompt_len=test_prompt_len,
         output_len=test_output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -580,6 +585,7 @@ def main(args: argparse.Namespace):
         model_id=model_id,
         tokenizer=tokenizer,
         input_requests=input_requests,
+        logprobs=args.logprobs,
         best_of=args.best_of,
         use_beam_search=args.use_beam_search,
         request_rate=args.request_rate,
@@ -721,6 +727,16 @@ def main(args: argparse.Namespace):
         help=
         "Number of output tokens per request, used only for sonnet dataset.",
     )
+    parser.add_argument(
+        "--logprobs",
+        type=int,
+        default=None,
+        help=("Number of logprobs-per-token to compute & return as part of "
+              "the request. If unspecified, then either (1) if beam search "
+              "is disabled, no logprobs are computed & a single dummy "
+              "logprob is returned for each token; or (2) if beam search "
+              "is enabled 1 logprob per token is computed"),
+    )
     parser.add_argument(
         "--sonnet-prefix-len",
         type=int,
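With the value threaded through benchmark() and the flag registered on the parser, the option can be exercised from the command line. A hedged usage sketch follows; every flag other than --logprobs is illustrative, and the model name and dataset path are placeholders:

python benchmarks/benchmark_serving.py \
    --backend openai \
    --model <served-model-name> \
    --dataset-name sharegpt \
    --dataset-path <path/to/ShareGPT.json> \
    --logprobs 5

Leaving --logprobs unset (the default of None) preserves the pre-commit behavior described in the help text above.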

tests/multi_step/test_correctness_llm.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def test_multi_step_llm(
       GPU -> CPU output transfer
     num_prompts: number of example prompts under test
     num_logprobs: corresponds to the `logprobs` argument to the OpenAI
-      completions endpoint; `None` -> no logprobs
+      completions endpoint; `None` -> 1 logprob returned.
     """
 
     prompts = example_prompts
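The docstring correction records that `None` yields one logprob per token rather than none. For illustration only, a sketch of requesting per-token logprobs offline, assuming vLLM's standard SamplingParams API; the model and prompt are placeholder choices, not taken from the commit:

from vllm import LLM, SamplingParams

# Placeholder model; any small model serves for the illustration.
llm = LLM(model="facebook/opt-125m")

# Request the top-5 logprobs for every generated token.
params = SamplingParams(temperature=0.0, max_tokens=8, logprobs=5)

outputs = llm.generate(["The capital of France is"], params)

# CompletionOutput.logprobs holds one dict per generated token, mapping
# token ids to their logprob entries.
print(outputs[0].outputs[0].logprobs)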
