@@ -318,6 +318,7 @@ async def benchmark(
318
318
model_id : str ,
319
319
tokenizer : PreTrainedTokenizerBase ,
320
320
input_requests : List [Tuple [str , int , int ]],
321
+ logprobs : Optional [int ],
321
322
best_of : int ,
322
323
use_beam_search : bool ,
323
324
request_rate : float ,
@@ -339,6 +340,7 @@ async def benchmark(
339
340
api_url = api_url ,
340
341
prompt_len = test_prompt_len ,
341
342
output_len = test_output_len ,
343
+ logprobs = logprobs ,
342
344
best_of = best_of ,
343
345
use_beam_search = use_beam_search ,
344
346
)
@@ -358,6 +360,7 @@ async def benchmark(
358
360
api_url = base_url + "/start_profile" ,
359
361
prompt_len = test_prompt_len ,
360
362
output_len = test_output_len ,
363
+ logprobs = logprobs ,
361
364
best_of = best_of ,
362
365
use_beam_search = use_beam_search ,
363
366
)
@@ -379,6 +382,7 @@ async def benchmark(
379
382
api_url = api_url ,
380
383
prompt_len = prompt_len ,
381
384
output_len = output_len ,
385
+ logprobs = logprobs ,
382
386
best_of = best_of ,
383
387
use_beam_search = use_beam_search ,
384
388
)
@@ -396,6 +400,7 @@ async def benchmark(
396
400
api_url = base_url + "/stop_profile" ,
397
401
prompt_len = test_prompt_len ,
398
402
output_len = test_output_len ,
403
+ logprobs = logprobs ,
399
404
best_of = best_of ,
400
405
use_beam_search = use_beam_search ,
401
406
)
@@ -580,6 +585,7 @@ def main(args: argparse.Namespace):
580
585
model_id = model_id ,
581
586
tokenizer = tokenizer ,
582
587
input_requests = input_requests ,
588
+ logprobs = args .logprobs ,
583
589
best_of = args .best_of ,
584
590
use_beam_search = args .use_beam_search ,
585
591
request_rate = args .request_rate ,
@@ -721,6 +727,16 @@ def main(args: argparse.Namespace):
721
727
help =
722
728
"Number of output tokens per request, used only for sonnet dataset." ,
723
729
)
730
+ parser .add_argument (
731
+ "--logprobs" ,
732
+ type = int ,
733
+ default = None ,
734
+ help = ("Number of logprobs-per-token to compute & return as part of "
735
+ "the request. If unspecified, then either (1) if beam search "
736
+ "is disabled, no logprobs are computed & a single dummy "
737
+ "logprob is returned for each token; or (2) if beam search "
738
+ "is enabled 1 logprob per token is computed" ),
739
+ )
724
740
parser .add_argument (
725
741
"--sonnet-prefix-len" ,
726
742
type = int ,
0 commit comments