Skip to content

Commit ec4f211

Browse files
committed
feat: add total_token_throughput metric
1 parent 669fb62 commit ec4f211

File tree

3 files changed

+125
-10
lines changed

3 files changed

+125
-10
lines changed

src/aiperf/metrics/types/prefill_throughput.py renamed to src/aiperf/metrics/types/prefill_throughput_per_user.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,21 @@
1010
from aiperf.metrics.types.ttft_metric import TTFTMetric
1111

1212

13-
class PrefillThroughputMetric(BaseRecordMetric[float]):
13+
class PrefillThroughputPerUserMetric(BaseRecordMetric[float]):
1414
"""
15-
Post-processor for calculating Prefill Throughput metrics from records. This is only applicable to streaming responses.
15+
Post-processor for calculating Prefill Throughput Per User metrics from records. This is only applicable to streaming responses.
1616
1717
Formula:
18-
Prefill Throughput = Prefill Sequence Length / Time to First Token (seconds)
18+
Prefill Throughput Per User = Prefill Sequence Length / Time to First Token (seconds)
1919
"""
2020

21-
tag = "prefill_throughput"
22-
header = "Prefill Throughput"
23-
short_header = "Prefill TPS"
21+
tag = "prefill_throughput_per_user"
22+
header = "Prefill Throughput Per User"
23+
short_header = "Prefill TPS/User"
2424
short_header_hide_unit = True
25-
unit = MetricOverTimeUnit.TOKENS_PER_SECOND
25+
unit = MetricOverTimeUnit.TOKENS_PER_SECOND_PER_USER
2626
flags = (
27-
MetricFlags.STREAMING_ONLY
27+
MetricFlags.STREAMING_TOKENS_ONLY
2828
| MetricFlags.TOKENIZES_INPUT_ONLY
2929
| MetricFlags.LARGER_IS_BETTER
3030
| MetricFlags.NO_CONSOLE
@@ -39,7 +39,7 @@ def _parse_record(
3939
record: ParsedResponseRecord,
4040
record_metrics: MetricRecordDict,
4141
) -> float:
42-
"""This method calculates the prefill throughput by dividing the input sequence length by the TTFT."""
42+
"""This method calculates the prefill throughput per user by dividing the input sequence length by the TTFT."""
4343

4444
isl = record_metrics.get_or_raise(InputSequenceLengthMetric)
4545
converted_ttft = record_metrics.get_converted_or_raise(
@@ -48,6 +48,6 @@ def _parse_record(
4848
)
4949
if converted_ttft == 0:
5050
raise NoMetricValue(
51-
"TTFT is zero, cannot calculate prefill throughput metric"
51+
"TTFT is zero, cannot calculate prefill throughput per user metric"
5252
)
5353
return isl / converted_ttft # type: ignore
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from aiperf.common.enums import MetricFlags, MetricOverTimeUnit
5+
from aiperf.common.exceptions import NoMetricValue
6+
from aiperf.metrics import BaseDerivedMetric
7+
from aiperf.metrics.metric_dicts import MetricResultsDict
8+
from aiperf.metrics.types.benchmark_duration_metric import BenchmarkDurationMetric
9+
from aiperf.metrics.types.input_sequence_length_metric import (
10+
TotalInputSequenceLengthMetric,
11+
)
12+
from aiperf.metrics.types.output_sequence_length_metric import (
13+
TotalOutputSequenceLengthMetric,
14+
)
15+
16+
17+
class TotalTokenThroughputMetric(BaseDerivedMetric[float]):
    """Derived metric: overall token throughput for the whole benchmark run.

    Formula:
        Total Token Throughput = (Total Input Tokens + Total Output Tokens) / Benchmark Duration (seconds)
    """

    tag = "total_token_throughput"
    header = "Total Token Throughput"
    short_header = "Total TPS"
    short_header_hide_unit = True
    unit = MetricOverTimeUnit.TOKENS_PER_SECOND
    flags = (
        MetricFlags.PRODUCES_TOKENS_ONLY
        | MetricFlags.LARGER_IS_BETTER
        | MetricFlags.NO_CONSOLE
    )
    required_metrics = {
        TotalInputSequenceLengthMetric.tag,
        TotalOutputSequenceLengthMetric.tag,
        BenchmarkDurationMetric.tag,
    }

    def _derive_value(self, metric_results: MetricResultsDict) -> float:
        """Return (total input tokens + total output tokens) / duration.

        Raises:
            NoMetricValue: if the benchmark duration is zero (the
            ``*_or_raise`` lookups also raise when a required metric
            is unavailable).
        """
        total_in = metric_results.get_or_raise(TotalInputSequenceLengthMetric)
        total_out = metric_results.get_or_raise(TotalOutputSequenceLengthMetric)
        # Convert the duration into this metric's time unit (seconds) so the
        # division yields tokens per second.
        duration = metric_results.get_converted_or_raise(
            BenchmarkDurationMetric,
            self.unit.time_unit,  # type: ignore
        )
        if duration == 0:
            raise NoMetricValue(
                "Benchmark duration is zero, cannot calculate total token throughput metric"
            )
        return (total_in + total_out) / duration  # type: ignore
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import pytest
5+
6+
from aiperf.common.constants import NANOS_PER_SECOND
7+
from aiperf.common.exceptions import NoMetricValue
8+
from aiperf.metrics.metric_dicts import MetricResultsDict
9+
from aiperf.metrics.types.benchmark_duration_metric import BenchmarkDurationMetric
10+
from aiperf.metrics.types.input_sequence_length_metric import (
11+
TotalInputSequenceLengthMetric,
12+
)
13+
from aiperf.metrics.types.output_sequence_length_metric import (
14+
TotalOutputSequenceLengthMetric,
15+
)
16+
from aiperf.metrics.types.total_token_throughput import TotalTokenThroughputMetric
17+
18+
19+
class TestTotalTokenThroughputMetric:
    """Unit tests for TotalTokenThroughputMetric.derive_value."""

    @pytest.mark.parametrize(
        "input_tokens,output_tokens,duration,expected",
        [
            (600, 400, 2, 500.0),  # basic: (600+400) / 2s
            (500, 250, 1.5, 500.0),  # fractional duration: (500+250) / 1.5s
            (0, 0, 1, 0.0),  # zero tokens
            (1000, 0, 1, 1000.0),  # only input tokens
            (0, 1000, 1, 1000.0),  # only output tokens
            (1, 1, 1, 2.0),  # minimal tokens
            (500_000, 500_000, 1, 1_000_000.0),  # large token counts
            (50, 50, 0.1, 1000.0),  # small duration: (50+50) / 0.1s
        ],
    )  # fmt: skip
    def test_total_token_throughput_calculation(
        self, input_tokens: int, output_tokens: int, duration: float, expected: float
    ):
        """Test throughput calculation: (input_tokens + output_tokens) / duration"""
        metric = TotalTokenThroughputMetric()

        metric_results = MetricResultsDict()
        metric_results[TotalInputSequenceLengthMetric.tag] = input_tokens
        metric_results[TotalOutputSequenceLengthMetric.tag] = output_tokens
        # The pipeline stores benchmark duration in nanoseconds.
        metric_results[BenchmarkDurationMetric.tag] = duration * NANOS_PER_SECOND

        result = metric.derive_value(metric_results)
        assert result == pytest.approx(expected)

    @pytest.mark.parametrize("duration", [0, 0.0, None])
    def test_total_token_throughput_invalid_duration_raises(
        self, duration: float | None
    ):
        """Test error when benchmark duration is zero or None"""
        metric = TotalTokenThroughputMetric()

        metric_results = MetricResultsDict()
        metric_results[TotalInputSequenceLengthMetric.tag] = 600
        metric_results[TotalOutputSequenceLengthMetric.tag] = 400
        # BUGFIX: the original did `duration * NANOS_PER_SECOND` unconditionally,
        # so the duration=None case raised TypeError during setup and never
        # reached derive_value. Store None as-is so the lookup sees a missing
        # value (presumably raising NoMetricValue — matches the *_or_raise
        # semantics; confirm against MetricResultsDict).
        metric_results[BenchmarkDurationMetric.tag] = (
            None if duration is None else duration * NANOS_PER_SECOND
        )

        with pytest.raises(NoMetricValue):
            metric.derive_value(metric_results)

0 commit comments

Comments
 (0)