Commit faa5d8b

Parametrize use_token_latencies
1 parent a3bc387 commit faa5d8b

File tree

1 file changed: +10 -6 lines changed
  • multimodal/vl2l/src/mlperf_inference_multimodal_vl2l


multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/cli.py

Lines changed: 10 additions & 6 deletions

@@ -112,6 +112,13 @@ class TestSettings(BaseModel):
         ),
     ] = timedelta(seconds=5)
 
+    use_token_latencies: Annotated[
+        bool,
+        Field(
+            description="When set to True, LoadGen will track TTFT and TPOT.",
+        ),
+    ] = True
+
     @field_validator("min_duration", mode="before")
     @classmethod
     def parse_min_duration(cls, value: timedelta | float | str) -> timedelta | str:
@@ -136,9 +143,8 @@ def to_lgtype(self) -> lg.TestSettings:
         settings.scenario = self.scenario.to_lgtype()
         settings.mode = self.mode.to_lgtype()
         settings.offline_expected_qps = self.offline_expected_qps
-        settings.min_duration_ms = round(
-            self.min_duration.total_seconds() * 1000)
-        settings.use_token_latencies = True
+        settings.min_duration_ms = round(self.min_duration.total_seconds() * 1000)
+        settings.use_token_latencies = self.use_token_latencies
         return settings
 
 
@@ -222,9 +228,7 @@ def main(
     logger.info("Running VL2L benchmark with settings: {}", settings)
     logger.info("Running VL2L benchmark with model: {}", model)
     logger.info("Running VL2L benchmark with dataset: {}", dataset)
-    logger.info(
-        "Running VL2L benchmark with OpenAI API endpoint: {}",
-        endpoint)
+    logger.info("Running VL2L benchmark with OpenAI API endpoint: {}", endpoint)
     logger.info("Running VL2L benchmark with random seed: {}", random_seed)
     lg_settings = settings.to_lgtype()
     task = ShopifyGlobalCatalogue(
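For context, here is a minimal, self-contained sketch of the pattern this commit applies: the option is declared on the pydantic `TestSettings` model via `Annotated[bool, Field(...)]` with a default of `True`, and `to_lgtype()` copies it onto the LoadGen settings object instead of hard-coding `True`. This is an illustration, not the project's actual `cli.py`: the `LoadGenSettings` class is a hypothetical stand-in for mlperf loadgen's `lg.TestSettings`, and the `min_duration` description text is illustrative.

```python
# Hedged sketch only: LoadGenSettings stands in for lg.TestSettings, and this
# TestSettings model is a trimmed-down illustration of the commit's pattern.
from datetime import timedelta
from typing import Annotated

from pydantic import BaseModel, Field


class LoadGenSettings:
    """Hypothetical stand-in for mlperf loadgen's lg.TestSettings."""

    min_duration_ms: int = 0
    use_token_latencies: bool = False


class TestSettings(BaseModel):
    """Subset of the benchmark settings model touched by this commit."""

    min_duration: Annotated[
        timedelta,
        Field(description="Minimum duration the benchmark must run for."),
    ] = timedelta(seconds=5)
    use_token_latencies: Annotated[
        bool,
        Field(description="When set to True, LoadGen will track TTFT and TPOT."),
    ] = True

    def to_lgtype(self) -> LoadGenSettings:
        # Mirror the diff: the flag now comes from the model field rather
        # than being hard-coded to True.
        settings = LoadGenSettings()
        settings.min_duration_ms = round(self.min_duration.total_seconds() * 1000)
        settings.use_token_latencies = self.use_token_latencies
        return settings


if __name__ == "__main__":
    # The default preserves the previous behaviour; callers can now opt out.
    assert TestSettings().to_lgtype().use_token_latencies is True
    assert TestSettings(use_token_latencies=False).to_lgtype().use_token_latencies is False
```

In CLI frameworks that build options from a pydantic model, a boolean field like this typically surfaces as an on/off flag; how this project exposes it on the command line is not shown in the diff above.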