Skip to content

Commit 754207e

Browse files
committed
clean-up
1 parent e4f5a7e commit 754207e

File tree

6 files changed

+541
-179
lines changed

6 files changed

+541
-179
lines changed

multimodal/vl2l/README.md

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## Quick Start
44

5+
### Get the source code
6+
57
Clone the MLPerf Inference repo via:
68

79
```bash
@@ -14,6 +16,8 @@ Then enter the repo:
1416
cd mlperf-inference/
1517
```
1618

19+
### Create a Conda environment
20+
1721
Follow [this link](https://www.anaconda.com/docs/getting-started/miniconda/install#quickstart-install-instructions)
1822
on how to install Miniconda on your host machine. Then, you can create a new conda
1923
environment via:
@@ -50,7 +54,7 @@ Run a quick test to validate that LoadGen was installed correctly:
5054
python loadgen/demos/token_metrics/py_demo_server.py
5155
```
5256

53-
### Install VL2L Benchmark CLI
57+
### Install the VL2L benchmarking CLI
5458

5559
For users, install `mlperf-inf-mm-vl2l` with:
5660

@@ -64,6 +68,57 @@ For developers, install `mlperf-inf-mm-vl2l` and the development tools with:
6468
pip install multimodal/vl2l/[dev]
6569
```
6670

71+
After installation, you can check the CLI flags that `mlperf-inf-mm-vl2l` can take with:
72+
73+
```bash
74+
mlperf-inf-mm-vl2l --help
75+
```
76+
77+
You can enable shell autocompletion for `mlperf-inf-mm-vl2l` with:
78+
79+
```bash
80+
mlperf-inf-mm-vl2l --install-completion
81+
```
82+
83+
> NOTE: Shell autocompletion will take effect once you restart the terminal.
84+
85+
### Start an inference endpoint on your local host machine with vLLM
86+
87+
Please refer to [this guide on how to launch vLLM for various Qwen3 VL MoE models](https://docs.vllm.ai/projects/recipes/en/latest/Qwen/Qwen3-VL.html).
88+
89+
```bash
# Notes on the flags used below:
#   --gpus all                      Use all the GPUs on this host machine.
#   -v ~/.cache/huggingface:...     Use the HuggingFace cache from your host machine.
#   -p 8000:8000                    This assumes the endpoint will use port 8000.
#   --ipc=host                      The container can access and utilize the host's IPC mechanisms (e.g., shared memory).
#   vllm/vllm-openai:nightly        You can also use the `:latest` container or a specific release.
#   --model ...                     Specifies the model for vLLM to deploy.
#   --tensor-parallel-size 8        8-way tensor-parallel inference across 8 GPUs.
#   --limit-mm-per-prompt.video 0   The input requests will contain images only (i.e., no videos).
docker run --gpus all \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  -p 8000:8000 \
  --ipc=host \
  vllm/vllm-openai:nightly \
  --model Qwen/Qwen3-VL-235B-A22B-Instruct \
  --tensor-parallel-size 8 \
  --limit-mm-per-prompt.video 0
```
99+
100+
### Run the benchmark for the Offline scenario
101+
102+
Performance only mode:
103+
104+
```bash
105+
mlperf-inf-mm-vl2l --settings.senario offline --settings.mode performance_only
106+
```
107+
108+
Accuracy only mode:
109+
110+
TBD
111+
112+
### Run the benchmark for the Server scenario
113+
114+
Performance only mode:
115+
116+
TBD
117+
118+
Accuracy only mode:
119+
120+
TBD
121+
67122
## Developer Guide
68123

69124
### Linting

multimodal/vl2l/notebooks/shopify-global-catalogue.ipynb

Lines changed: 327 additions & 80 deletions
Large diffs are not rendered by default.

multimodal/vl2l/pyproject.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies = [
1717
"openai[aiohttp]",
1818
"pydantic",
1919
"pydantic-typer",
20+
"pympler",
2021
"typer",
2122
]
2223
dynamic = ["version"]
@@ -50,6 +51,9 @@ lint.ignore = [
5051
"ANN003", # Missing type annotation for `**kwargs`
5152
]
5253

54+
[tool.ruff.lint.pylint]
55+
max-args = 10
56+
5357
[tool.ruff.lint.pydocstyle]
5458
convention = "google"
5559

@@ -64,3 +68,7 @@ ignore_missing_imports = true
6468
[[tool.mypy.overrides]]
6569
module = "mlperf_loadgen"
6670
ignore_missing_imports = true
71+
72+
[[tool.mypy.overrides]]
73+
module = "datasets"
74+
ignore_missing_imports = true

multimodal/vl2l/scripts/linters.sh

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,16 @@ echo "Running ruff..."
3434
ruff check --fix "${PROJECT_ROOT}"/src/
3535

3636
echo "Running mypy..."
37-
mypy --config-file="${PROJECT_ROOT}"/pyproject.toml --install-types "${PROJECT_ROOT}"/src/
37+
mypy --config-file="${PROJECT_ROOT}"/pyproject.toml \
38+
--install-types \
39+
"${PROJECT_ROOT}"/src/
3840

3941
echo "Running shellcheck..."
40-
find "${PROJECT_ROOT}" -type f -name "*.sh" -exec shellcheck -ax {} +
42+
find "${PROJECT_ROOT}" -type f -name "*.sh" -exec shellcheck -ax {} +
43+
44+
echo "Running trufflehog..."
45+
docker run --rm \
46+
-it \
47+
-v "${PROJECT_ROOT}":/to-scan \
48+
trufflesecurity/trufflehog:latest \
49+
filesystem /to-scan

multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/cli.py

Lines changed: 58 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import sys
56
from datetime import timedelta
67
from enum import StrEnum, auto
78
from typing import Annotated
@@ -11,8 +12,9 @@
1112
from openai import AsyncOpenAI, DefaultAioHttpClient
1213
from pydantic import BaseModel, Field
1314
from pydantic_typer import Typer
15+
from typer import Option
1416

15-
from .task import MMMU, ShopifyGlobalCatalogue, Task
17+
from .task import ShopifyGlobalCatalogue
1618

1719
app = Typer()
1820

@@ -97,7 +99,7 @@ class TestSettings(BaseModel):
9799
Field(
98100
description="The expected QPS for the offline scenario.",
99101
),
100-
] = 10
102+
] = 100
101103

102104
min_duration: Annotated[
103105
timedelta,
@@ -112,8 +114,7 @@ def to_lgtype(self) -> lg.TestSettings:
112114
settings.scenario = self.senario.to_lgtype()
113115
settings.mode = self.mode.to_lgtype()
114116
settings.offline_expected_qps = self.offline_expected_qps
115-
settings.min_duration_ms = round(
116-
self.min_duration.total_seconds() * 1000)
117+
settings.min_duration_ms = round(self.min_duration.total_seconds() * 1000)
117118
settings.use_token_latencies = True
118119
return settings
119120

@@ -124,36 +125,12 @@ class Model(BaseModel):
124125
repo_id: Annotated[
125126
str,
126127
Field(description="The HuggingFace repository ID of the model."),
127-
] = "Qwen/Qwen3-VL-30B-A3B-Instruct"
128+
] = "Qwen/Qwen3-VL-235B-A22B-Instruct"
128129

129130

130131
class Dataset(BaseModel):
131132
"""Specifies a dataset on HuggingFace."""
132133

133-
class Task(StrEnum):
134-
"""The task for the VL2L benchmark."""
135-
136-
SHOPIFY_GLOBAL_CATALOG = auto()
137-
MMMU = auto()
138-
139-
class UnknownTaskError(ValueError):
140-
"""The exception raised when an unknown task is encountered."""
141-
142-
def __init__(self, task: Dataset.Task) -> None:
143-
"""Initialize the exception."""
144-
super().__init__(f"Unknown task: {task}")
145-
146-
task: Annotated[
147-
Dataset.Task | None,
148-
Field(
149-
description=(
150-
"The vision-language-to-language task to run the benchmark for. If not "
151-
"specified, the task will be derived from the HuggingFace repository ID"
152-
" of the dataset."
153-
),
154-
),
155-
] = None
156-
157134
repo_id: Annotated[
158135
str,
159136
Field(description="The HuggingFace repository ID of the dataset."),
@@ -169,25 +146,35 @@ def __init__(self, task: Dataset.Task) -> None:
169146
] = None
170147

171148

172-
def create_task(dataset: Dataset, model: Model,
173-
openai_api_client: AsyncOpenAI) -> Task:
174-
"""Convert the dataset configuration to its corresponding task."""
175-
match dataset.task:
176-
case Dataset.Task.MMMU:
177-
return MMMU(dataset, model, openai_api_client)
178-
case Dataset.Task.SHOPIFY_GLOBAL_CATALOG:
179-
return ShopifyGlobalCatalogue(dataset, model, openai_api_client)
180-
case None:
181-
match dataset.repo_id:
182-
case "MMMU/MMMU":
183-
return MMMU(dataset, model, openai_api_client)
184-
case "Shopify/the-catalogue-public-beta":
185-
return ShopifyGlobalCatalogue(
186-
dataset, model, openai_api_client)
187-
case _:
188-
raise Dataset.UnknownTaskError(dataset.task)
189-
case _:
190-
raise Dataset.UnknownTaskError(dataset.task)
149+
class Verbosity(StrEnum):
150+
"""The verbosity level of the logger."""
151+
152+
TRACE = auto()
153+
"""The trace verbosity level."""
154+
155+
DEBUG = auto()
156+
"""The debug verbosity level."""
157+
158+
INFO = auto()
159+
"""The info verbosity level (default)."""
160+
161+
162+
class Endpoint(BaseModel):
163+
"""Specifies the OpenAI API endpoint to use for the VL2L benchmark."""
164+
165+
url: Annotated[
166+
str,
167+
Field(
168+
description=(
169+
"The URL of the OpenAI API endpoint that the inference requests will be"
170+
" sent to."
171+
),
172+
),
173+
] = "http://localhost:8000/v1"
174+
api_key: Annotated[
175+
str,
176+
Field(description="The API key to authenticate the inference requests."),
177+
] = ""
191178

192179

193180
@app.command()
@@ -196,25 +183,39 @@ def main(
196183
settings: TestSettings,
197184
model: Model,
198185
dataset: Dataset,
199-
endpoint: str = "http://localhost:8000/v1",
200-
openai_api_key: str = "",
186+
endpoint: Endpoint,
187+
random_seed: Annotated[
188+
int,
189+
Option(help="The seed for the random number generator used by the benchmark."),
190+
] = 12345,
191+
verbosity: Annotated[
192+
Verbosity,
193+
Option(help="The verbosity level of the logger."),
194+
] = Verbosity.INFO,
201195
) -> None:
202196
"""Main CLI for running the VL2L benchmark."""
197+
logger.remove()
198+
logger.add(sys.stdout, level=verbosity.value.upper())
203199
logger.info("Running VL2L benchmark with settings: {}", settings)
200+
logger.info("Running VL2L benchmark with model: {}", model)
204201
logger.info("Running VL2L benchmark with dataset: {}", dataset)
205-
logger.info("Running VL2L benchmark with endpoint: {}", endpoint)
202+
logger.info("Running VL2L benchmark with OpenAI API endpoint: {}", endpoint)
203+
logger.info("Running VL2L benchmark with random seed: {}", random_seed)
206204
lg_settings = settings.to_lgtype()
207-
task = create_task(
208-
dataset,
209-
model,
210-
AsyncOpenAI(
211-
base_url=endpoint,
205+
task = ShopifyGlobalCatalogue(
206+
dataset_cli=dataset,
207+
model_cli=model,
208+
openai_api_client=AsyncOpenAI(
209+
base_url=endpoint.url,
212210
http_client=DefaultAioHttpClient(),
213-
api_key=openai_api_key,
211+
api_key=endpoint.api_key,
214212
),
213+
random_seed=random_seed,
215214
)
216215
sut = task.construct_sut()
217216
qsl = task.construct_qsl()
217+
logger.info("Starting the VL2L benchmark with LoadGen...")
218218
lg.StartTest(sut, qsl, lg_settings)
219+
logger.info("The VL2L benchmark with LoadGen completed.")
219220
lg.DestroyQSL(qsl)
220221
lg.DestroySUT(sut)

0 commit comments

Comments
 (0)