Commit 990503c

Commit message: offline WIP

Parent: a240d7c

File tree: 7 files changed, +931 / -14 lines

multimodal/vl2l/README.md

Lines changed: 26 additions & 2 deletions
````diff
@@ -19,7 +19,7 @@ on how to install Miniconda on your host machine. Then, you can create a new con
 environment via:
 
 ```bash
-conda create -n mlperf-inf-mm-vl2l python=3.14
+conda create -n mlperf-inf-mm-vl2l python=3.13
 ```
 
 ### Install LoadGen
@@ -47,5 +47,29 @@ cd ../
 Run a quick test to validate that LoadGen was installed correctly:
 
 ```bash
-python loadgen/
+python loadgen/demos/token_metrics/py_demo_server.py
+```
+
+### Install VL2L Benchmark CLI
+
+For users, install `mlperf-inf-mm-vl2l` with:
+
+```bash
+pip install multimodal/vl2l/
+```
+
+For developers, install `mlperf-inf-mm-vl2l` and the development tools with:
+
+```bash
+pip install multimodal/vl2l/[dev]
+```
+
+## Developer Guide
+
+### Linting
+
+You can lint the VL2L benchmark source code by running the following script:
+
+```bash
+bash multimodal/vl2l/scripts/linters.sh
 ```
````
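As a companion to the quick LoadGen test added above, the bindings can also be exercised directly from Python. This is a minimal sketch, not part of the commit; it only touches the `mlperf_loadgen` attributes that the new `cli.py` below sets in `TestSettings.to_lgtype()`:

```python
# Minimal sketch (not part of this commit): confirm the LoadGen bindings import
# and expose the settings fields that TestSettings.to_lgtype() relies on.
import mlperf_loadgen as lg

settings = lg.TestSettings()
settings.scenario = lg.TestScenario.Offline
settings.mode = lg.TestMode.PerformanceOnly
settings.offline_expected_qps = 10
settings.min_duration_ms = 5000
settings.use_token_latencies = True
print("LoadGen test settings constructed successfully")
```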

multimodal/vl2l/notebooks/shopify-global-catalogue-isl-osl.ipynb

Lines changed: 348 additions & 0 deletions
Large diffs are not rendered by default.

multimodal/vl2l/pyproject.toml

Lines changed: 20 additions & 3 deletions
```diff
@@ -8,12 +8,21 @@ classifiers = [
     "Intended Audience :: Developers",
     "Operating System :: POSIX :: Linux",
 ]
-requires-python = ">=3.14"
-dependencies = ["typer", "loguru", "pydantic", "mlcommons_loadgen", "pydantic-typer"]
+requires-python = ">=3.13"
+dependencies = [
+    "datasets",
+    "loguru",
+    "matplotlib",
+    "mlcommons_loadgen",
+    "openai[aiohttp]",
+    "pydantic",
+    "pydantic-typer",
+    "typer",
+]
 dynamic = ["version"]
 
 [project.optional-dependencies]
-dev = ["mypy", "pytest"]
+dev = ["black", "ruff", "mypy", "shellcheck-py", "pytest"]
 
 [project.scripts]
 mlperf-inf-mm-vl2l = "mlperf_inference_multimodal_vl2l.cli:app"
@@ -47,3 +56,11 @@ convention = "google"
 [tool.mypy]
 check_untyped_defs = true
 plugins = ['pydantic.mypy']
+
+[[tool.mypy.overrides]]
+module = "pydantic_typer"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "mlperf_loadgen"
+ignore_missing_imports = true
```

multimodal/vl2l/scripts/linters.sh

Lines changed: 40 additions & 0 deletions
```diff
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+set -eux
+set -o pipefail
+
+PROJECT_ROOT=$(dirname "${BASH_SOURCE[0]}")/../
+PROJECT_ROOT=$(realpath "${PROJECT_ROOT}")
+
+function _exit_with_help_msg() {
+  cat <<EOF
+Run linters for "${PROJECT_ROOT}".
+
+Usage: ${BASH_SOURCE[0]}
+  [-h | --help] Print this help message.
+EOF
+  if [ -n "$1" ]; then
+    echo "$(tput bold setab 1)$1$(tput sgr0)"
+  fi
+  exit "$2"
+}
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+  -h | --help)
+    _exit_with_help_msg "" 0
+    ;;
+  *)
+    _exit_with_help_msg "[ERROR] Unknown option: $1" 1
+    ;;
+  esac
+done
+
+echo "Running ruff..."
+ruff check --fix "${PROJECT_ROOT}"/src/
+
+echo "Running mypy..."
+mypy --config-file="${PROJECT_ROOT}"/pyproject.toml --install-types "${PROJECT_ROOT}"/src/
+
+echo "Running shellcheck..."
+find "${PROJECT_ROOT}" -type f -name "*.sh" -exec shellcheck -ax {} +
```
multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/__init__.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -1,9 +1,9 @@
-"""Reference Implementation for the Vision-language-to-language (VL2L) Benchmark"""
+"""Reference Implementation for the Vision-language-to-language (VL2L) Benchmark."""
 
 from __future__ import annotations
+
+import contextlib
 from importlib.metadata import PackageNotFoundError, version
 
-try:
+with contextlib.suppress(PackageNotFoundError):
     __version__ = version("mlperf-inference-multimodal-vl2l")
-except PackageNotFoundError:
-    pass
```
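A small usage note, not part of the commit: because the `try`/`except` was replaced with `contextlib.suppress`, `__version__` is still simply left undefined when the distribution metadata is absent (for example in an uninstalled source checkout), so callers should treat the attribute as optional. The import name below is inferred from the `project.scripts` entry in `pyproject.toml`:

```python
# Hedged example (not part of this commit): __version__ may be missing when the
# distribution metadata is not installed, so read it defensively.
import mlperf_inference_multimodal_vl2l as vl2l

print(getattr(vl2l, "__version__", "unknown (package metadata not installed)"))
```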

multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/cli.py

Lines changed: 209 additions & 5 deletions
```diff
@@ -2,12 +2,216 @@
 
 from __future__ import annotations
 
-import typer
+from datetime import timedelta
+from enum import StrEnum, auto
+from typing import Annotated
 
-app = typer.Typer()
+import mlperf_loadgen as lg
+from loguru import logger
+from openai import AsyncOpenAI, DefaultAioHttpClient
+from pydantic import BaseModel, Field
+from pydantic_typer import Typer
+
+from .task import MMMU, ShopifyGlobalCatalogue, Task
+
+app = Typer()
+
+
+class TestScenario(StrEnum):
+    """The test scenario for the MLPerf inference LoadGen."""
+
+    SERVER = auto()
+    """Run the benchmark in server/interactive scenario."""
+
+    OFFLINE = auto()
+    """Run the benchmark in offline/batch scenario."""
+
+    class UnknownValueError(ValueError):
+        """The exception raised when an unknown test scenario is encountered."""
+
+        def __init__(self, test_scenario: TestScenario) -> None:
+            """Initialize the exception."""
+            super().__init__(f"Unknown test scenario: {test_scenario}")
+
+    def to_lgtype(self) -> lg.TestScenario:
+        """Convert the test scenario to its corresponding LoadGen type."""
+        match self:
+            case TestScenario.SERVER:
+                return lg.TestScenario.Server
+            case TestScenario.OFFLINE:
+                return lg.TestScenario.Offline
+            case _:
+                raise TestScenario.UnknownValueError(self)
+
+
+class TestMode(StrEnum):
+    """The test mode for the MLPerf inference LoadGen."""
+
+    PERFORMANCE_ONLY = auto()
+    """Run the benchmark to evaluate performance."""
+
+    ACCURACY_ONLY = auto()
+    """Run the benchmark to evaluate model quality."""
+
+    class UnknownValueError(ValueError):
+        """The exception raised when an unknown test mode is encountered."""
+
+        def __init__(self, test_mode: TestMode) -> None:
+            """Initialize the exception."""
+            super().__init__(f"Unknown test mode: {test_mode}")
+
+    def to_lgtype(self) -> lg.TestMode:
+        """Convert the test mode to its corresponding LoadGen type."""
+        match self:
+            case TestMode.PERFORMANCE_ONLY:
+                return lg.TestMode.PerformanceOnly
+            case TestMode.ACCURACY_ONLY:
+                return lg.TestMode.AccuracyOnly
+            case _:
+                raise TestMode.UnknownValueError(self)
+
+
+class TestSettings(BaseModel):
+    """The test settings for the MLPerf inference LoadGen."""
+
+    scenario: Annotated[
+        TestScenario,
+        Field(
+            description=(
+                "The MLPerf inference benchmarking scenario to run the benchmark in."
+            ),
+        ),
+    ] = TestScenario.OFFLINE
+
+    mode: Annotated[
+        TestMode,
+        Field(
+            description=(
+                "Whether you want to run the benchmark for performance or accuracy."
+            ),
+        ),
+    ] = TestMode.PERFORMANCE_ONLY
+
+    offline_expected_qps: Annotated[
+        float,
+        Field(
+            description="The expected QPS for the offline scenario.",
+        ),
+    ] = 10
+
+    min_duration: Annotated[
+        timedelta,
+        Field(
+            description="The minimum testing duration.",
+        ),
+    ] = timedelta(seconds=5)
+
+    def to_lgtype(self) -> lg.TestSettings:
+        """Convert the test settings to its corresponding LoadGen type."""
+        settings = lg.TestSettings()
+        settings.scenario = self.scenario.to_lgtype()
+        settings.mode = self.mode.to_lgtype()
+        settings.offline_expected_qps = self.offline_expected_qps
+        settings.min_duration_ms = round(self.min_duration.total_seconds() * 1000)
+        settings.use_token_latencies = True
+        return settings
+
+
+class Model(BaseModel):
+    """Specifies the model to use for the VL2L benchmark."""
+
+    repo_id: Annotated[
+        str,
+        Field(description="The HuggingFace repository ID of the model."),
+    ] = "Qwen/Qwen3-VL-30B-A3B-Instruct"
+
+
+class Dataset(BaseModel):
+    """Specifies a dataset on HuggingFace."""
+
+    class Task(StrEnum):
+        """The task for the VL2L benchmark."""
+
+        SHOPIFY_GLOBAL_CATALOG = auto()
+        MMMU = auto()
+
+    class UnknownTaskError(ValueError):
+        """The exception raised when an unknown task is encountered."""
+
+        def __init__(self, task: Dataset.Task) -> None:
+            """Initialize the exception."""
+            super().__init__(f"Unknown task: {task}")
+
+    task: Annotated[
+        Dataset.Task | None,
+        Field(
+            description=(
+                "The vision-language-to-language task to run the benchmark for. If not "
+                "specified, the task will be derived from the HuggingFace repository ID"
+                " of the dataset."
+            ),
+        ),
+    ] = None
+
+    repo_id: Annotated[
+        str,
+        Field(description="The HuggingFace repository ID of the dataset."),
+    ] = "Shopify/the-catalogue-public-beta"
+
+    token: Annotated[
+        str | None,
+        Field(
+            description=(
+                "The token to access the HuggingFace repository of the dataset."
+            ),
+        ),
+    ] = None
+
+
+def create_task(dataset: Dataset, model: Model, openai_api_client: AsyncOpenAI) -> Task:
+    """Convert the dataset configuration to its corresponding task."""
+    match dataset.task:
+        case Dataset.Task.MMMU:
+            return MMMU(dataset, model, openai_api_client)
+        case Dataset.Task.SHOPIFY_GLOBAL_CATALOG:
+            return ShopifyGlobalCatalogue(dataset, model, openai_api_client)
+        case None:
+            match dataset.repo_id:
+                case "MMMU/MMMU":
+                    return MMMU(dataset, model, openai_api_client)
+                case "Shopify/the-catalogue-public-beta":
+                    return ShopifyGlobalCatalogue(dataset, model, openai_api_client)
+                case _:
+                    raise Dataset.UnknownTaskError(dataset.task)
+        case _:
+            raise Dataset.UnknownTaskError(dataset.task)
 
 
 @app.command()
-def main():
-    """VL2L benchmark CLI"""
-    typer.echo("Hello, World!")
+def main(
+    *,
+    settings: TestSettings,
+    model: Model,
+    dataset: Dataset,
+    endpoint: str = "http://localhost:8000/v1",
+    openai_api_key: str = "",
+) -> None:
+    """Main CLI for running the VL2L benchmark."""
+    logger.info("Running VL2L benchmark with settings: {}", settings)
+    logger.info("Running VL2L benchmark with dataset: {}", dataset)
+    logger.info("Running VL2L benchmark with endpoint: {}", endpoint)
+    lg_settings = settings.to_lgtype()
+    task = create_task(
+        dataset,
+        model,
+        AsyncOpenAI(
+            base_url=endpoint,
+            http_client=DefaultAioHttpClient(),
+            api_key=openai_api_key,
+        ),
+    )
+    sut = task.construct_sut()
+    qsl = task.construct_qsl()
+    lg.StartTest(sut, qsl, lg_settings)
+    lg.DestroyQSL(qsl)
+    lg.DestroySUT(sut)
```
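The `.task` module that provides `MMMU`, `ShopifyGlobalCatalogue`, and the `Task` type is part of this commit but its diff is not rendered in this view, so the interface that `main()` depends on has to be inferred. The sketch below is an assumption about what `construct_sut()`/`construct_qsl()` plausibly wrap; only those two method names come from the diff above, everything else (class name, callbacks, sample counts) is hypothetical, and the `mlperf_loadgen` constructors shown are the library's standard Python bindings:

```python
# Hypothetical sketch only: the real .task module is not shown in this view.
# It illustrates how construct_sut()/construct_qsl() could wrap the standard
# mlperf_loadgen constructors that cli.main() passes to lg.StartTest().
from __future__ import annotations

import mlperf_loadgen as lg


class SketchTask:
    """Assumed shape of the Task objects returned by create_task()."""

    def __init__(self, total_samples: int = 16) -> None:
        self.total_samples = total_samples

    # QSL callbacks: LoadGen tells the task which samples to (un)load.
    def load_samples(self, sample_indices: list[int]) -> None:
        """Decode the requested dataset rows into memory (no-op in this sketch)."""

    def unload_samples(self, sample_indices: list[int]) -> None:
        """Release the decoded samples (no-op in this sketch)."""

    # SUT callbacks: LoadGen issues queries, the task must complete them.
    def issue_queries(self, query_samples: list[lg.QuerySample]) -> None:
        """Complete every query immediately with an empty response.

        A real task would instead send each sample to the OpenAI-compatible
        endpoint and call QuerySamplesComplete once the tokens arrive.
        """
        lg.QuerySamplesComplete(
            [lg.QuerySampleResponse(qs.id, 0, 0) for qs in query_samples]
        )

    def flush_queries(self) -> None:
        """Drain any in-flight work (nothing to do in this sketch)."""

    def construct_sut(self) -> object:
        return lg.ConstructSUT(self.issue_queries, self.flush_queries)

    def construct_qsl(self) -> object:
        return lg.ConstructQSL(
            self.total_samples,  # total sample count
            self.total_samples,  # performance sample count
            self.load_samples,
            self.unload_samples,
        )
```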
