Skip to content

Commit 751e965

Browse files
authored
feat: add quick run mode (#941)
* Set config per run mode * Set cool down duration * Update rapid mode * Hide integrationTestRun from UI
1 parent 6ae5c77 commit 751e965

17 files changed

+353
-190
lines changed

docs/build-and-run.md

+1-5
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,7 @@ flutter run
4242
If you want to run or debug the Flutter app for any platform using graphical user interface,
4343
you can use [VS Code with Flutter extension](https://docs.flutter.dev/get-started/editor?tab=vscode).
4444

45-
If you want to test something without spending a lot of time on the benchmark,
46-
you can use flag `--dart-define=FAST_MODE=true` to speed up the benchmark.
47-
You should not evaluate performance when using this flag.
48-
49-
Add `WITH_<VENDOR>=1` to make commands to build the the app with backends.
45+
Add `WITH_<VENDOR>=1` to make commands to build the app with certain backends.
5046
For example:
5147

5248
```bash

flutter/assets/tasks.pbtxt

+119-21
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,26 @@
44
task {
55
id: "image_classification_v2"
66
name: "Image Classification v2"
7-
min_query_count: 1024
8-
min_duration: 60
9-
max_duration: 600
107
max_throughput: 1000
118
max_accuracy: 1.0
129
scenario: "SingleStream"
10+
runs {
11+
normal {
12+
min_query_count: 1024
13+
min_duration: 60
14+
max_duration: 600
15+
}
16+
quick {
17+
min_query_count: 128
18+
min_duration: 6
19+
max_duration: 60
20+
}
21+
rapid {
22+
min_query_count: 64
23+
min_duration: 6
24+
max_duration: 60
25+
}
26+
}
1327
datasets {
1428
type: IMAGENET
1529
full {
@@ -47,12 +61,26 @@ task {
4761
task {
4862
id: "object_detection"
4963
name: "Object Detection"
50-
min_query_count: 1024
51-
min_duration: 60
52-
max_duration: 600
5364
max_throughput: 2000
5465
max_accuracy: 1.0
5566
scenario: "SingleStream"
67+
runs {
68+
normal {
69+
min_query_count: 1024
70+
min_duration: 60
71+
max_duration: 600
72+
}
73+
quick {
74+
min_query_count: 128
75+
min_duration: 6
76+
max_duration: 60
77+
}
78+
rapid {
79+
min_query_count: 64
80+
min_duration: 6
81+
max_duration: 60
82+
}
83+
}
5684
datasets {
5785
type: COCO
5886
full {
@@ -90,12 +118,26 @@ task {
90118
task {
91119
id: "image_segmentation_v2"
92120
name: "Image Segmentation v2"
93-
min_query_count: 1024
94-
min_duration: 60
95-
max_duration: 600
96121
max_throughput: 2000
97122
max_accuracy: 1.0
98123
scenario: "SingleStream"
124+
runs {
125+
normal {
126+
min_query_count: 1024
127+
min_duration: 60
128+
max_duration: 600
129+
}
130+
quick {
131+
min_query_count: 128
132+
min_duration: 6
133+
max_duration: 60
134+
}
135+
rapid {
136+
min_query_count: 64
137+
min_duration: 6
138+
max_duration: 60
139+
}
140+
}
99141
datasets {
100142
type: ADE20K
101143
full {
@@ -132,12 +174,26 @@ task {
132174
task {
133175
id: "natural_language_processing"
134176
name: "Language Understanding"
135-
min_query_count: 1024
136-
min_duration: 60
137-
max_duration: 600
138177
max_throughput: 2000
139178
max_accuracy: 1.0
140179
scenario: "SingleStream"
180+
runs {
181+
normal {
182+
min_query_count: 1024
183+
min_duration: 60
184+
max_duration: 600
185+
}
186+
quick {
187+
min_query_count: 128
188+
min_duration: 6
189+
max_duration: 60
190+
}
191+
rapid {
192+
min_query_count: 64
193+
min_duration: 6
194+
max_duration: 60
195+
}
196+
}
141197
datasets {
142198
type: SQUAD
143199
full {
@@ -171,12 +227,26 @@ task {
171227
task {
172228
id: "super_resolution"
173229
name: "Super Resolution "
174-
min_query_count: 1024
175-
min_duration: 60
176-
max_duration: 600
177230
max_throughput: 2000
178231
max_accuracy: 1.0
179232
scenario: "SingleStream"
233+
runs {
234+
normal {
235+
min_query_count: 1024
236+
min_duration: 60
237+
max_duration: 600
238+
}
239+
quick {
240+
min_query_count: 128
241+
min_duration: 6
242+
max_duration: 60
243+
}
244+
rapid {
245+
min_query_count: 64
246+
min_duration: 6
247+
max_duration: 60
248+
}
249+
}
180250
datasets {
181251
type: SNUSR
182252
full {
@@ -212,12 +282,26 @@ task {
212282
task {
213283
id: "image_classification_offline_v2"
214284
name: "Image Classification v2 (Offline)"
215-
min_query_count: 24576
216-
min_duration: 0
217-
max_duration: 0
218285
max_throughput: 2000
219286
max_accuracy: 1.0
220287
scenario: "Offline"
288+
runs {
289+
normal {
290+
min_query_count: 24576
291+
min_duration: 0
292+
max_duration: 0
293+
}
294+
quick {
295+
min_query_count: 2457
296+
min_duration: 0
297+
max_duration: 0
298+
}
299+
rapid {
300+
min_query_count: 64
301+
min_duration: 6
302+
max_duration: 60
303+
}
304+
}
221305
datasets {
222306
type: IMAGENET
223307
full {
@@ -255,12 +339,26 @@ task {
255339
task {
256340
id: "stable_diffusion"
257341
name: "Stable Diffusion"
258-
min_query_count: 1024
259-
min_duration: 60
260-
max_duration: 300
261342
max_throughput: 2000
262343
max_accuracy: 1.0
263344
scenario: "SingleStream"
345+
runs {
346+
normal {
347+
min_query_count: 1024
348+
min_duration: 60
349+
max_duration: 300
350+
}
351+
quick {
352+
min_query_count: 128
353+
min_duration: 6
354+
max_duration: 30
355+
}
356+
rapid {
357+
min_query_count: 64
358+
min_duration: 6
359+
max_duration: 60
360+
}
361+
}
264362
datasets {
265363
type: COCOGEN
266364
full {

flutter/cpp/proto/mlperf_task.proto

+19-7
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,42 @@ message MLPerfConfig {
3131
// Config of the mlperf tasks.
3232
// A task is basically a combination of models and a dataset.
3333
//
34-
// Next ID: 12
34+
// Next ID: 13
3535
message TaskConfig {
3636
// Must be unique in one task file. Ex: image_classification
3737
// used to match backend settings
3838
required string id = 1;
3939
// Human-readable name. Ex: Image classification.
4040
required string name = 2;
41-
// Minimum number of samples the test should run in the performance mode.
42-
required int32 min_query_count = 3;
43-
// Minimum duration the test should run in the performance mode, in seconds.
44-
required double min_duration = 4 [default = 60];
45-
// Maximum duration the test should run in the performance mode, in seconds.
46-
required double max_duration = 10 [default = 600];
4741
// Max expected throughput score
4842
required float max_throughput = 5;
4943
// Max expected accuracy
5044
required float max_accuracy = 6;
5145
// LoadGen parameter. Allowed values: SingleStream, Offline
5246
required string scenario = 7;
47+
required RunConfig runs = 12;
5348
required DatasetConfig datasets = 8;
5449
required ModelConfig model = 9;
5550
repeated CustomConfig custom_config = 11;
5651
}
5752

53+
// Run configurations of a task, one per run mode (normal, quick, rapid).
54+
message RunConfig {
55+
required OneRunConfig normal = 1;
56+
required OneRunConfig quick = 2;
57+
required OneRunConfig rapid = 3;
58+
}
59+
60+
// Config of one run: the query-count and duration limits used in the performance mode.
61+
message OneRunConfig {
62+
// Minimum number of samples the test should run in the performance mode.
63+
required int32 min_query_count = 3;
64+
// Minimum duration the test should run in the performance mode, in seconds.
65+
required double min_duration = 4 [default = 60];
66+
// Maximum duration the test should run in the performance mode, in seconds.
67+
required double max_duration = 10 [default = 600];
68+
}
69+
5870
// Datasets for a task
5971
//
6072
// Next ID: 5

flutter/integration_test/first_test.dart

+5-12
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,12 @@ void main() {
1919
binding.framePolicy = LiveTestWidgetsFlutterBindingFramePolicy.fullyLive;
2020

2121
final prefs = <String, Object>{
22-
StoreConstants.testMode: true,
2322
StoreConstants.selectedBenchmarkRunMode:
24-
BenchmarkRunModeEnum.submissionRun.name,
25-
StoreConstants.testMinDuration: 1,
26-
StoreConstants.testMinQueryCount: 4,
23+
BenchmarkRunModeEnum.integrationTestRun.name,
24+
StoreConstants.cooldown: true,
25+
StoreConstants.cooldownDuration:
26+
BenchmarkRunModeEnum.integrationTestRun.cooldownDuration,
2727
};
28-
if (DartDefine.perfTestEnabled) {
29-
prefs[StoreConstants.testMinDuration] = 15;
30-
prefs[StoreConstants.testMinQueryCount] = 64;
31-
prefs[StoreConstants.testCooldownDuration] = 2;
32-
}
3328
SharedPreferences.setMockInitialValues(prefs);
3429

3530
group('integration tests', () {
@@ -67,9 +62,7 @@ void checkTasks(ExtendedResult extendedResult) {
6762
expect(benchmarkResult.performanceRun!.throughput, isNotNull);
6863

6964
checkAccuracy(benchmarkResult);
70-
if (DartDefine.perfTestEnabled) {
71-
checkThroughput(benchmarkResult, extendedResult.environmentInfo);
72-
}
65+
checkThroughput(benchmarkResult, extendedResult.environmentInfo);
7366
}
7467
}
7568

flutter/lib/app_constants.dart

-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@ class DartDefine {
55
bool.fromEnvironment('OFFICIAL_BUILD', defaultValue: false);
66
static const firebaseCrashlyticsEnabled =
77
bool.fromEnvironment('FIREBASE_CRASHLYTICS_ENABLED', defaultValue: false);
8-
static const isFastMode =
9-
bool.fromEnvironment('FAST_MODE', defaultValue: false);
10-
118
static const perfTestEnabled =
129
bool.fromEnvironment('PERF_TEST', defaultValue: false);
1310
}

flutter/lib/benchmark/benchmark.dart

+5-17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import 'package:collection/collection.dart';
22

3-
import 'package:mlperfbench/app_constants.dart';
43
import 'package:mlperfbench/backend/bridge/run_settings.dart';
54
import 'package:mlperfbench/backend/loadgen_info.dart';
65
import 'package:mlperfbench/benchmark/info.dart';
@@ -69,24 +68,13 @@ class Benchmark {
6968
required List<pb.CommonSetting> commonSettings,
7069
required String backendLibName,
7170
required String logDir,
72-
required int testMinDuration,
73-
required int testMinQueryCount,
7471
}) async {
7572
final dataset = runMode.chooseDataset(taskConfig);
73+
final runConfig = runMode.chooseRunConfig(taskConfig);
7674

77-
int minQueryCount;
78-
double minDuration;
79-
if (testMinDuration != 0) {
80-
minQueryCount = testMinQueryCount;
81-
minDuration = testMinDuration.toDouble();
82-
} else if (DartDefine.isFastMode) {
83-
minQueryCount = 8;
84-
minDuration = 1.0;
85-
} else {
86-
minQueryCount = taskConfig.minQueryCount;
87-
minDuration = taskConfig.minDuration;
88-
}
89-
double maxDuration = taskConfig.maxDuration;
75+
int minQueryCount = runConfig.minQueryCount;
76+
double minDuration = runConfig.minDuration;
77+
double maxDuration = runConfig.maxDuration;
9078

9179
final settings = pb.SettingList(
9280
setting: commonSettings,
@@ -114,7 +102,7 @@ class Benchmark {
114102
model_image_width: taskConfig.model.imageWidth,
115103
model_image_height: taskConfig.model.imageHeight,
116104
scenario: taskConfig.scenario,
117-
mode: runMode.loadgenMode,
105+
mode: runMode.loadgenMode.name,
118106
batch_size: selectedDelegate.batchSize,
119107
min_query_count: minQueryCount,
120108
min_duration: minDuration,

0 commit comments

Comments
 (0)