
Commit dbefd09

Merge branch 'dev' of github.com:fsschneider/algorithmic-efficiency into dev

2 parents: f63e906 + cabcc59

14 files changed: +57 -57 lines


.github/workflows/linting.yml

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ jobs:
     - name: Install isort
       run: |
         python -m pip install --upgrade pip
-        pip install isort
+        pip install isort==5.12.0
     - name: Run isort
       run: |
         isort . --check --diff

algoperf/spec.py

Lines changed: 1 addition & 1 deletion
@@ -206,7 +206,7 @@ def eval_period_time_sec(self) -> int:
   @property
   @abc.abstractmethod
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
+    """Approx. steps the baseline can do in the allowed runtime budget."""
 
   @property
   def param_shapes(self):
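
For orientation, a concrete workload satisfies this abstract property by returning a fixed integer, as the workload diffs below do. A minimal sketch (the subclass name is hypothetical and all other abstract members are omitted), using the Criteo 1TB values from this commit:

from algoperf import spec


class MyWorkload(spec.Workload):
  """Illustrative subclass; only the two budget-related properties are shown."""

  @property
  def max_allowed_runtime_sec(self) -> int:
    return 7_703  # Wall-clock budget in seconds (~2.1 hours).

  @property
  def step_hint(self) -> int:
    """Approx. steps the baseline can do in the allowed runtime budget."""
    return 10_666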

algoperf/workloads/criteo1tb/workload.py

Lines changed: 2 additions & 2 deletions
@@ -93,7 +93,7 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 7703  # ~2 hours.
+    return 7_703  # ~2.1 hours.
 
   @property
   def eval_period_time_sec(self) -> int:
@@ -123,7 +123,7 @@ def _build_input_queue(
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
+    """Approx. steps the baseline can do in the allowed runtime budget."""
     return 10_666
 
   def _eval_model_on_split(self,

algoperf/workloads/fastmri/workload.py

Lines changed: 3 additions & 3 deletions
@@ -95,16 +95,16 @@ def accelerations(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 8859  # ~2.5 hours
+    return 4_430  # ~1.2 hours
 
   @property
   def eval_period_time_sec(self) -> int:
     return 80
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 36_189
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 18_094
 
   def _build_input_queue(self,
                          data_rng: spec.RandomState,

algoperf/workloads/imagenet_resnet/workload.py

Lines changed: 3 additions & 3 deletions
@@ -102,7 +102,7 @@ def resize_size(self) -> int:
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 63_008  # ~17.5 hours
+    return 66_159  # ~18.4 hours
 
   @property
   def eval_period_time_sec(self) -> int:
@@ -144,5 +144,5 @@ def _build_input_queue(
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 186_666
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 195_999

algoperf/workloads/imagenet_vit/workload.py

Lines changed: 4 additions & 5 deletions
@@ -3,8 +3,7 @@
 from typing import Dict, Iterator, Optional
 
 from algoperf import spec
-from algoperf.workloads.imagenet_resnet.workload import \
-    BaseImagenetResNetWorkload
+from algoperf.workloads.imagenet_resnet.workload import BaseImagenetResNetWorkload
 
 
 def decode_variant(variant: str) -> Dict[str, int]:
@@ -81,7 +80,7 @@ def eval_batch_size(self) -> int:
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 77_520  # ~22 hours
+    return 69_768  # ~19.4 hours
 
   @property
   def eval_period_time_sec(self) -> int:
@@ -110,5 +109,5 @@ def _build_dataset(
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 186_666
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 167_999

algoperf/workloads/librispeech_conformer/workload.py

Lines changed: 3 additions & 3 deletions
@@ -79,13 +79,13 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 61_068  # ~17 hours
+    return 58_015  # ~16.1 hours
 
   @property
   def eval_period_time_sec(self) -> int:
     return 24 * 60
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 80_000
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 76_000

algoperf/workloads/librispeech_deepspeech/librispeech_jax/workload.py

Lines changed: 8 additions & 8 deletions
@@ -1,15 +1,15 @@
 import functools
 from typing import Dict, Optional, Tuple
 
-from flax import jax_utils
 import jax
 import jax.numpy as jnp
 import numpy as np
+from flax import jax_utils
 
-from algoperf import param_utils
-from algoperf import spec
-from algoperf.workloads.librispeech_conformer.librispeech_jax.workload import \
-    LibriSpeechConformerWorkload
+from algoperf import param_utils, spec
+from algoperf.workloads.librispeech_conformer.librispeech_jax.workload import (
+  LibriSpeechConformerWorkload,
+)
 from algoperf.workloads.librispeech_deepspeech.librispeech_jax import models
 
 
@@ -99,12 +99,12 @@ def test_target_value(self) -> float:
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 48_000
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 38_400
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 55_506  # ~15.4 hours
+    return 44_405  # ~12.3 hours
 
   @property
   def use_tanh(self) -> bool:

algoperf/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py

Lines changed: 14 additions & 13 deletions
@@ -3,17 +3,18 @@
 import torch
 from torch.nn.parallel import DistributedDataParallel as DDP
 
-from algoperf import param_utils
-from algoperf import spec
+from algoperf import param_utils, spec
 from algoperf.pytorch_utils import pytorch_setup
-from algoperf.workloads.librispeech_conformer.librispeech_pytorch.models import \
-    initialize
-from algoperf.workloads.librispeech_conformer.librispeech_pytorch.workload import \
-    LibriSpeechConformerWorkload
-from algoperf.workloads.librispeech_deepspeech.librispeech_pytorch.models import \
-    DeepspeechConfig
-from algoperf.workloads.librispeech_deepspeech.librispeech_pytorch.models import \
-    DeepspeechEncoderDecoder
+from algoperf.workloads.librispeech_conformer.librispeech_pytorch.models import (
+  initialize,
+)
+from algoperf.workloads.librispeech_conformer.librispeech_pytorch.workload import (
+  LibriSpeechConformerWorkload,
+)
+from algoperf.workloads.librispeech_deepspeech.librispeech_pytorch.models import (
+  DeepspeechConfig,
+  DeepspeechEncoderDecoder,
+)
 
 USE_PYTORCH_DDP, RANK, DEVICE, N_GPUS = pytorch_setup()
 
@@ -76,12 +77,12 @@ def test_target_value(self) -> float:
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 48_000
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 38_400
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 55_506  # ~15.4 hours
+    return 44_405  # ~12.3 hours
 
   @property
   def use_tanh(self) -> bool:

algoperf/workloads/ogbg/workload.py

Lines changed: 4 additions & 5 deletions
@@ -9,8 +9,7 @@
 
 from algoperf import random_utils as prng
 from algoperf import spec
-from algoperf.workloads.ogbg import input_pipeline
-from algoperf.workloads.ogbg import metrics
+from algoperf.workloads.ogbg import input_pipeline, metrics
 
 
 class BaseOgbgWorkload(spec.Workload):
@@ -88,7 +87,7 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 18_477  # ~5 hours
+    return 12_011  # ~3.3 hours
 
   @property
   def eval_period_time_sec(self) -> int:
@@ -140,8 +139,8 @@ def loss_fn(
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 80_000
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 52_000
 
   @abc.abstractmethod
   def _normalize_eval_metrics(

algoperf/workloads/wmt/workload.py

Lines changed: 3 additions & 3 deletions
@@ -88,16 +88,16 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 48_151  # ~13.5 hours
+    return 43_336  # ~12.0 hours
 
   @property
   def eval_period_time_sec(self) -> int:
     return 14 * 60
 
   @property
   def step_hint(self) -> int:
-    """Max num steps the baseline algo was given to reach the target."""
-    return 133_333
+    """Approx. steps the baseline can do in the allowed runtime budget."""
+    return 120_000
 
   @property
   def pre_ln(self) -> bool:
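
Taken together, the workload diffs above keep `step_hint` roughly proportional to `max_allowed_runtime_sec`: each new hint is the old hint scaled by the ratio of new to old runtime budget. A quick standalone check using only values from this commit (illustrative script, not part of the repository):

# (old_runtime_sec, old_step_hint, new_runtime_sec, new_step_hint) per workload.
CHANGES = {
    'fastmri': (8_859, 36_189, 4_430, 18_094),
    'imagenet_resnet': (63_008, 186_666, 66_159, 195_999),
    'imagenet_vit': (77_520, 186_666, 69_768, 167_999),
    'librispeech_conformer': (61_068, 80_000, 58_015, 76_000),
    'librispeech_deepspeech': (55_506, 48_000, 44_405, 38_400),
    'ogbg': (18_477, 80_000, 12_011, 52_000),
    'wmt': (48_151, 133_333, 43_336, 120_000),
}

for name, (old_rt, old_hint, new_rt, new_hint) in CHANGES.items():
  predicted = old_hint * new_rt / old_rt
  # Every new hint lands within a fraction of a percent of the scaled value.
  print(f'{name}: predicted ~{predicted:,.0f}, actual {new_hint:,}')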

docs/DOCUMENTATION.md

Lines changed: 8 additions & 8 deletions
@@ -86,7 +86,7 @@ With the exception of `_build_input_queue`, submitters can call any of these fun
 def step_hint(self): -> int
 ```
 
-- The `step_hint` function gives the number of global steps the baseline algorithm was allowed to use to reach the targets for a workload. Note that the baseline algorithms may have reached the target in fewer steps than this, but these were the max number of steps the baseline algorithms used for their learning rate schedules. Submitters can use this to help specify learning rate (or other) schedules.
+- The `step_hint` function gives the number of global steps the baseline algorithm can perform within the `max_runtime` to reach the targets for a workload. The `step_hint` is therefore dependent on the `max_runtime` and the workload. Note that the baseline algorithms may have reached the target in fewer steps than this, but these were the max number of steps the baseline algorithms used for their learning rate schedules. Submitters can use this to help specify learning rate (or other) schedules.
 
 ###### Data augmentation and preprocessing
 
@@ -413,7 +413,7 @@ In each trial, the tuning trial with the fastest training time to achieve the *v
 
 Submissions to this ruleset are not allowed to have user-defined hyperparameters. This ruleset allows both submissions that use the same hyperparameters for all workloads, including the randomized ones (e.g. Adam with default parameters), as well as submissions that perform inner-loop tuning during their training run (e.g. SGD with line searches).
 
-Submissions will run on one instance of the [benchmarking hardware](#benchmarking-hardware). As always, submissions are allowed to perform inner-loop tuning (e.g. for their learning rate) but the tuning efforts will be part of their score. A submission will run *S=5* times and its score will be the median time to reach the target evaluation metric value on the validation set. To account for the lack of external tuning, submissions have a longer time budget to reach the target performance. Compared to the [external tuning ruleset](#external-tuning-ruleset), the `max_runtime` is tripled. Runs that do not reach the target performance of the evaluation metric within this allotted time budget have an infinite time.
+Submissions will run on one instance of the [benchmarking hardware](#benchmarking-hardware). As always, submissions are allowed to perform inner-loop tuning (e.g. for their learning rate) but the tuning efforts will be part of their score. A submission will run *S=5* times and its score will be the median time to reach the target evaluation metric value on the validation set. To account for the lack of external tuning, submissions have a longer time budget to reach the target performance. Compared to the [external tuning ruleset](#external-tuning-ruleset), the `max_runtime` is $1.5$ times longer. Runs that do not reach the target performance of the evaluation metric within this allotted time budget have an infinite time.
 
 ### Workloads
 
@@ -434,11 +434,11 @@ The currently eight fixed workloads are:
 | | **Task** | **Dataset** | **Model** | **Loss** | **Metric** | Validation<br>**Target** | Test<br>**Target** | Maximum<br>**Runtime** <br>(in secs) |
 |------------|-------------------------------|-------------|-------------------------|----------|------------|--------------------------|----------------------|------------------------|
 | **1** | Clickthrough rate prediction | Criteo 1TB | DLRMsmall | CE | CE | 0.123735 | 0.126041 | 7,703 |
-| **2** | MRI reconstruction | fastMRI | U-Net | L1 | SSIM | 0.723653 | 0.740633 | 8,859 |
-| **3<br>4** | Image classification | ImageNet | ResNet-50<br>ViT | CE | ER | 0.22569<br>0.22691 | 0.3440<br>0.3481 | 63,008 <br> 77,520 |
-| **5<br>6** | Speech recognition | LibriSpeech | Conformer<br>DeepSpeech | CTC | WER | 0.085884<br>0.119936 | 0.052981<br>0.074143 | 61,068<br>55,506 |
-| **7** | Molecular property prediction | OGBG | GNN | CE | mAP | 0.28098 | 0.268729 | 18,477 |
-| **8** | Translation | WMT | Transformer | CE | BLEU | 30.8491 | 30.7219 | 48,151 |
+| **2** | MRI reconstruction | fastMRI | U-Net | L1 | SSIM | 0.723653 | 0.740633 | 4,430 |
+| **3<br>4** | Image classification | ImageNet | ResNet-50<br>ViT | CE | ER | 0.22569<br>0.22691 | 0.3440<br>0.3481 | 66,159 <br> 69,768 |
+| **5<br>6** | Speech recognition | LibriSpeech | Conformer<br>DeepSpeech | CTC | WER | 0.085884<br>0.119936 | 0.052981<br>0.074143 | 58,015<br>44,405 |
+| **7** | Molecular property prediction | OGBG | GNN | CE | mAP | 0.28098 | 0.268729 | 12,011 |
+| **8** | Translation | WMT | Transformer | CE | BLEU | 30.8491 | 30.7219 | 43,336 |
 
 Default Dropout Values for Different Workloads:
 
@@ -499,7 +499,7 @@ When self-reported results, it is acceptable to perform the tuning trials on har
 Target performances on the validation and test sets will be defined for each [workload](#workloads) separately. For the [fixed workloads](#fixed-workloads), we take the best performance achievable by one of four standard algorithms (AdamW, NadamW, Nesterov Momentum, and Heavy Ball Momentum). These target-setting algorithms will follow the general process of the external tuning ruleset, with a significantly larger tuning budget of $200$ trials to guarantee competitive performance. Once the best algorithm and its hyperparameters are determined, training is repeated $20$ times. The median of the best achieved validation errors across seeds is used as the *validation* target. Out of the $10$ repeated runs that achieved this validation target, we took the worst achieved test error across seeds as our *test* target. Taking the median validation performance after rerunning the best hyperparameter point prevents our procedure from selecting a lucky outlier.
 To save computational resources, we only tuned two training algorithms instead of four, for the [randomized workloads](#randomized-workloads). For each workload variant, we used NadamW and the other best-performing training algorithm on the corresponding fixed workload the randomized workload is based on.
 
-Both [tuning rulesets](#tuning) will use the same target performances. The runtime of the target-setting algorithms on each workload will be chosen to match published results and is constrained by the overall time budget of roughly a single week for all fixed workloads. The `max_runtime` for submissions on each workload is $\frac{1}{3}$ longer than the runtime of the target-setting algorithms (this `max_runtime` will be three times as much for the self-tuning ruleset, see the [Self-tuning ruleset](#self-tuning-ruleset) section).
+Both [tuning rulesets](#tuning) will use the same target performances. The runtime of the target-setting algorithms on each workload will be chosen to match published results and is constrained by the overall time budget of roughly a single week for all fixed workloads. The initial `max_runtime` for submissions on each workload was $\frac{1}{3}$ longer than the runtime of the target-setting algorithms (this `max_runtime` is $1.5$ times as much for the self-tuning ruleset; see the [Self-tuning ruleset](#self-tuning-ruleset) section). After the initial round of submissions, we adapted the `max_runtime` based on the performance of the submissions (see [this issue](https://github.com/mlcommons/algorithmic-efficiency/issues/836)).
 
 #### Benchmark score using performance profiles
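
To make the updated `step_hint` wording concrete, a submission might stretch its learning-rate schedule over the hint. A minimal sketch (illustrative, not from this commit or the benchmark API), assuming a generic linear-warmup cosine decay:

import math


def lr_at_step(step: int, step_hint: int, base_lr: float = 1e-3,
               warmup_frac: float = 0.05) -> float:
  """Cosine decay with linear warmup, stretched over the workload's step hint."""
  warmup_steps = int(warmup_frac * step_hint)
  if step < warmup_steps:
    return base_lr * step / max(1, warmup_steps)
  progress = (step - warmup_steps) / max(1, step_hint - warmup_steps)
  return base_lr * 0.5 * (1.0 + math.cos(math.pi * min(1.0, progress)))


# E.g., with the new WMT step hint of 120_000:
print(lr_at_step(6_000, 120_000))    # End of warmup: full base_lr.
print(lr_at_step(120_000, 120_000))  # End of budget: decayed to ~0.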

scoring/compute_speedups.py

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@
     'Whether to save the results to disk.')
 FLAGS = flags.FLAGS
 
+# These are the old budgets, used in the first iteration of the competition.
 MAX_BUDGETS = {
     'criteo1tb': 7703,
     'fastmri': 8859,
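
Setting these old budgets against the new `max_allowed_runtime_sec` values in the workload diffs above gives the size of each change; a throwaway comparison snippet (not part of the script):

OLD = {'criteo1tb': 7_703, 'fastmri': 8_859, 'imagenet_resnet': 63_008,
       'imagenet_vit': 77_520, 'librispeech_conformer': 61_068,
       'librispeech_deepspeech': 55_506, 'ogbg': 18_477, 'wmt': 48_151}
NEW = {'criteo1tb': 7_703, 'fastmri': 4_430, 'imagenet_resnet': 66_159,
       'imagenet_vit': 69_768, 'librispeech_conformer': 58_015,
       'librispeech_deepspeech': 44_405, 'ogbg': 12_011, 'wmt': 43_336}

for name in OLD:
  # Criteo 1TB is the only budget this commit leaves unchanged.
  print(f'{name}: {NEW[name] / OLD[name]:.2f}x of the old budget')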

submission_runner.py

Lines changed: 2 additions & 2 deletions
@@ -409,10 +409,10 @@ def train_once(
         prepare_for_eval_end_time - prepare_for_eval_start_time)
 
   # Check if time is remaining,
-  # use 3x the runtime budget for the self-tuning ruleset.
+  # use 1.5x the runtime budget for the self-tuning ruleset.
   max_allowed_runtime_sec = (
       workload.max_allowed_runtime_sec if FLAGS.tuning_ruleset == 'external'
-      else 3 * workload.max_allowed_runtime_sec)
+      else 1.5 * workload.max_allowed_runtime_sec)
   train_state['is_time_remaining'] = (
       train_state['accumulated_submission_time'] < max_allowed_runtime_sec)
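
As a worked example of the new multiplier (standalone sketch; the helper name is illustrative, but the arithmetic mirrors the branch above):

def effective_budget_sec(max_allowed_runtime_sec: int,
                         tuning_ruleset: str) -> float:
  """Self-tuning runs now get 1.5x the per-workload budget instead of 3x."""
  if tuning_ruleset == 'external':
    return float(max_allowed_runtime_sec)
  return 1.5 * max_allowed_runtime_sec


# With the new LibriSpeech DeepSpeech budget of 44_405 s (~12.3 h):
assert effective_budget_sec(44_405, 'external') == 44_405.0
assert effective_budget_sec(44_405, 'self') == 66_607.5  # ~18.5 h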
