File tree Expand file tree Collapse file tree 2 files changed +11
-3
lines changed Expand file tree Collapse file tree 2 files changed +11
-3
lines changed Original file line number Diff line number Diff line change @@ -92,7 +92,6 @@ RUN cd /algorithmic-efficiency && pip install -e '.[full]'
92
92
93
93
RUN cd /algorithmic-efficiency && git fetch origin
94
94
RUN cd /algorithmic-efficiency && git pull
95
- RUN pip install wandb
96
95
97
96
# TODO: remove this; it is only a temporary measure for development.
98
97
COPY scripts/startup.sh /algorithmic-efficiency/docker/scripts/startup.sh
Original file line number Diff line number Diff line change @@ -693,12 +693,21 @@ def main(_):
693
693
694
694
# Prevent OOM on memory-heavy workloads by lowering the XLA client memory
# fraction (read by JAX through the XLA_PYTHON_CLIENT_MEM_FRACTION variable).
base_workload = workloads.get_base_workload_name(FLAGS.workload)

# Use a membership test here: comparing the workload name to the whole
# collection with `==` is always False for a string, so the limit below would
# never be applied.
if base_workload in (
    'librispeech_conformer',
    'librispeech_deepspeech',
    'imagenet_vit',
    'criteo1tb',
):
  os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '0.80'
698
704
699
705
# Optional, flag-controlled allocator setting for PyTorch runs.
if FLAGS.set_pytorch_max_split_size:
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:256'

# The PyTorch librispeech conformer workload gets its own allocator setting.
# NOTE(review): this assignment replaces any value written just above, so
# 'max_split_size_mb:256' is dropped when both conditions hold -- confirm that
# this is intended.
uses_pytorch = FLAGS.framework == 'pytorch'
if uses_pytorch and base_workload == 'librispeech_conformer':
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
710
+
702
711
# Extend path according to framework.
703
712
workload_metadata ['workload_path' ] = os .path .join (
704
713
BASE_WORKLOADS_DIR ,
You can’t perform that action at this time.
0 commit comments