LinPoly
diff --git a/PyTorch/LanguageModeling/BERT/README.md
+5-4 b/PyTorch/LanguageModeling/BERT/README.md
+5-4
diff --git a/PyTorch/LanguageModeling/BERT/lddl/docs/images/preprocess_perf.gif
-122 KB b/PyTorch/LanguageModeling/BERT/lddl/docs/images/preprocess_perf.gif
-122 KB
diff --git a/PyTorch/LanguageModeling/BERT/lddl/setup.py
+10-10 b/PyTorch/LanguageModeling/BERT/lddl/setup.py
+10-10
@@ -344,10 +344,10 @@ out_dir=${5:-"/workspace/bert/results/SST-2"} # For SST-2.
 This repository contains a number of predefined configurations, defined in `scripts/configs/squad_config.sh`, `scripts/configs/glue_config.sh`, and `scripts/configs/pretrain_config.sh`, for running SQuAD, GLUE, and pre-training on NVIDIA DGX-1, NVIDIA DGX-2H, or NVIDIA DGX A100 nodes. For example, to use the default DGX A100 8-GPU configuration, run:
 
 ```
-bash scripts/run_squad.sh $(source scripts/configs/squad_config.sh && dgxa10080g_8gpu_fp16_p2binned)  # For the SQuAD v1.1 dataset.
-bash scripts/run_glue.sh $(source scripts/configs/glue_config.sh && mrpc_dgxa10080g_8gpu_fp16_p2binned)  # For the MRPC dataset.
-bash scripts/run_glue.sh $(source scripts/configs/glue_config.sh && sst-2_dgxa10080g_8gpu_fp16_p2binned)  # For the SST-2 dataset.
-bash scripts/run_pretraining.sh $(source scripts/configs/pretrain_config.sh && dgxa10080g_8gpu_fp16) # For pre-training
+bash scripts/run_squad.sh $(source scripts/configs/squad_config.sh && dgxa100-80g_8gpu_fp16)  # For the SQuAD v1.1 dataset.
+bash scripts/run_glue.sh $(source scripts/configs/glue_config.sh && mrpc_dgxa100-80g_8gpu_fp16)  # For the MRPC dataset.
+bash scripts/run_glue.sh $(source scripts/configs/glue_config.sh && sst-2_dgxa100-80g_8gpu_fp16)  # For the SST-2 dataset.
+bash scripts/run_pretraining.sh $(source scripts/configs/pretrain_config.sh && dgxa100-80g_8gpu_fp16) # For pre-training
 ```
 
 ## Advanced
@@ -891,6 +891,7 @@ January 2022
 - Knowledge Distillation support
 - Pre-training with native AMP, native DDP, and TorchScript with NVFuser backend
 - Pre-training using [Language Datasets and Data Loaders (LDDL)](../../../Tools/lddl)
+- Binned pre-training for phase 2 with LDDL using a bin size of 64
 
 July 2020
 -  Updated accuracy and performance tables to include A100 results
 
@@ -22,16 +22,16 @@
     packages=find_packages(),
     python_requires='>=3.6',
     install_requires=[
-        'dask[complete]>=2021.2.0',
-        'distributed>=2021.2.0',
-        'dask-mpi>=2.21.0',
-        'pyarrow>=3.0.0',
-        'mpi4py>=3.0.3',
-        'transformers>=4.3.2',
-        'wikiextractor>=3.0.5',
-        'news-please>=1.5.18',
-        'cchardet>=2.1.7',
-        'awscli>=1.19.53',
+        'dask[complete]==2021.7.1',
+        'distributed==2021.7.1',
+        'dask-mpi==2021.11.0',
+        'pyarrow==4.0.1',
+        'mpi4py==3.1.3',
+        'transformers==4.16.2',
+        'wikiextractor==3.0.6',
+        'news-please==1.5.21',
+        'cchardet==2.1.7',
+        'awscli==1.22.55',
         'wikiextractor @ git+https://github.com/attardi/wikiextractor.git',
     ],
     entry_points={