Commit

Merge branch 'main' into sql-dataset-support

sywangyi authored Nov 23, 2023
2 parents 346fcb6 + 13fa10d · commit b4efaa7

Showing 20 changed files with 249 additions and 61 deletions.
1 change: 0 additions & 1 deletion .github/workflows/build_pr_documentation.yml
@@ -34,7 +34,6 @@ jobs:
           cd doc-builder
           git pull origin main
           pip install .
-          pip install black
           cd ..
       - name: Make documentation
9 changes: 3 additions & 6 deletions .github/workflows/check_code_quality.yml
@@ -29,12 +29,9 @@ jobs:
       run: |
         source venv/bin/activate
         pip install --upgrade pip
-        pip install black ruff
-    - name: Check style with black
-      run: |
-        source venv/bin/activate
-        black --check .
+        pip install ruff
     - name: Check style with ruff
       run: |
         source venv/bin/activate
-        ruff .
+        ruff check . setup.py
+        ruff format --check . setup.py
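A note on the tooling change above: recent Ruff releases split the CLI into `ruff check` (linting, replacing the older bare `ruff .` invocation) and `ruff format` (a Black-compatible formatter), which is why the separate `black` install and check steps could be dropped in favor of a single tool.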
4 changes: 2 additions & 2 deletions .github/workflows/slow_tests.yml
@@ -3,8 +3,8 @@ name: Non-regression tests
 on:
   workflow_dispatch:
   schedule:
-    - cron: '0 21 * * 0-5' # every Sunday to Friday at 11pm CET
-    - cron: '0 21 * * 6' # every Saturday at 1am CET
+    - cron: '0 21 * * 0-5' # every Sunday to Friday at 11pm CET (10pm winter time)
+    - cron: '0 21 * * 6' # every Saturday at 1am CET (midnight winter time)
 
 concurrency:
   group: ${{ github.workflow }}
142 changes: 142 additions & 0 deletions .github/workflows/slow_tests_gaudi2.yml
@@ -0,0 +1,142 @@
name: (Gaudi2) Non-regression tests

on:
  workflow_dispatch:
  schedule:
    - cron: '0 23 * * 3,6' # every Wednesday and Saturday at 1am CET (midnight winter time)

concurrency:
  group: ${{ github.workflow }}

jobs:
  stable-diffusion:
    name: Test Stable Diffusion
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_diffusers.sh
  deepspeed:
    name: Test DeepSpeed models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - stable-diffusion # run the job when the previous test job is done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_deepspeed.sh
  multi-card:
    name: Test multi-card models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed # run the job when the previous test job is done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_8x.sh
  single-card:
    name: Test single-card models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card # run the job when the previous test jobs are done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            -e RUN_ALBERT_XXL_1X=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_1x.sh
  text-generation:
    name: Test text-generation example
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card
      - single-card # run the job when the previous test jobs are done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
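A note on the job graph in this new workflow: the `needs` chain serializes the test suites so only one of them occupies the Gaudi2 runner at a time, and the `if: ${{ !cancelled() && (success() || failure()) }}` guard lets each stage run even when an earlier stage failed; only cancelling the whole workflow skips the remaining jobs.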
8 changes: 4 additions & 4 deletions Makefile
@@ -22,12 +22,12 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
 
 # Run code quality checks
 style_check: clean
-	black --check . setup.py
-	ruff . setup.py
+	ruff check . setup.py
+	ruff format --check . setup.py
 
 style: clean
-	black . setup.py
-	ruff . setup.py --fix
+	ruff check . setup.py --fix
+	ruff format . setup.py
 
 # Run unit and integration tests
 fast_tests:
1 change: 1 addition & 0 deletions examples/contrastive-image-text/README.md
@@ -207,6 +207,7 @@ python ../gaudi_spawn.py --use_mpi --world_size 8 run_bridgetower.py \
     --output_dir /tmp/bridgetower-test \
     --model_name_or_path BridgeTower/bridgetower-large-itm-mlm-itc \
     --dataset_name jmhessel/newyorker_caption_contest --dataset_config_name matching \
+    --dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6 \
     --image_column image --caption_column image_description \
     --remove_unused_columns=False \
     --do_train --do_eval --do_predict \
5 changes: 5 additions & 0 deletions examples/contrastive-image-text/run_bridgetower.py
@@ -138,6 +138,10 @@ class DataTrainingArguments:
     dataset_config_name: Optional[str] = field(
         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
     )
+    dataset_revision: str = field(
+        default="main",
+        metadata={"help": "The specific dataset version to use (can be a branch name, tag name or commit id)."},
+    )
     data_dir: Optional[str] = field(default=None, metadata={"help": "The data directory containing input files."})
     image_column: Optional[str] = field(
         default="image_path",
@@ -339,6 +343,7 @@ def main():
             keep_in_memory=False,
             data_dir=data_args.data_dir,
             token=model_args.token,
+            revision=data_args.dataset_revision,
         )
     else:
         data_files = {}
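For reference, the new `--dataset_revision` flag feeds straight into `datasets.load_dataset`, whose `revision` argument accepts a branch name, tag, or commit id. A minimal sketch of the same pinning outside the script (dataset name and hash taken from the README example above):

```python
from datasets import load_dataset

# Pin the dataset to a fixed commit so results stay reproducible
# even if the dataset repository changes upstream.
dataset = load_dataset(
    "jmhessel/newyorker_caption_contest",
    "matching",  # dataset config name
    revision="3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6",
)
```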
40 changes: 39 additions & 1 deletion examples/language-modeling/README.md
@@ -321,7 +321,8 @@ python run_clm.py \
 
 ## PEFT
 
-To run LoRA finetuning and inference. you could use `run_lora_clm.py` as an example. Multi-card examples can be simply adapted to run LoRA finetuning. Here is the CLM example with Llama1-7B and Falcon-40B:
+To run LoRA finetuning, you can use `run_lora_clm.py`.
+Here are single-/multi-device command examples for Llama1-7B, Falcon-40B and Llama2-70B:
 
 - Single-card finetuning of Llama1-7B:
 ```bash
@@ -484,6 +485,43 @@ python ../gaudi_spawn.py \
 
 ```
 
+- Multi-card finetuning of Llama2-70B with DeepSpeed ZeRO-3 optimization and LoRA:
+
+> The following command requires Habana DeepSpeed 1.13.0 or later.
+```bash
+PT_HPU_MAX_COMPOUND_OP_SIZE=10 DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 \
+python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lora_clm.py \
+  --model_name_or_path meta-llama/Llama-2-70b-hf \
+  --deepspeed llama2_ds_zero3_config.json \
+  --dataset_name tatsu-lab/alpaca \
+  --bf16 True \
+  --output_dir ./lora_out \
+  --num_train_epochs 2 \
+  --max_seq_len 2048 \
+  --per_device_train_batch_size 10 \
+  --per_device_eval_batch_size 10 \
+  --gradient_checkpointing \
+  --evaluation_strategy epoch \
+  --eval_delay 2 \
+  --save_strategy no \
+  --learning_rate 0.0018 \
+  --warmup_ratio 0.03 \
+  --lr_scheduler_type "cosine" \
+  --logging_steps 1 \
+  --dataset_concatenation \
+  --attn_softmax_bf16 True \
+  --do_train \
+  --do_eval \
+  --use_habana \
+  --use_lazy_mode \
+  --pipelining_fwd_bwd \
+  --throughput_warmup_steps 3 \
+  --lora_rank 4 \
+  --lora_target_modules "q_proj" "v_proj" "k_proj" "o_proj" \
+  --validation_split_percentage 4
+````
+
 ## Streaming
 
 To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` with `--max_steps` specified in the command line. This is currently supported by `run_mlm.py` and `run_clm.py`.
15 changes: 15 additions & 0 deletions examples/language-modeling/llama2_ds_zero3_config.json
@@ -0,0 +1,15 @@
{
  "steps_per_print": 64,
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "bf16": {
    "enabled": true
  },
  "gradient_clipping": 1.0,
  "zero_optimization": {
    "stage": 3,
    "overlap_comm": false,
    "contiguous_gradients": false
  }
}
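The `"auto"` entries in this config are resolved at launch time by the Hugging Face Trainer's DeepSpeed integration from the corresponding training arguments. For example, with the README command above (`--per_device_train_batch_size 10`, world size 8, and gradient accumulation assumed to default to 1), `train_micro_batch_size_per_gpu` becomes 10 and `train_batch_size` resolves to 10 × 1 × 8 = 80.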
1 change: 0 additions & 1 deletion examples/text-generation/run_generation.py
@@ -210,7 +210,6 @@ def setup_parser(parser):
 
     if not args.use_hpu_graphs:
         args.limit_hpu_graphs = False
-        args.reuse_cache = False
 
     return args
 
@@ -417,9 +417,9 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, token_i
         # only last token for inputs_ids if past is defined in kwargs
         if past_key_values:
             if token_idx is not None:
-                input_ids = torch.index_select(input_ids, 1, token_idx - 1).unsqueeze(-1)
+                input_ids = torch.index_select(input_ids, 1, token_idx - 1)
                 if token_type_ids is not None:
-                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1).unsqueeze(-1)
+                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1)
             else:
                 input_ids = input_ids[:, -1].unsqueeze(-1)
                 if token_type_ids is not None:
@@ -352,9 +352,9 @@ def prepare_inputs_for_generation(
         # only last token for inputs_ids if past is defined in kwargs
         if past_key_values:
             if token_idx is not None:
-                input_ids = torch.index_select(input_ids, 1, token_idx - 1).unsqueeze(-1)
+                input_ids = torch.index_select(input_ids, 1, token_idx - 1)
                 if token_type_ids is not None:
-                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1).unsqueeze(-1)
+                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1)
             else:
                 input_ids = input_ids[:, -1].unsqueeze(-1)
                 if token_type_ids is not None:
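Both hunks above drop a stray `.unsqueeze(-1)`: assuming `token_idx` is a one-element tensor, `torch.index_select` along dim 1 already returns a 2-D `(batch, 1)` tensor, so the extra unsqueeze produced a 3-D `(batch, 1, 1)` shape. The `input_ids[:, -1]` branch is different because indexing with `-1` drops the dimension, so its unsqueeze is still needed. A minimal shape check with illustrative values:

```python
import torch

input_ids = torch.arange(12).reshape(2, 6)  # batch of 2, sequence length 6
token_idx = torch.tensor([4])               # hypothetical current decoding position

picked = torch.index_select(input_ids, 1, token_idx - 1)
print(picked.shape)                # torch.Size([2, 1]) -- what generation expects
print(picked.unsqueeze(-1).shape)  # torch.Size([2, 1, 1]) -- the old, wrong shape
```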
19 changes: 14 additions & 5 deletions pyproject.toml
@@ -12,13 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-[tool.black]
-line-length = 119
-target-version = ['py37']
-
 [tool.ruff]
 # Never enforce `E501` (line length violations).
-ignore = ["C901", "E501", "E741"]
+ignore = ["C901", "E501", "E741", "F402", "F823"]
 select = ["C", "E", "F", "I", "W"]
 line-length = 119
 exclude = ["text-generation-inference"]
@@ -30,3 +26,16 @@ exclude = ["text-generation-inference"]
 [tool.ruff.isort]
 lines-after-imports = 2
 known-first-party = ["optimum.habana"]
+
+[tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
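With `[tool.ruff.format]` configured to mirror Black's defaults, the `[tool.black]` section above becomes redundant, which is also why `black` disappears from the quality dependencies in `setup.py` below.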
1 change: 0 additions & 1 deletion setup.py
@@ -48,7 +48,6 @@
 ]
 
 QUALITY_REQUIRES = [
-    "black",
     "ruff",
     "hf_doc_builder @ git+https://github.com/huggingface/doc-builder.git",
 ]
1 change: 1 addition & 0 deletions tests/baselines/bridgetower_large_itm_mlm_itc.json
@@ -11,6 +11,7 @@
         "train_samples_per_second": 921.069,
         "extra_arguments": [
             "--dataset_config_name matching",
+            "--dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6",
             "--image_column image",
             "--caption_column image_description",
             "--remove_unused_columns False",