From ca49360dfeadd9b25587bae879f2f34e0d94aa96 Mon Sep 17 00:00:00 2001 From: Puneesh Khanna Date: Wed, 22 Nov 2023 15:20:39 +0530 Subject: [PATCH 1/8] Fix setting of reuse cache (#553) Reuse cache is independent of HPU graphs enablement. --- examples/text-generation/run_generation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py index 8c54cf6e9d..bd875ffd4f 100644 --- a/examples/text-generation/run_generation.py +++ b/examples/text-generation/run_generation.py @@ -209,8 +209,7 @@ def setup_parser(parser): args = parser.parse_args() if not args.use_hpu_graphs: - args.limit_hpu_graphs = False - args.reuse_cache = False + args.limit_hpu_graphs = False return args From 86d37aeaa83b86e927e9d7cd414e18e33f490a90 Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:21:09 +0100 Subject: [PATCH 2/8] Add Gaudi2 regression test workflow (#554) --- .github/workflows/slow_tests.yml | 4 +- .github/workflows/slow_tests_gaudi2.yml | 137 ++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/slow_tests_gaudi2.yml diff --git a/.github/workflows/slow_tests.yml b/.github/workflows/slow_tests.yml index 045af4bbed..d37fc21305 100644 --- a/.github/workflows/slow_tests.yml +++ b/.github/workflows/slow_tests.yml @@ -3,8 +3,8 @@ name: Non-regression tests on: workflow_dispatch: schedule: - - cron: '0 21 * * 0-5' # every Sunday to Friday at 11pm CET - - cron: '0 21 * * 6' # every Saturday at 1am CET + - cron: '0 21 * * 0-5' # every Sunday to Friday at 11pm CET (10pm winter time) + - cron: '0 21 * * 6' # every Saturday at 1am CET (midnight winter time) concurrency: group: ${{ github.workflow }} diff --git a/.github/workflows/slow_tests_gaudi2.yml b/.github/workflows/slow_tests_gaudi2.yml new file mode 100644 index 0000000000..8a538ec1cd --- /dev/null +++ b/.github/workflows/slow_tests_gaudi2.yml @@ -0,0 +1,137 @@ +name: (Gaudi2) Non-regression tests + +on: + workflow_dispatch: + schedule: + - cron: '0 23 * * 3,6' # every Wednesday and Saturday at 1am CET (midnight winter time) + +concurrency: + group: ${{ github.workflow }} + +jobs: + stable-diffusion: + name: Test Stable Diffusion + runs-on: [self-hosted, linux, x64, gaudi2] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Pull image + run: | + docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest + - name: Run tests + run: | + docker run \ + -v $PWD:/root/workspace \ + --workdir=/root/workspace \ + --runtime=habana \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + --cap-add=sys_nice \ + --net=host \ + --ipc=host \ + vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ + GAUDI2_CI=1 /bin/bash tests/ci/slow_tests_diffusers.sh + deepspeed: + name: Test DeepSpeed models + if: ${{ !cancelled() && (success() || failure()) }} + needs: + - stable-diffusion # run the job when the previous test job is done + runs-on: [self-hosted, linux, x64, gaudi2] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Pull image + run: | + docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest + - name: Run tests + run: | + docker run \ + -v $PWD:/root/workspace \ + --workdir=/root/workspace \ + --runtime=habana \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + --cap-add=sys_nice \ + --net=host \ + --ipc=host \ + vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ + GAUDI2_CI=1 /bin/bash tests/ci/slow_tests_deepspeed.sh + multi-card: + name: Test multi-card models + if: ${{ !cancelled() && (success() || failure()) }} + needs: + - deepspeed # run the job when the previous test job is done + runs-on: [self-hosted, linux, x64, gaudi2] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Pull image + run: | + docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest + - name: Run tests + run: | + docker run \ + -v $PWD:/root/workspace \ + --workdir=/root/workspace \ + --runtime=habana \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + --cap-add=sys_nice \ + --net=host \ + --ipc=host \ + vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ + GAUDI2_CI=1 /bin/bash tests/ci/slow_tests_8x.sh + single-card: + name: Test single-card models + if: ${{ !cancelled() && (success() || failure()) }} + needs: + - deepspeed + - multi-card # run the job when the previous test jobs are done + runs-on: [self-hosted, linux, x64, gaudi2] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Pull image + run: | + docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest + - name: Run tests + run: | + docker run \ + -v $PWD:/root/workspace \ + --workdir=/root/workspace \ + --runtime=habana \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + --cap-add=sys_nice \ + --net=host \ + --ipc=host \ + vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ + GAUDI2_CI=1 RUN_ALBERT_XXL_1X=1 /bin/bash tests/ci/slow_tests_1x.sh + text-generation: + name: Test text-generation example + if: ${{ !cancelled() && (success() || failure()) }} + needs: + - deepspeed + - multi-card + - single-card + - albert-xxl-single-card # run the job when the previous test jobs are done + runs-on: [self-hosted, linux, x64, gaudi2] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Pull image + run: | + docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest + - name: Run tests + run: | + docker run \ + -v $PWD:/root/workspace \ + --workdir=/root/workspace \ + --runtime=habana \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + --cap-add=sys_nice \ + --net=host \ + --ipc=host \ + vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ + GAUDI2_CI=1 make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} From b2efcd399f0481b4d1eb2d6886d904868f8e2f7d Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:46:13 +0100 Subject: [PATCH 3/8] Remove black and use ruff for code formatting (#555) --- .github/workflows/build_pr_documentation.yml | 1 - .github/workflows/check_code_quality.yml | 9 +++------ Makefile | 8 ++++---- examples/text-generation/run_generation.py | 2 +- pyproject.toml | 19 ++++++++++++++----- setup.py | 1 - tests/test_trainer.py | 4 +--- .../tests/models/gpt2/test_modeling_gpt2.py | 6 +----- .../tests/models/gptj/test_modeling_gptj.py | 4 +--- .../tests/models/llama/test_modeling_llama.py | 16 ++++------------ 10 files changed, 29 insertions(+), 41 deletions(-) diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index 926c97b62f..9d6b2d4005 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -34,7 +34,6 @@ jobs: cd doc-builder git pull origin main pip install . - pip install black cd .. - name: Make documentation diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml index cb7a266a04..b734c564d3 100644 --- a/.github/workflows/check_code_quality.yml +++ b/.github/workflows/check_code_quality.yml @@ -29,12 +29,9 @@ jobs: run: | source venv/bin/activate pip install --upgrade pip - pip install black ruff - - name: Check style with black - run: | - source venv/bin/activate - black --check . + pip install ruff - name: Check style with ruff run: | source venv/bin/activate - ruff . + ruff check . setup.py + ruff format --check . setup.py diff --git a/Makefile b/Makefile index 2b0b535405..f5c974136f 100644 --- a/Makefile +++ b/Makefile @@ -22,12 +22,12 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) # Run code quality checks style_check: clean - black --check . setup.py - ruff . setup.py + ruff check . setup.py + ruff format --check . setup.py style: clean - black . setup.py - ruff . setup.py --fix + ruff check . setup.py --fix + ruff format . setup.py # Run unit and integration tests fast_tests: diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py index bd875ffd4f..3351c72ec1 100644 --- a/examples/text-generation/run_generation.py +++ b/examples/text-generation/run_generation.py @@ -209,7 +209,7 @@ def setup_parser(parser): args = parser.parse_args() if not args.use_hpu_graphs: - args.limit_hpu_graphs = False + args.limit_hpu_graphs = False return args diff --git a/pyproject.toml b/pyproject.toml index 7323ffa36c..87941f7e5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,13 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -[tool.black] -line-length = 119 -target-version = ['py37'] - [tool.ruff] # Never enforce `E501` (line length violations). -ignore = ["C901", "E501", "E741"] +ignore = ["C901", "E501", "E741", "F402", "F823"] select = ["C", "E", "F", "I", "W"] line-length = 119 exclude = ["text-generation-inference"] @@ -30,3 +26,16 @@ exclude = ["text-generation-inference"] [tool.ruff.isort] lines-after-imports = 2 known-first-party = ["optimum.habana"] + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" diff --git a/setup.py b/setup.py index c081a55db4..ae4ee1a2c4 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,6 @@ ] QUALITY_REQUIRES = [ - "black", "ruff", "hf_doc_builder @ git+https://github.com/huggingface/doc-builder.git", ] diff --git a/tests/test_trainer.py b/tests/test_trainer.py index dda4e55e20..1d82a5913a 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -1683,9 +1683,7 @@ def test_no_wd_param_group(self): args = GaudiTrainingArguments(output_dir="./test", use_habana=True, use_lazy_mode=True) trainer = GaudiTrainer(model=model, gaudi_config=gaudi_config, args=args) trainer.create_optimizer_and_scheduler(10) - # fmt: off - wd_names = ['0.linear1.weight', '0.linear2.weight', '1.0.linear1.weight', '1.0.linear2.weight', '1.1.linear1.weight', '1.1.linear2.weight'] - # fmt: on + wd_names = ['0.linear1.weight', '0.linear2.weight', '1.0.linear1.weight', '1.0.linear2.weight', '1.1.linear1.weight', '1.1.linear2.weight'] # fmt: skip wd_params = [p for n, p in model.named_parameters() if n in wd_names] no_wd_params = [p for n, p in model.named_parameters() if n not in wd_names] self.assertListEqual(trainer.optimizer.param_groups[0]["params"], wd_params) diff --git a/tests/transformers/tests/models/gpt2/test_modeling_gpt2.py b/tests/transformers/tests/models/gpt2/test_modeling_gpt2.py index 016313927f..d507b34b4d 100644 --- a/tests/transformers/tests/models/gpt2/test_modeling_gpt2.py +++ b/tests/transformers/tests/models/gpt2/test_modeling_gpt2.py @@ -727,11 +727,7 @@ def _test_lm_generate_gpt2_helper( input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device) # The dog was found in a field near the intersection of West and West Streets.\n\nThe dog - # fmt: off - expected_output_ids = [ - 464, 3290, 373, 1043, 287, 257, 2214, 1474, 262, 16246, 286, 2688, 290, 2688, 27262, 13, 198, 198, 464, 3290, - ] - # fmt: on + expected_output_ids = [464, 3290, 373, 1043, 287, 257, 2214, 1474, 262, 16246, 286, 2688, 290, 2688, 27262, 13, 198, 198, 464, 3290,] # fmt: skip output_ids = model.generate(input_ids, do_sample=False) if verify_outputs: self.assertListEqual(output_ids[0].tolist(), expected_output_ids) diff --git a/tests/transformers/tests/models/gptj/test_modeling_gptj.py b/tests/transformers/tests/models/gptj/test_modeling_gptj.py index 9d222350e3..4271079915 100644 --- a/tests/transformers/tests/models/gptj/test_modeling_gptj.py +++ b/tests/transformers/tests/models/gptj/test_modeling_gptj.py @@ -543,10 +543,8 @@ def test_lm_generate_gptj(self): model.gradient_checkpointing_disable() model.to(torch_device) input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device) # The dog - # fmt: off # The dog is a man's best friend. It is a loyal companion, and it is a friend - expected_output_ids = [464, 3290, 318, 257, 582, 338, 1266, 1545, 13, 632, 318, 257, 9112, 15185, 11, 290, 340, 318, 257, 1545] - # fmt: on + expected_output_ids = [464, 3290, 318, 257, 582, 338, 1266, 1545, 13, 632, 318, 257, 9112, 15185, 11, 290, 340, 318, 257, 1545] # fmt: skip output_ids = model.generate(input_ids, do_sample=False) self.assertListEqual(output_ids[0].tolist(), expected_output_ids) diff --git a/tests/transformers/tests/models/llama/test_modeling_llama.py b/tests/transformers/tests/models/llama/test_modeling_llama.py index 49e78fa854..cadac239c5 100644 --- a/tests/transformers/tests/models/llama/test_modeling_llama.py +++ b/tests/transformers/tests/models/llama/test_modeling_llama.py @@ -384,9 +384,7 @@ def test_model_7b_logits(self): EXPECTED_MEAN = torch.tensor([[-6.6550, -4.1227, -4.9859, -3.2406, 0.8262, -3.0033, 1.2964, -3.3699]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) # slicing logits[0, 0, 0:30] - # fmt: off - EXPECTED_SLICE = torch.tensor([-12.8281, -7.4453, -0.4639, -8.0625, -7.2500, -8.0000, -6.4883, -7.7695, -7.8438, -7.0312, -6.2188, -7.1328, -1.8496, 1.9961, -8.6250, -6.7227, -12.8281, -6.9492, -7.0742, -7.7852, -7.5820, -7.9062, -6.9375, -7.9805, -8.3438, -8.1562, -8.0469, -7.6250, -7.7422, -7.3398,]) - # fmt: on + EXPECTED_SLICE = torch.tensor([-12.8281, -7.4453, -0.4639, -8.0625, -7.2500, -8.0000, -6.4883, -7.7695, -7.8438, -7.0312, -6.2188, -7.1328, -1.8496, 1.9961, -8.6250, -6.7227, -12.8281, -6.9492, -7.0742, -7.7852, -7.5820, -7.9062, -6.9375, -7.9805, -8.3438, -8.1562, -8.0469, -7.6250, -7.7422, -7.3398,]) # fmt: skip torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-5, rtol=1e-5) @unittest.skip("Logits are not exactly the same, once we fix the instabalities somehow, will update!") @@ -399,9 +397,7 @@ def test_model_13b_logits(self): EXPECTED_MEAN = torch.tensor([[-2.0622, -1.2794, -1.1638, -0.9788, -1.4603, -1.0238, -1.7893, -1.4411]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) # slicing logits[0, 0, 0:30] - # fmt: off - EXPECTED_SLICE = torch.tensor([-8.1406, -8.0547, 2.7461, -1.2344, -0.1448, -1.8262, -1.0020, -1.8154, -1.6895, -1.8516, -2.3574, -0.9277, 3.7598, 6.5742, -1.2998, -0.1177, -8.1406, -2.9688, -2.9199, -3.1699, -3.5254, -2.3555, -2.7988, -3.4141, -2.8262, -4.5195, -3.3379, -3.3164, -2.7832, -3.0273]) - # fmt: on + EXPECTED_SLICE = torch.tensor([-8.1406, -8.0547, 2.7461, -1.2344, -0.1448, -1.8262, -1.0020, -1.8154, -1.6895, -1.8516, -2.3574, -0.9277, 3.7598, 6.5742, -1.2998, -0.1177, -8.1406, -2.9688, -2.9199, -3.1699, -3.5254, -2.3555, -2.7988, -3.4141, -2.8262, -4.5195, -3.3379, -3.3164, -2.7832, -3.0273]) # fmt: skip torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-5, rtol=1e-5) @unittest.skip("Logits are not exactly the same, once we fix the instabalities somehow, will update!") @@ -414,9 +410,7 @@ def test_model_13bf_logits(self): EXPECTED_MEAN = torch.tensor([[-0.8562, -1.8520, -0.7551, -0.4162, -1.5161, -1.2038, -2.4823, -2.3254]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) # slicing logits[0, 0, 0:30] - # fmt: off - EXPECTED_SLICE = torch.tensor([-2.2227, 4.8828, 0.9023, -0.4578, -0.7871, -0.1033, -0.6221, -0.5786, -0.7803, -1.0674, -1.2920, -0.1570, 0.8008, 2.0723, -0.9497, 0.2771, -2.2227, -0.7612, -1.4346, -1.2061, -1.6426, -0.3000, -0.7139, -1.1934, -1.8691, -1.6973, -1.5947, -1.2705, -0.3523, -0.5513]) - # fmt: on + EXPECTED_SLICE = torch.tensor([-2.2227, 4.8828, 0.9023, -0.4578, -0.7871, -0.1033, -0.6221, -0.5786, -0.7803, -1.0674, -1.2920, -0.1570, 0.8008, 2.0723, -0.9497, 0.2771, -2.2227, -0.7612, -1.4346, -1.2061, -1.6426, -0.3000, -0.7139, -1.1934, -1.8691, -1.6973, -1.5947, -1.2705, -0.3523, -0.5513]) # fmt: skip torch.testing.assert_close(out.mean(-1), EXPECTED_SLICE, atol=1e-2, rtol=1e-2) @unittest.skip( @@ -432,9 +426,7 @@ def test_model_70b_logits(self): [[-4.2327, -3.3360, -4.6665, -4.7631, -1.8180, -3.4170, -1.4211, -3.1810]], dtype=torch.float32 ) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) - # fmt: off - EXPECTED_SLICE = torch.tensor([-9.4922, -3.9551, 1.7998, -5.6758, -5.1055, -5.8984, -4.8320, -6.8086, -6.5391, -5.6172, -5.5820, -5.5352, 1.7881, 3.6289, -6.5117, -3.4785, -9.5000, -6.0352, -6.8125, -6.0195, -6.6836, -5.4727, -6.2812, -6.0391, -7.3398, -7.4297, -7.4844, -6.5820, -5.8789, -5.5312]) - # fmt: on + EXPECTED_SLICE = torch.tensor([-9.4922, -3.9551, 1.7998, -5.6758, -5.1055, -5.8984, -4.8320, -6.8086, -6.5391, -5.6172, -5.5820, -5.5352, 1.7881, 3.6289, -6.5117, -3.4785, -9.5000, -6.0352, -6.8125, -6.0195, -6.6836, -5.4727, -6.2812, -6.0391, -7.3398, -7.4297, -7.4844, -6.5820, -5.8789, -5.5312]) # fmt: skip torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-5, rtol=1e-5) @unittest.skip("Model is curently gated") From bd4d43c455216126bd5fe234605de68ffa4a0ee8 Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Wed, 22 Nov 2023 19:11:02 +0100 Subject: [PATCH 4/8] Fix Gaudi2 CI workflow (#556) --- .github/workflows/slow_tests_gaudi2.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/slow_tests_gaudi2.yml b/.github/workflows/slow_tests_gaudi2.yml index 8a538ec1cd..acca3abc9e 100644 --- a/.github/workflows/slow_tests_gaudi2.yml +++ b/.github/workflows/slow_tests_gaudi2.yml @@ -26,11 +26,12 @@ jobs: --runtime=habana \ -e HABANA_VISIBLE_DEVICES=all \ -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e GAUDI2_CI=1 \ --cap-add=sys_nice \ --net=host \ --ipc=host \ vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ - GAUDI2_CI=1 /bin/bash tests/ci/slow_tests_diffusers.sh + /bin/bash tests/ci/slow_tests_diffusers.sh deepspeed: name: Test DeepSpeed models if: ${{ !cancelled() && (success() || failure()) }} @@ -51,11 +52,12 @@ jobs: --runtime=habana \ -e HABANA_VISIBLE_DEVICES=all \ -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e GAUDI2_CI=1 \ --cap-add=sys_nice \ --net=host \ --ipc=host \ vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ - GAUDI2_CI=1 /bin/bash tests/ci/slow_tests_deepspeed.sh + /bin/bash tests/ci/slow_tests_deepspeed.sh multi-card: name: Test multi-card models if: ${{ !cancelled() && (success() || failure()) }} @@ -76,11 +78,12 @@ jobs: --runtime=habana \ -e HABANA_VISIBLE_DEVICES=all \ -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e GAUDI2_CI=1 \ --cap-add=sys_nice \ --net=host \ --ipc=host \ vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ - GAUDI2_CI=1 /bin/bash tests/ci/slow_tests_8x.sh + /bin/bash tests/ci/slow_tests_8x.sh single-card: name: Test single-card models if: ${{ !cancelled() && (success() || failure()) }} @@ -102,19 +105,20 @@ jobs: --runtime=habana \ -e HABANA_VISIBLE_DEVICES=all \ -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e GAUDI2_CI=1 \ + -e RUN_ALBERT_XXL_1X=1 \ --cap-add=sys_nice \ --net=host \ --ipc=host \ vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ - GAUDI2_CI=1 RUN_ALBERT_XXL_1X=1 /bin/bash tests/ci/slow_tests_1x.sh + /bin/bash tests/ci/slow_tests_1x.sh text-generation: name: Test text-generation example if: ${{ !cancelled() && (success() || failure()) }} needs: - deepspeed - multi-card - - single-card - - albert-xxl-single-card # run the job when the previous test jobs are done + - single-card # run the job when the previous test jobs are done runs-on: [self-hosted, linux, x64, gaudi2] steps: - name: Checkout @@ -130,8 +134,9 @@ jobs: --runtime=habana \ -e HABANA_VISIBLE_DEVICES=all \ -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e GAUDI2_CI=1 \ --cap-add=sys_nice \ --net=host \ --ipc=host \ vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \ - GAUDI2_CI=1 make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} + make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} From 110e992115879b498004e6373590928f74d6736e Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Thu, 23 Nov 2023 10:10:26 +0100 Subject: [PATCH 5/8] Fix example diff CI (#560) --- tests/example_diff/run_generation.txt | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/example_diff/run_generation.txt b/tests/example_diff/run_generation.txt index 3fc3304ebd..563d95485b 100644 --- a/tests/example_diff/run_generation.txt +++ b/tests/example_diff/run_generation.txt @@ -481,18 +481,17 @@ < distributed_state = PartialState(cpu=args.use_cpu) --- > args = parser.parse_args() -341c211,213 +341c211,212 < logger.warning(f"device: {distributed_state.device}, 16-bits inference: {args.fp16}") --- > if not args.use_hpu_graphs: > args.limit_hpu_graphs = False -> args.reuse_cache = False -343,344c215 +343,344c214 < if args.seed is not None: < set_seed(args.seed) --- > return args -346,373d216 +346,373d215 < # Initialize the model and tokenizer < try: < args.model_type = args.model_type.lower() @@ -521,7 +520,7 @@ < if requires_preprocessing: < prepare_input = PREPROCESSING_FUNCTIONS.get(args.model_type) < preprocessed_prompt_text = prepare_input(args, model, tokenizer, prompt_text) -375,378c218,221 +375,378c217,220 < if model.__class__.__name__ in ["TransfoXLLMHeadModel"]: < tokenizer_kwargs = {"add_space_before_punct_symbol": True} < else: @@ -531,7 +530,7 @@ > parser = argparse.ArgumentParser() > args = setup_parser(parser) > model, tokenizer, generation_config = initialize_model(args, logger) -380,386c223 +380,386c222 < encoded_prompt = tokenizer.encode( < preprocessed_prompt_text, add_special_tokens=False, return_tensors="pt", **tokenizer_kwargs < ) @@ -541,7 +540,7 @@ < encoded_prompt = encoded_prompt.to(distributed_state.device) --- > import habana_frameworks.torch.hpu as torch_hpu -388,389c225,336 +388,389c224,335 < if encoded_prompt.size()[-1] == 0: < input_ids = None --- @@ -657,7 +656,7 @@ > print(f"Graph compilation duration = {compilation_duration} seconds") > print(separator) > print() -391c338,353 +391c337,352 < input_ids = encoded_prompt --- > # Downloading and loading a dataset from the hub. @@ -676,7 +675,7 @@ > .shuffle() > .select(range(args.dataset_max_samples if args.dataset_max_samples > 0 else (raw_dataset[split]).num_rows)) > ) -393,399c355,362 +393,399c354,361 < if args.jit: < jit_input_texts = ["enable jit"] < jit_inputs = prepare_jit_inputs(jit_input_texts, model, tokenizer) @@ -693,7 +692,7 @@ > logger.info( > f"No column name was given so automatically choosing '{column_name}' for prompts. If you would like to use another column of the dataset, you can set the argument `--column_name`." > ) -401,439c364,384 +401,439c363,383 < sig = inspect.signature(model.__call__) < jit_inputs = tuple(jit_inputs[key] for key in sig.parameters if jit_inputs.get(key, None) is not None) < traced_model = torch.jit.trace(model, jit_inputs, strict=False) @@ -755,7 +754,7 @@ > preprocess_function, > batched=True, > desc="Running tokenizer on dataset", -440a386,463 +440a385,462 > # After tokenization, we can remove the column of interest > raw_dataset = raw_dataset.remove_columns([column_name]) > raw_dataset.set_format(type="torch") @@ -834,13 +833,13 @@ > ) > print(separator) > t_end = time.time() -442,443c465,466 +442,443c464,465 < generated_sequences.append(total_sequence) < print(total_sequence) --- > throughput = total_new_tokens_generated / duration > # Print Stats -445c468,480 +445c467,479 < return generated_sequences --- > stats = f"Throughput (including tokenization) = {throughput} tokens/second" From 542c588b8ad25214df5041c9b6008048f2e9c32a Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Thu, 23 Nov 2023 11:21:09 +0100 Subject: [PATCH 6/8] Update BridgeTower example (#561) --- examples/contrastive-image-text/README.md | 1 + examples/contrastive-image-text/run_bridgetower.py | 5 +++++ tests/baselines/bridgetower_large_itm_mlm_itc.json | 1 + 3 files changed, 7 insertions(+) diff --git a/examples/contrastive-image-text/README.md b/examples/contrastive-image-text/README.md index 058f126d96..3ee9c81b7b 100644 --- a/examples/contrastive-image-text/README.md +++ b/examples/contrastive-image-text/README.md @@ -207,6 +207,7 @@ python ../gaudi_spawn.py --use_mpi --world_size 8 run_bridgetower.py \ --output_dir /tmp/bridgetower-test \ --model_name_or_path BridgeTower/bridgetower-large-itm-mlm-itc \ --dataset_name jmhessel/newyorker_caption_contest --dataset_config_name matching \ +--dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6 \ --image_column image --caption_column image_description \ --remove_unused_columns=False \ --do_train --do_eval --do_predict \ diff --git a/examples/contrastive-image-text/run_bridgetower.py b/examples/contrastive-image-text/run_bridgetower.py index 3f592fdf16..a59205b954 100644 --- a/examples/contrastive-image-text/run_bridgetower.py +++ b/examples/contrastive-image-text/run_bridgetower.py @@ -138,6 +138,10 @@ class DataTrainingArguments: dataset_config_name: Optional[str] = field( default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} ) + dataset_revision: str = field( + default="main", + metadata={"help": "The specific dataset version to use (can be a branch name, tag name or commit id)."}, + ) data_dir: Optional[str] = field(default=None, metadata={"help": "The data directory containing input files."}) image_column: Optional[str] = field( default="image_path", @@ -339,6 +343,7 @@ def main(): keep_in_memory=False, data_dir=data_args.data_dir, token=model_args.token, + revision=data_args.dataset_revision, ) else: data_files = {} diff --git a/tests/baselines/bridgetower_large_itm_mlm_itc.json b/tests/baselines/bridgetower_large_itm_mlm_itc.json index 095ddd490e..0c571fe5be 100644 --- a/tests/baselines/bridgetower_large_itm_mlm_itc.json +++ b/tests/baselines/bridgetower_large_itm_mlm_itc.json @@ -11,6 +11,7 @@ "train_samples_per_second": 921.069, "extra_arguments": [ "--dataset_config_name matching", + "--dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6", "--image_column image", "--caption_column image_description", "--remove_unused_columns False", From b5e7d131864c31098c3ca57484c835c3441a7153 Mon Sep 17 00:00:00 2001 From: "Wang, Yi" Date: Thu, 23 Nov 2023 23:40:00 +0800 Subject: [PATCH 7/8] Don't unsqueeze input_id in prepare_inputs_for_generation for Starcoder/Codegen (#559) --- .../habana/transformers/models/codegen/modeling_codegen.py | 4 ++-- .../transformers/models/gpt_bigcode/modeling_gpt_bigcode.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optimum/habana/transformers/models/codegen/modeling_codegen.py b/optimum/habana/transformers/models/codegen/modeling_codegen.py index 0387bc6a0a..329dec59b2 100644 --- a/optimum/habana/transformers/models/codegen/modeling_codegen.py +++ b/optimum/habana/transformers/models/codegen/modeling_codegen.py @@ -417,9 +417,9 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, token_i # only last token for inputs_ids if past is defined in kwargs if past_key_values: if token_idx is not None: - input_ids = torch.index_select(input_ids, 1, token_idx - 1).unsqueeze(-1) + input_ids = torch.index_select(input_ids, 1, token_idx - 1) if token_type_ids is not None: - token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1).unsqueeze(-1) + token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1) else: input_ids = input_ids[:, -1].unsqueeze(-1) if token_type_ids is not None: diff --git a/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py b/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py index e17aa5a0d0..a70826b62b 100644 --- a/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +++ b/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py @@ -352,9 +352,9 @@ def prepare_inputs_for_generation( # only last token for inputs_ids if past is defined in kwargs if past_key_values: if token_idx is not None: - input_ids = torch.index_select(input_ids, 1, token_idx - 1).unsqueeze(-1) + input_ids = torch.index_select(input_ids, 1, token_idx - 1) if token_type_ids is not None: - token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1).unsqueeze(-1) + token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1) else: input_ids = input_ids[:, -1].unsqueeze(-1) if token_type_ids is not None: From 13fa10d58247b1c0bfe15005669cb24561398769 Mon Sep 17 00:00:00 2001 From: Mandy Li Date: Thu, 23 Nov 2023 07:43:44 -0800 Subject: [PATCH 8/8] Enable llama2-70b LoRA finetuning (#527) --- examples/language-modeling/README.md | 40 ++++++++++++++++++- .../llama2_ds_zero3_config.json | 15 +++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100755 examples/language-modeling/llama2_ds_zero3_config.json diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md index dad97020c6..16c8e3c441 100644 --- a/examples/language-modeling/README.md +++ b/examples/language-modeling/README.md @@ -321,7 +321,8 @@ python run_clm.py \ ## PEFT -To run LoRA finetuning and inference. you could use `run_lora_clm.py` as an example. Multi-card examples can be simply adapted to run LoRA finetuning. Here is the CLM example with Llama1-7B and Falcon-40B: +To run LoRA finetuning, you can use `run_lora_clm.py`. +Here are single-/multi-device command examples for Llama1-7B, Falcon-40B and Llama2-70B: - Single-card finetuning of Llama1-7B: ```bash @@ -455,6 +456,43 @@ LOWER_LIST=ops_bf16.txt python3 ../gaudi_spawn.py \ --low_cpu_mem_usage True ``` +- Multi-card finetuning of Llama2-70B with DeepSpeed ZeRO-3 optimization and LoRA: + + > The following command requires Habana DeepSpeed 1.13.0 or later. + +```bash +PT_HPU_MAX_COMPOUND_OP_SIZE=10 DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 \ +python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lora_clm.py \ + --model_name_or_path meta-llama/Llama-2-70b-hf \ + --deepspeed llama2_ds_zero3_config.json \ + --dataset_name tatsu-lab/alpaca \ + --bf16 True \ + --output_dir ./lora_out \ + --num_train_epochs 2 \ + --max_seq_len 2048 \ + --per_device_train_batch_size 10 \ + --per_device_eval_batch_size 10 \ + --gradient_checkpointing \ + --evaluation_strategy epoch \ + --eval_delay 2 \ + --save_strategy no \ + --learning_rate 0.0018 \ + --warmup_ratio 0.03 \ + --lr_scheduler_type "cosine" \ + --logging_steps 1 \ + --dataset_concatenation \ + --attn_softmax_bf16 True \ + --do_train \ + --do_eval \ + --use_habana \ + --use_lazy_mode \ + --pipelining_fwd_bwd \ + --throughput_warmup_steps 3 \ + --lora_rank 4 \ + --lora_target_modules "q_proj" "v_proj" "k_proj" "o_proj" \ + --validation_split_percentage 4 +```` + ## Streaming To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` with `--max_steps` specified in the command line. This is currently supported by `run_mlm.py` and `run_clm.py`. diff --git a/examples/language-modeling/llama2_ds_zero3_config.json b/examples/language-modeling/llama2_ds_zero3_config.json new file mode 100755 index 0000000000..69845e1899 --- /dev/null +++ b/examples/language-modeling/llama2_ds_zero3_config.json @@ -0,0 +1,15 @@ +{ + "steps_per_print": 64, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "gradient_accumulation_steps": "auto", + "bf16": { + "enabled": true + }, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": 3, + "overlap_comm": false, + "contiguous_gradients": false + } +}